summaryrefslogtreecommitdiffstats
path: root/tools/testing
diff options
context:
space:
mode:
Diffstat (limited to 'tools/testing')
-rw-r--r--tools/testing/fault-injection/failcmd.sh220
-rwxr-xr-xtools/testing/ktest/compare-ktest-sample.pl33
-rwxr-xr-xtools/testing/ktest/config-bisect.pl770
-rw-r--r--tools/testing/ktest/examples/README32
-rw-r--r--tools/testing/ktest/examples/crosstests.conf225
-rw-r--r--tools/testing/ktest/examples/include/bisect.conf90
-rw-r--r--tools/testing/ktest/examples/include/defaults.conf157
-rw-r--r--tools/testing/ktest/examples/include/min-config.conf60
-rw-r--r--tools/testing/ktest/examples/include/patchcheck.conf111
-rw-r--r--tools/testing/ktest/examples/include/tests.conf74
-rw-r--r--tools/testing/ktest/examples/kvm.conf92
-rw-r--r--tools/testing/ktest/examples/snowball.conf53
-rw-r--r--tools/testing/ktest/examples/test.conf62
-rwxr-xr-xtools/testing/ktest/ktest.pl4425
-rw-r--r--tools/testing/ktest/sample.conf1348
-rw-r--r--tools/testing/nvdimm/Kbuild84
-rw-r--r--tools/testing/nvdimm/Makefile8
-rw-r--r--tools/testing/nvdimm/acpi_nfit_test.c8
-rw-r--r--tools/testing/nvdimm/config_check.c18
-rw-r--r--tools/testing/nvdimm/dax-dev.c49
-rw-r--r--tools/testing/nvdimm/device_dax_test.c8
-rw-r--r--tools/testing/nvdimm/libnvdimm_test.c8
-rw-r--r--tools/testing/nvdimm/pmem-dax.c57
-rw-r--r--tools/testing/nvdimm/pmem_test.c8
-rw-r--r--tools/testing/nvdimm/test/Kbuild9
-rw-r--r--tools/testing/nvdimm/test/iomap.c403
-rw-r--r--tools/testing/nvdimm/test/nfit.c2957
-rw-r--r--tools/testing/nvdimm/test/nfit_test.h249
-rw-r--r--tools/testing/nvdimm/watermark.h21
-rw-r--r--tools/testing/radix-tree/.gitignore6
-rw-r--r--tools/testing/radix-tree/Makefile51
-rw-r--r--tools/testing/radix-tree/benchmark.c257
-rw-r--r--tools/testing/radix-tree/generated/autoconf.h1
-rw-r--r--tools/testing/radix-tree/idr-test.c512
-rw-r--r--tools/testing/radix-tree/iteration_check.c221
-rw-r--r--tools/testing/radix-tree/linux.c112
-rw-r--r--tools/testing/radix-tree/linux/bug.h1
-rw-r--r--tools/testing/radix-tree/linux/compiler_types.h0
-rw-r--r--tools/testing/radix-tree/linux/cpu.h1
-rw-r--r--tools/testing/radix-tree/linux/gfp.h33
-rw-r--r--tools/testing/radix-tree/linux/idr.h1
-rw-r--r--tools/testing/radix-tree/linux/init.h1
-rw-r--r--tools/testing/radix-tree/linux/kernel.h20
-rw-r--r--tools/testing/radix-tree/linux/kmemleak.h1
-rw-r--r--tools/testing/radix-tree/linux/percpu.h11
-rw-r--r--tools/testing/radix-tree/linux/preempt.h15
-rw-r--r--tools/testing/radix-tree/linux/radix-tree.h27
-rw-r--r--tools/testing/radix-tree/linux/rcupdate.h10
-rw-r--r--tools/testing/radix-tree/linux/slab.h27
-rw-r--r--tools/testing/radix-tree/linux/xarray.h2
-rw-r--r--tools/testing/radix-tree/main.c388
-rw-r--r--tools/testing/radix-tree/multiorder.c719
-rw-r--r--tools/testing/radix-tree/regression.h9
-rw-r--r--tools/testing/radix-tree/regression1.c221
-rw-r--r--tools/testing/radix-tree/regression2.c123
-rw-r--r--tools/testing/radix-tree/regression3.c118
-rw-r--r--tools/testing/radix-tree/tag_check.c380
-rw-r--r--tools/testing/radix-tree/test.c334
-rw-r--r--tools/testing/radix-tree/test.h65
-rw-r--r--tools/testing/scatterlist/Makefile30
-rw-r--r--tools/testing/scatterlist/linux/mm.h125
-rw-r--r--tools/testing/scatterlist/main.c79
-rw-r--r--tools/testing/selftests/.gitignore5
-rw-r--r--tools/testing/selftests/Makefile167
-rw-r--r--tools/testing/selftests/android/Makefile38
-rw-r--r--tools/testing/selftests/android/config5
-rw-r--r--tools/testing/selftests/android/ion/.gitignore3
-rw-r--r--tools/testing/selftests/android/ion/Makefile19
-rw-r--r--tools/testing/selftests/android/ion/README101
-rw-r--r--tools/testing/selftests/android/ion/ion.h143
-rwxr-xr-xtools/testing/selftests/android/ion/ion_test.sh58
-rw-r--r--tools/testing/selftests/android/ion/ionapp_export.c136
-rw-r--r--tools/testing/selftests/android/ion/ionapp_import.c88
-rw-r--r--tools/testing/selftests/android/ion/ionmap_test.c136
-rw-r--r--tools/testing/selftests/android/ion/ionutils.c253
-rw-r--r--tools/testing/selftests/android/ion/ionutils.h55
-rw-r--r--tools/testing/selftests/android/ion/ipcsocket.c227
-rw-r--r--tools/testing/selftests/android/ion/ipcsocket.h35
-rwxr-xr-xtools/testing/selftests/android/run.sh3
-rw-r--r--tools/testing/selftests/bpf/.gitignore21
-rw-r--r--tools/testing/selftests/bpf/Makefile145
-rw-r--r--tools/testing/selftests/bpf/bpf_endian.h57
-rw-r--r--tools/testing/selftests/bpf/bpf_helpers.h336
-rw-r--r--tools/testing/selftests/bpf/bpf_rand.h80
-rw-r--r--tools/testing/selftests/bpf/bpf_rlimit.h28
-rw-r--r--tools/testing/selftests/bpf/bpf_util.h61
-rw-r--r--tools/testing/selftests/bpf/cgroup_helpers.c235
-rw-r--r--tools/testing/selftests/bpf/cgroup_helpers.h18
-rw-r--r--tools/testing/selftests/bpf/config20
-rw-r--r--tools/testing/selftests/bpf/connect4_prog.c45
-rw-r--r--tools/testing/selftests/bpf/connect6_prog.c61
-rw-r--r--tools/testing/selftests/bpf/dev_cgroup.c60
-rw-r--r--tools/testing/selftests/bpf/get_cgroup_id_kern.c40
-rw-r--r--tools/testing/selftests/bpf/get_cgroup_id_user.c149
-rw-r--r--tools/testing/selftests/bpf/gnu/stubs.h1
-rw-r--r--tools/testing/selftests/bpf/include/uapi/linux/types.h23
-rw-r--r--tools/testing/selftests/bpf/sample_map_ret0.c34
-rw-r--r--tools/testing/selftests/bpf/sample_ret0.c7
-rw-r--r--tools/testing/selftests/bpf/sendmsg4_prog.c49
-rw-r--r--tools/testing/selftests/bpf/sendmsg6_prog.c59
-rw-r--r--tools/testing/selftests/bpf/socket_cookie_prog.c60
-rw-r--r--tools/testing/selftests/bpf/sockmap_parse_prog.c46
-rw-r--r--tools/testing/selftests/bpf/sockmap_tcp_msg_prog.c33
-rw-r--r--tools/testing/selftests/bpf/sockmap_verdict_prog.c73
-rwxr-xr-xtools/testing/selftests/bpf/tcp_client.py51
-rwxr-xr-xtools/testing/selftests/bpf/tcp_server.py83
-rw-r--r--tools/testing/selftests/bpf/test_adjust_tail.c30
-rw-r--r--tools/testing/selftests/bpf/test_align.c719
-rw-r--r--tools/testing/selftests/bpf/test_btf.c2840
-rw-r--r--tools/testing/selftests/bpf/test_btf_haskv.c45
-rw-r--r--tools/testing/selftests/bpf/test_btf_nokv.c43
-rw-r--r--tools/testing/selftests/bpf/test_cgroup_storage.c131
-rw-r--r--tools/testing/selftests/bpf/test_dev_cgroup.c96
-rw-r--r--tools/testing/selftests/bpf/test_get_stack_rawtp.c102
-rw-r--r--tools/testing/selftests/bpf/test_iptunnel_common.h37
-rwxr-xr-xtools/testing/selftests/bpf/test_kmod.sh61
-rw-r--r--tools/testing/selftests/bpf/test_l4lb.c473
-rw-r--r--tools/testing/selftests/bpf/test_l4lb_noinline.c473
-rwxr-xr-xtools/testing/selftests/bpf/test_libbpf.sh43
-rw-r--r--tools/testing/selftests/bpf/test_libbpf_open.c152
-rwxr-xr-xtools/testing/selftests/bpf/test_lirc_mode2.sh40
-rw-r--r--tools/testing/selftests/bpf/test_lirc_mode2_kern.c23
-rw-r--r--tools/testing/selftests/bpf/test_lirc_mode2_user.c149
-rw-r--r--tools/testing/selftests/bpf/test_lpm_map.c804
-rw-r--r--tools/testing/selftests/bpf/test_lru_map.c646
-rw-r--r--tools/testing/selftests/bpf/test_lwt_seg6local.c437
-rwxr-xr-xtools/testing/selftests/bpf/test_lwt_seg6local.sh149
-rw-r--r--tools/testing/selftests/bpf/test_maps.c1451
-rw-r--r--tools/testing/selftests/bpf/test_obj_id.c35
-rwxr-xr-xtools/testing/selftests/bpf/test_offload.py1288
-rw-r--r--tools/testing/selftests/bpf/test_pkt_access.c65
-rw-r--r--tools/testing/selftests/bpf/test_pkt_md_access.c46
-rw-r--r--tools/testing/selftests/bpf/test_progs.c1756
-rw-r--r--tools/testing/selftests/bpf/test_select_reuseport.c700
-rw-r--r--tools/testing/selftests/bpf/test_select_reuseport_common.h36
-rw-r--r--tools/testing/selftests/bpf/test_select_reuseport_kern.c180
-rwxr-xr-xtools/testing/selftests/bpf/test_skb_cgroup_id.sh62
-rw-r--r--tools/testing/selftests/bpf/test_skb_cgroup_id_kern.c47
-rw-r--r--tools/testing/selftests/bpf/test_skb_cgroup_id_user.c187
-rw-r--r--tools/testing/selftests/bpf/test_sock.c480
-rw-r--r--tools/testing/selftests/bpf/test_sock_addr.c1441
-rwxr-xr-xtools/testing/selftests/bpf/test_sock_addr.sh57
-rw-r--r--tools/testing/selftests/bpf/test_socket_cookie.c225
-rw-r--r--tools/testing/selftests/bpf/test_sockhash_kern.c5
-rw-r--r--tools/testing/selftests/bpf/test_sockmap.c1527
-rw-r--r--tools/testing/selftests/bpf/test_sockmap_kern.c5
-rw-r--r--tools/testing/selftests/bpf/test_sockmap_kern.h363
-rw-r--r--tools/testing/selftests/bpf/test_stacktrace_build_id.c76
-rw-r--r--tools/testing/selftests/bpf/test_stacktrace_map.c75
-rw-r--r--tools/testing/selftests/bpf/test_tag.c202
-rw-r--r--tools/testing/selftests/bpf/test_tcp_estats.c258
-rw-r--r--tools/testing/selftests/bpf/test_tcpbpf.h17
-rw-r--r--tools/testing/selftests/bpf/test_tcpbpf_kern.c121
-rw-r--r--tools/testing/selftests/bpf/test_tcpbpf_user.c131
-rw-r--r--tools/testing/selftests/bpf/test_tracepoint.c26
-rwxr-xr-xtools/testing/selftests/bpf/test_tunnel.sh731
-rw-r--r--tools/testing/selftests/bpf/test_tunnel_kern.c713
-rw-r--r--tools/testing/selftests/bpf/test_verifier.c13243
-rw-r--r--tools/testing/selftests/bpf/test_verifier_log.c174
-rw-r--r--tools/testing/selftests/bpf/test_xdp.c235
-rw-r--r--tools/testing/selftests/bpf/test_xdp_meta.c53
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_meta.sh52
-rw-r--r--tools/testing/selftests/bpf/test_xdp_noinline.c833
-rw-r--r--tools/testing/selftests/bpf/test_xdp_redirect.c28
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_redirect.sh61
-rw-r--r--tools/testing/selftests/bpf/trace_helpers.c214
-rw-r--r--tools/testing/selftests/bpf/trace_helpers.h25
-rw-r--r--tools/testing/selftests/bpf/urandom_read.c28
-rw-r--r--tools/testing/selftests/breakpoints/.gitignore2
-rw-r--r--tools/testing/selftests/breakpoints/Makefile16
-rw-r--r--tools/testing/selftests/breakpoints/breakpoint_test.c401
-rw-r--r--tools/testing/selftests/breakpoints/breakpoint_test_arm64.c258
-rw-r--r--tools/testing/selftests/breakpoints/step_after_suspend_test.c220
-rw-r--r--tools/testing/selftests/capabilities/.gitignore2
-rw-r--r--tools/testing/selftests/capabilities/Makefile9
-rw-r--r--tools/testing/selftests/capabilities/test_execve.c460
-rw-r--r--tools/testing/selftests/capabilities/validate_cap.c80
-rw-r--r--tools/testing/selftests/cgroup/.gitignore2
-rw-r--r--tools/testing/selftests/cgroup/Makefile12
-rw-r--r--tools/testing/selftests/cgroup/cgroup_util.c370
-rw-r--r--tools/testing/selftests/cgroup/cgroup_util.h43
-rw-r--r--tools/testing/selftests/cgroup/test_core.c567
-rw-r--r--tools/testing/selftests/cgroup/test_memcontrol.c1228
-rw-r--r--tools/testing/selftests/cpu-hotplug/Makefile11
-rw-r--r--tools/testing/selftests/cpu-hotplug/config1
-rwxr-xr-xtools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh293
-rw-r--r--tools/testing/selftests/cpufreq/Makefile9
-rw-r--r--tools/testing/selftests/cpufreq/config15
-rwxr-xr-xtools/testing/selftests/cpufreq/cpu.sh85
-rwxr-xr-xtools/testing/selftests/cpufreq/cpufreq.sh242
-rwxr-xr-xtools/testing/selftests/cpufreq/governor.sh154
-rwxr-xr-xtools/testing/selftests/cpufreq/main.sh198
-rwxr-xr-xtools/testing/selftests/cpufreq/module.sh244
-rwxr-xr-xtools/testing/selftests/cpufreq/special-tests.sh116
-rwxr-xr-xtools/testing/selftests/drivers/gpu/drm_mm.sh16
-rwxr-xr-xtools/testing/selftests/drivers/gpu/i915.sh16
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/mirror_gre.sh217
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh197
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh189
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh233
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/router_scale.sh167
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh366
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_lib_spectrum.sh119
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_resources.sh117
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/spectrum/mirror_gre_scale.sh13
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh55
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/spectrum/router_scale.sh18
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/spectrum/tc_flower_scale.sh19
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh134
-rwxr-xr-xtools/testing/selftests/drivers/usb/usbip/usbip_test.sh200
-rw-r--r--tools/testing/selftests/efivarfs/.gitignore2
-rw-r--r--tools/testing/selftests/efivarfs/Makefile7
-rw-r--r--tools/testing/selftests/efivarfs/config1
-rw-r--r--tools/testing/selftests/efivarfs/create-read.c39
-rwxr-xr-xtools/testing/selftests/efivarfs/efivarfs.sh215
-rw-r--r--tools/testing/selftests/efivarfs/open-unlink.c134
-rw-r--r--tools/testing/selftests/exec/.gitignore9
-rw-r--r--tools/testing/selftests/exec/Makefile24
-rw-r--r--tools/testing/selftests/exec/execveat.c425
-rw-r--r--tools/testing/selftests/filesystems/.gitignore2
-rw-r--r--tools/testing/selftests/filesystems/Makefile7
-rw-r--r--tools/testing/selftests/filesystems/devpts_pts.c316
-rw-r--r--tools/testing/selftests/filesystems/dnotify_test.c35
-rw-r--r--tools/testing/selftests/firmware/Makefile12
-rw-r--r--tools/testing/selftests/firmware/config5
-rwxr-xr-xtools/testing/selftests/firmware/fw_fallback.sh283
-rwxr-xr-xtools/testing/selftests/firmware/fw_filesystem.sh332
-rwxr-xr-xtools/testing/selftests/firmware/fw_lib.sh205
-rwxr-xr-xtools/testing/selftests/firmware/fw_run_tests.sh70
-rw-r--r--tools/testing/selftests/ftrace/.gitignore1
-rw-r--r--tools/testing/selftests/ftrace/Makefile8
-rw-r--r--tools/testing/selftests/ftrace/README82
-rw-r--r--tools/testing/selftests/ftrace/config9
-rwxr-xr-xtools/testing/selftests/ftrace/ftracetest330
-rw-r--r--tools/testing/selftests/ftrace/samples/fail.tc4
-rw-r--r--tools/testing/selftests/ftrace/samples/pass.tc3
-rw-r--r--tools/testing/selftests/ftrace/samples/unresolved.tc4
-rw-r--r--tools/testing/selftests/ftrace/samples/unsupported.tc3
-rw-r--r--tools/testing/selftests/ftrace/samples/untested.tc3
-rw-r--r--tools/testing/selftests/ftrace/samples/xfail.tc3
-rw-r--r--tools/testing/selftests/ftrace/settings1
-rw-r--r--tools/testing/selftests/ftrace/test.d/00basic/basic1.tc3
-rw-r--r--tools/testing/selftests/ftrace/test.d/00basic/basic2.tc9
-rw-r--r--tools/testing/selftests/ftrace/test.d/00basic/basic3.tc10
-rw-r--r--tools/testing/selftests/ftrace/test.d/00basic/basic4.tc5
-rw-r--r--tools/testing/selftests/ftrace/test.d/00basic/snapshot.tc28
-rw-r--r--tools/testing/selftests/ftrace/test.d/event/event-enable.tc62
-rw-r--r--tools/testing/selftests/ftrace/test.d/event/event-pid.tc74
-rw-r--r--tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc62
-rw-r--r--tools/testing/selftests/ftrace/test.d/event/toplevel-enable.tc65
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc92
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc53
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc62
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc118
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc123
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc81
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc170
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/func_traceonoff_triggers.tc186
-rw-r--r--tools/testing/selftests/ftrace/test.d/functions100
-rw-r--r--tools/testing/selftests/ftrace/test.d/instances/instance-event.tc146
-rw-r--r--tools/testing/selftests/ftrace/test.d/instances/instance.tc86
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc13
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc15
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc18
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc48
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc107
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc38
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc37
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc56
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_module.tc29
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc17
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_maxactive.tc41
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc39
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/probepoint.tc43
-rw-r--r--tools/testing/selftests/ftrace/test.d/preemptirq/irqsoff_tracer.tc73
-rw-r--r--tools/testing/selftests/ftrace/test.d/template10
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc39
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc54
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc58
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc44
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc50
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc50
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc48
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc54
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc80
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/trigger-eventonoff.tc66
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/trigger-filter.tc61
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc76
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc84
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc74
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc62
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/trigger-stacktrace.tc54
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-hist.tc49
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-snapshot.tc74
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic-kernel.tc68
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic.tc66
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/trigger-traceonoff.tc59
-rw-r--r--tools/testing/selftests/futex/Makefile37
-rw-r--r--tools/testing/selftests/futex/README62
-rw-r--r--tools/testing/selftests/futex/functional/.gitignore7
-rw-r--r--tools/testing/selftests/futex/functional/Makefile25
-rw-r--r--tools/testing/selftests/futex/functional/futex_requeue_pi.c412
-rw-r--r--tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c138
-rw-r--r--tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c225
-rw-r--r--tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c128
-rw-r--r--tools/testing/selftests/futex/functional/futex_wait_timeout.c89
-rw-r--r--tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c126
-rw-r--r--tools/testing/selftests/futex/functional/futex_wait_wouldblock.c81
-rwxr-xr-xtools/testing/selftests/futex/functional/run.sh79
-rw-r--r--tools/testing/selftests/futex/include/atomic.h83
-rw-r--r--tools/testing/selftests/futex/include/futextest.h266
-rw-r--r--tools/testing/selftests/futex/include/logging.h152
-rwxr-xr-xtools/testing/selftests/futex/run.sh33
-rwxr-xr-xtools/testing/selftests/gen_kselftest_tar.sh53
-rw-r--r--tools/testing/selftests/gpio/.gitignore1
-rw-r--r--tools/testing/selftests/gpio/Makefile29
-rw-r--r--tools/testing/selftests/gpio/config2
-rw-r--r--tools/testing/selftests/gpio/gpio-mockup-chardev.c327
-rwxr-xr-xtools/testing/selftests/gpio/gpio-mockup-sysfs.sh135
-rwxr-xr-xtools/testing/selftests/gpio/gpio-mockup.sh206
-rw-r--r--tools/testing/selftests/ia64/.gitignore1
-rw-r--r--tools/testing/selftests/ia64/Makefile9
-rw-r--r--tools/testing/selftests/ia64/aliasing-test.c263
-rw-r--r--tools/testing/selftests/intel_pstate/.gitignore2
-rw-r--r--tools/testing/selftests/intel_pstate/Makefile16
-rw-r--r--tools/testing/selftests/intel_pstate/aperf.c83
-rw-r--r--tools/testing/selftests/intel_pstate/msr.c40
-rwxr-xr-xtools/testing/selftests/intel_pstate/run.sh128
-rw-r--r--tools/testing/selftests/ipc/.gitignore2
-rw-r--r--tools/testing/selftests/ipc/Makefile18
-rw-r--r--tools/testing/selftests/ipc/config2
-rw-r--r--tools/testing/selftests/ipc/msgque.c255
-rw-r--r--tools/testing/selftests/kcmp/.gitignore2
-rw-r--r--tools/testing/selftests/kcmp/Makefile8
-rw-r--r--tools/testing/selftests/kcmp/kcmp_test.c166
-rw-r--r--tools/testing/selftests/kmod/Makefile11
-rw-r--r--tools/testing/selftests/kmod/config7
-rwxr-xr-xtools/testing/selftests/kmod/kmod.sh623
-rw-r--r--tools/testing/selftests/kselftest.h183
-rw-r--r--tools/testing/selftests/kselftest_harness.h779
-rwxr-xr-xtools/testing/selftests/kselftest_install.sh35
-rw-r--r--tools/testing/selftests/kvm/.gitignore6
-rw-r--r--tools/testing/selftests/kvm/Makefile43
-rw-r--r--tools/testing/selftests/kvm/config3
-rw-r--r--tools/testing/selftests/kvm/cr4_cpuid_sync_test.c113
-rw-r--r--tools/testing/selftests/kvm/dirty_log_test.c308
-rw-r--r--tools/testing/selftests/kvm/include/kvm_util.h194
-rw-r--r--tools/testing/selftests/kvm/include/sparsebit.h75
-rw-r--r--tools/testing/selftests/kvm/include/test_util.h44
-rw-r--r--tools/testing/selftests/kvm/include/vmx.h552
-rw-r--r--tools/testing/selftests/kvm/include/x86.h1047
-rw-r--r--tools/testing/selftests/kvm/lib/assert.c92
-rw-r--r--tools/testing/selftests/kvm/lib/elf.c197
-rw-r--r--tools/testing/selftests/kvm/lib/io.c158
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util.c1680
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util_internal.h72
-rw-r--r--tools/testing/selftests/kvm/lib/sparsebit.c2087
-rw-r--r--tools/testing/selftests/kvm/lib/vmx.c283
-rw-r--r--tools/testing/selftests/kvm/lib/x86.c893
-rw-r--r--tools/testing/selftests/kvm/platform_info_test.c110
-rw-r--r--tools/testing/selftests/kvm/set_sregs_test.c54
-rw-r--r--tools/testing/selftests/kvm/state_test.c196
-rw-r--r--tools/testing/selftests/kvm/sync_regs_test.c237
-rw-r--r--tools/testing/selftests/kvm/vmx_tsc_adjust_test.c175
-rw-r--r--tools/testing/selftests/lib.mk169
-rw-r--r--tools/testing/selftests/lib/Makefile8
-rwxr-xr-xtools/testing/selftests/lib/bitmap.sh19
-rw-r--r--tools/testing/selftests/lib/config3
-rwxr-xr-xtools/testing/selftests/lib/prime_numbers.sh19
-rwxr-xr-xtools/testing/selftests/lib/printf.sh19
-rw-r--r--tools/testing/selftests/locking/Makefile10
-rwxr-xr-xtools/testing/selftests/locking/ww_mutex.sh19
-rw-r--r--tools/testing/selftests/media_tests/.gitignore3
-rw-r--r--tools/testing/selftests/media_tests/Makefile6
-rwxr-xr-xtools/testing/selftests/media_tests/bind_unbind_sample.sh13
-rw-r--r--tools/testing/selftests/media_tests/media_device_open.c82
-rw-r--r--tools/testing/selftests/media_tests/media_device_test.c103
-rwxr-xr-xtools/testing/selftests/media_tests/open_loop_test.sh11
-rw-r--r--tools/testing/selftests/media_tests/regression_test.txt43
-rw-r--r--tools/testing/selftests/media_tests/video_device_test.c101
-rw-r--r--tools/testing/selftests/membarrier/.gitignore1
-rw-r--r--tools/testing/selftests/membarrier/Makefile6
-rw-r--r--tools/testing/selftests/membarrier/membarrier_test.c312
-rw-r--r--tools/testing/selftests/memfd/.gitignore4
-rw-r--r--tools/testing/selftests/memfd/Makefile20
-rw-r--r--tools/testing/selftests/memfd/common.c46
-rw-r--r--tools/testing/selftests/memfd/common.h9
-rw-r--r--tools/testing/selftests/memfd/config1
-rw-r--r--tools/testing/selftests/memfd/fuse_mnt.c111
-rw-r--r--tools/testing/selftests/memfd/fuse_test.c330
-rw-r--r--tools/testing/selftests/memfd/memfd_test.c970
-rwxr-xr-xtools/testing/selftests/memfd/run_fuse_test.sh15
-rwxr-xr-xtools/testing/selftests/memfd/run_hugetlbfs_test.sh68
-rw-r--r--tools/testing/selftests/memory-hotplug/Makefile11
-rw-r--r--tools/testing/selftests/memory-hotplug/config5
-rwxr-xr-xtools/testing/selftests/memory-hotplug/mem-on-off-test.sh291
-rw-r--r--tools/testing/selftests/mount/.gitignore1
-rw-r--r--tools/testing/selftests/mount/Makefile9
-rw-r--r--tools/testing/selftests/mount/config1
-rwxr-xr-xtools/testing/selftests/mount/run_tests.sh12
-rw-r--r--tools/testing/selftests/mount/unprivileged-remount-test.c371
-rw-r--r--tools/testing/selftests/mqueue/.gitignore2
-rw-r--r--tools/testing/selftests/mqueue/Makefile7
-rw-r--r--tools/testing/selftests/mqueue/mq_open_tests.c502
-rw-r--r--tools/testing/selftests/mqueue/mq_perf_tests.c752
-rw-r--r--tools/testing/selftests/net/.gitignore16
-rw-r--r--tools/testing/selftests/net/Makefile23
-rw-r--r--tools/testing/selftests/net/config16
-rwxr-xr-xtools/testing/selftests/net/fib-onlink-tests.sh467
-rwxr-xr-xtools/testing/selftests/net/fib_rule_tests.sh255
-rwxr-xr-xtools/testing/selftests/net/fib_tests.sh1439
-rw-r--r--tools/testing/selftests/net/forwarding/.gitignore1
-rw-r--r--tools/testing/selftests/net/forwarding/README58
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_port_isolation.sh151
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_vlan_aware.sh151
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh104
-rw-r--r--tools/testing/selftests/net/forwarding/config12
-rw-r--r--tools/testing/selftests/net/forwarding/devlink_lib.sh108
-rw-r--r--tools/testing/selftests/net/forwarding/forwarding.config.sample35
-rwxr-xr-xtools/testing/selftests/net/forwarding/gre_multipath.sh257
-rw-r--r--tools/testing/selftests/net/forwarding/lib.sh964
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre.sh160
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_bound.sh226
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh132
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh132
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh129
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh283
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_changes.sh271
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_flower.sh137
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh285
-rw-r--r--tools/testing/selftests/net/forwarding/mirror_gre_lib.sh130
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_neigh.sh115
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_nh.sh131
-rw-r--r--tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh94
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_vlan.sh92
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh283
-rw-r--r--tools/testing/selftests/net/forwarding/mirror_lib.sh132
-rw-r--r--tools/testing/selftests/net/forwarding/mirror_topo_lib.sh101
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_vlan.sh131
-rwxr-xr-xtools/testing/selftests/net/forwarding/router.sh135
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_bridge.sh113
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_bridge_vlan.sh132
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_broadcast.sh233
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_multipath.sh342
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_actions.sh213
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_chains.sh205
-rw-r--r--tools/testing/selftests/net/forwarding/tc_common.sh25
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_flower.sh260
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_shblocks.sh125
-rwxr-xr-xtools/testing/selftests/net/in_netns.sh23
-rwxr-xr-xtools/testing/selftests/net/ip6_gre_headroom.sh65
-rw-r--r--tools/testing/selftests/net/msg_zerocopy.c810
-rwxr-xr-xtools/testing/selftests/net/msg_zerocopy.sh120
-rwxr-xr-xtools/testing/selftests/net/netdevice.sh205
-rwxr-xr-xtools/testing/selftests/net/pmtu.sh477
-rw-r--r--tools/testing/selftests/net/psock_fanout.c486
-rw-r--r--tools/testing/selftests/net/psock_lib.h158
-rw-r--r--tools/testing/selftests/net/psock_snd.c397
-rwxr-xr-xtools/testing/selftests/net/psock_snd.sh98
-rw-r--r--tools/testing/selftests/net/psock_tpacket.c864
-rw-r--r--tools/testing/selftests/net/reuseaddr_conflict.c114
-rw-r--r--tools/testing/selftests/net/reuseport_bpf.c641
-rw-r--r--tools/testing/selftests/net/reuseport_bpf_cpu.c259
-rw-r--r--tools/testing/selftests/net/reuseport_bpf_numa.c258
-rw-r--r--tools/testing/selftests/net/reuseport_dualstack.c210
-rwxr-xr-xtools/testing/selftests/net/rtnetlink.sh1023
-rwxr-xr-xtools/testing/selftests/net/run_afpackettests27
-rwxr-xr-xtools/testing/selftests/net/run_netsocktests13
-rw-r--r--tools/testing/selftests/net/socket.c93
-rw-r--r--tools/testing/selftests/net/tcp_inq.c189
-rw-r--r--tools/testing/selftests/net/tcp_mmap.c447
-rwxr-xr-xtools/testing/selftests/net/test_bpf.sh11
-rw-r--r--tools/testing/selftests/net/tls.c741
-rw-r--r--tools/testing/selftests/net/udpgso.c685
-rwxr-xr-xtools/testing/selftests/net/udpgso.sh29
-rwxr-xr-xtools/testing/selftests/net/udpgso_bench.sh71
-rw-r--r--tools/testing/selftests/net/udpgso_bench_rx.c265
-rw-r--r--tools/testing/selftests/net/udpgso_bench_tx.c426
-rw-r--r--tools/testing/selftests/netfilter/Makefile6
-rw-r--r--tools/testing/selftests/netfilter/config2
-rwxr-xr-xtools/testing/selftests/netfilter/conntrack_icmp_related.sh283
-rwxr-xr-xtools/testing/selftests/netfilter/nft_nat.sh766
-rwxr-xr-xtools/testing/selftests/netfilter/nft_trans_stress.sh78
-rw-r--r--tools/testing/selftests/networking/timestamping/.gitignore4
-rw-r--r--tools/testing/selftests/networking/timestamping/Makefile13
-rw-r--r--tools/testing/selftests/networking/timestamping/hwtstamp_config.c135
-rw-r--r--tools/testing/selftests/networking/timestamping/rxtimestamp.c389
-rw-r--r--tools/testing/selftests/networking/timestamping/timestamping.c534
-rw-r--r--tools/testing/selftests/networking/timestamping/txtimestamp.c564
-rw-r--r--tools/testing/selftests/nsfs/.gitignore2
-rw-r--r--tools/testing/selftests/nsfs/Makefile5
-rw-r--r--tools/testing/selftests/nsfs/config3
-rw-r--r--tools/testing/selftests/nsfs/owner.c92
-rw-r--r--tools/testing/selftests/nsfs/pidns.c79
-rwxr-xr-xtools/testing/selftests/ntb/ntb_test.sh590
-rw-r--r--tools/testing/selftests/powerpc/Makefile74
-rw-r--r--tools/testing/selftests/powerpc/alignment/.gitignore2
-rw-r--r--tools/testing/selftests/powerpc/alignment/Makefile6
-rw-r--r--tools/testing/selftests/powerpc/alignment/alignment_handler.c575
-rw-r--r--tools/testing/selftests/powerpc/alignment/copy_first_unaligned.c72
-rw-r--r--tools/testing/selftests/powerpc/benchmarks/.gitignore7
-rw-r--r--tools/testing/selftests/powerpc/benchmarks/Makefile18
-rw-r--r--tools/testing/selftests/powerpc/benchmarks/context_switch.c506
-rw-r--r--tools/testing/selftests/powerpc/benchmarks/exec_target.c16
-rw-r--r--tools/testing/selftests/powerpc/benchmarks/fork.c325
-rw-r--r--tools/testing/selftests/powerpc/benchmarks/futex_bench.c43
-rw-r--r--tools/testing/selftests/powerpc/benchmarks/gettimeofday.c31
-rw-r--r--tools/testing/selftests/powerpc/benchmarks/mmap_bench.c90
-rw-r--r--tools/testing/selftests/powerpc/benchmarks/null_syscall.c157
-rw-r--r--tools/testing/selftests/powerpc/cache_shape/.gitignore1
-rw-r--r--tools/testing/selftests/powerpc/cache_shape/Makefile7
-rw-r--r--tools/testing/selftests/powerpc/cache_shape/cache_shape.c125
-rw-r--r--tools/testing/selftests/powerpc/copyloops/.gitignore13
-rw-r--r--tools/testing/selftests/powerpc/copyloops/Makefile53
-rw-r--r--tools/testing/selftests/powerpc/copyloops/asm/asm-compat.h0
-rw-r--r--tools/testing/selftests/powerpc/copyloops/asm/export.h2
-rw-r--r--tools/testing/selftests/powerpc/copyloops/asm/feature-fixups.h0
-rw-r--r--tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h47
-rw-r--r--tools/testing/selftests/powerpc/copyloops/asm/processor.h0
-rw-r--r--tools/testing/selftests/powerpc/copyloops/copy_tofrom_user_reference.S24
l---------tools/testing/selftests/powerpc/copyloops/copyuser_64.S1
l---------tools/testing/selftests/powerpc/copyloops/copyuser_power7.S1
-rw-r--r--tools/testing/selftests/powerpc/copyloops/exc_validate.c124
l---------tools/testing/selftests/powerpc/copyloops/memcpy_64.S1
l---------tools/testing/selftests/powerpc/copyloops/memcpy_power7.S1
-rw-r--r--tools/testing/selftests/powerpc/copyloops/stubs.S19
-rw-r--r--tools/testing/selftests/powerpc/copyloops/validate.c100
-rw-r--r--tools/testing/selftests/powerpc/dscr/.gitignore7
-rw-r--r--tools/testing/selftests/powerpc/dscr/Makefile11
-rw-r--r--tools/testing/selftests/powerpc/dscr/dscr.h125
-rw-r--r--tools/testing/selftests/powerpc/dscr/dscr_default_test.c127
-rw-r--r--tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c71
-rw-r--r--tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c109
-rw-r--r--tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c87
-rw-r--r--tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c101
-rw-r--r--tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c80
-rw-r--r--tools/testing/selftests/powerpc/dscr/dscr_user_test.c61
-rw-r--r--tools/testing/selftests/powerpc/harness.c131
-rw-r--r--tools/testing/selftests/powerpc/include/basic_asm.h74
-rw-r--r--tools/testing/selftests/powerpc/include/fpu_asm.h80
-rw-r--r--tools/testing/selftests/powerpc/include/gpr_asm.h96
-rw-r--r--tools/testing/selftests/powerpc/include/instructions.h69
-rw-r--r--tools/testing/selftests/powerpc/include/reg.h146
-rw-r--r--tools/testing/selftests/powerpc/include/subunit.h52
-rw-r--r--tools/testing/selftests/powerpc/include/utils.h83
-rw-r--r--tools/testing/selftests/powerpc/include/vmx_asm.h96
-rw-r--r--tools/testing/selftests/powerpc/include/vsx_asm.h71
-rw-r--r--tools/testing/selftests/powerpc/lib/reg.S397
-rw-r--r--tools/testing/selftests/powerpc/math/.gitignore7
-rw-r--r--tools/testing/selftests/powerpc/math/Makefile19
-rw-r--r--tools/testing/selftests/powerpc/math/fpu_asm.S135
-rw-r--r--tools/testing/selftests/powerpc/math/fpu_preempt.c113
-rw-r--r--tools/testing/selftests/powerpc/math/fpu_signal.c135
-rw-r--r--tools/testing/selftests/powerpc/math/fpu_syscall.c90
-rw-r--r--tools/testing/selftests/powerpc/math/vmx_asm.S152
-rw-r--r--tools/testing/selftests/powerpc/math/vmx_preempt.c112
-rw-r--r--tools/testing/selftests/powerpc/math/vmx_signal.c156
-rw-r--r--tools/testing/selftests/powerpc/math/vmx_syscall.c91
-rw-r--r--tools/testing/selftests/powerpc/math/vsx_asm.S61
-rw-r--r--tools/testing/selftests/powerpc/math/vsx_preempt.c147
-rw-r--r--tools/testing/selftests/powerpc/mm/.gitignore5
-rw-r--r--tools/testing/selftests/powerpc/mm/Makefile19
-rw-r--r--tools/testing/selftests/powerpc/mm/hugetlb_vs_thp_test.c77
-rw-r--r--tools/testing/selftests/powerpc/mm/prot_sao.c42
-rw-r--r--tools/testing/selftests/powerpc/mm/segv_errors.c78
-rw-r--r--tools/testing/selftests/powerpc/mm/subpage_prot.c236
-rw-r--r--tools/testing/selftests/powerpc/mm/tlbie_test.c734
-rw-r--r--tools/testing/selftests/powerpc/pmu/.gitignore3
-rw-r--r--tools/testing/selftests/powerpc/pmu/Makefile46
-rw-r--r--tools/testing/selftests/powerpc/pmu/count_instructions.c147
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/.gitignore22
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/Makefile28
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c106
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/busy_loop.S271
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/close_clears_pmcc_test.c61
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/cpu_event_pinned_vs_ebb_test.c95
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/cpu_event_vs_ebb_test.c91
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c58
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c117
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c91
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/ebb.c485
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/ebb.h78
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/ebb_handler.S365
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/ebb_on_child_test.c88
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c92
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/ebb_vs_cpu_event_test.c88
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/event_attributes_test.c133
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/fixed_instruction_loop.S43
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/fork_cleanup_test.c81
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/instruction_count_test.c167
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c102
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c86
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c109
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c61
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c106
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c93
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/reg_access_test.c40
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/task_event_pinned_vs_ebb_test.c93
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/task_event_vs_ebb_test.c85
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/trace.c300
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/trace.h41
-rw-r--r--tools/testing/selftests/powerpc/pmu/event.c131
-rw-r--r--tools/testing/selftests/powerpc/pmu/event.h43
-rw-r--r--tools/testing/selftests/powerpc/pmu/l3_bank_test.c48
-rw-r--r--tools/testing/selftests/powerpc/pmu/lib.c227
-rw-r--r--tools/testing/selftests/powerpc/pmu/lib.h40
-rw-r--r--tools/testing/selftests/powerpc/pmu/loop.S43
-rw-r--r--tools/testing/selftests/powerpc/pmu/per_event_excludes.c114
-rw-r--r--tools/testing/selftests/powerpc/primitives/.gitignore1
-rw-r--r--tools/testing/selftests/powerpc/primitives/Makefile8
l---------tools/testing/selftests/powerpc/primitives/asm/asm-compat.h1
l---------tools/testing/selftests/powerpc/primitives/asm/asm-const.h1
l---------tools/testing/selftests/powerpc/primitives/asm/feature-fixups.h1
-rw-r--r--tools/testing/selftests/powerpc/primitives/asm/firmware.h0
-rw-r--r--tools/testing/selftests/powerpc/primitives/asm/ppc-opcode.h0
l---------tools/testing/selftests/powerpc/primitives/asm/ppc_asm.h1
-rw-r--r--tools/testing/selftests/powerpc/primitives/asm/processor.h0
-rw-r--r--tools/testing/selftests/powerpc/primitives/linux/stringify.h0
-rw-r--r--tools/testing/selftests/powerpc/primitives/load_unaligned_zeropad.c159
l---------tools/testing/selftests/powerpc/primitives/word-at-a-time.h1
-rw-r--r--tools/testing/selftests/powerpc/ptrace/.gitignore12
-rw-r--r--tools/testing/selftests/powerpc/ptrace/Makefile15
-rw-r--r--tools/testing/selftests/powerpc/ptrace/child.h139
-rw-r--r--tools/testing/selftests/powerpc/ptrace/core-pkey.c462
-rw-r--r--tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c195
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-gpr.c123
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-gpr.h74
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c342
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c330
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-tar.c135
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-tar.h50
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c158
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c169
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c174
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c184
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c167
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c160
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c167
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c117
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-vsx.h127
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace.h749
-rwxr-xr-xtools/testing/selftests/powerpc/scripts/hmi.sh89
-rw-r--r--tools/testing/selftests/powerpc/signal/.gitignore2
-rw-r--r--tools/testing/selftests/powerpc/signal/Makefile10
-rw-r--r--tools/testing/selftests/powerpc/signal/signal.S50
-rw-r--r--tools/testing/selftests/powerpc/signal/signal.c111
-rw-r--r--tools/testing/selftests/powerpc/signal/signal_tm.c110
-rw-r--r--tools/testing/selftests/powerpc/stringloops/.gitignore1
-rw-r--r--tools/testing/selftests/powerpc/stringloops/Makefile35
-rw-r--r--tools/testing/selftests/powerpc/stringloops/asm/cache.h1
-rw-r--r--tools/testing/selftests/powerpc/stringloops/asm/export.h1
-rw-r--r--tools/testing/selftests/powerpc/stringloops/asm/ppc-opcode.h39
-rw-r--r--tools/testing/selftests/powerpc/stringloops/asm/ppc_asm.h33
-rw-r--r--tools/testing/selftests/powerpc/stringloops/memcmp.c159
l---------tools/testing/selftests/powerpc/stringloops/memcmp_32.S1
l---------tools/testing/selftests/powerpc/stringloops/memcmp_64.S1
-rw-r--r--tools/testing/selftests/powerpc/stringloops/string.c21
-rw-r--r--tools/testing/selftests/powerpc/stringloops/strlen.c127
l---------tools/testing/selftests/powerpc/stringloops/strlen_32.S1
-rw-r--r--tools/testing/selftests/powerpc/switch_endian/.gitignore2
-rw-r--r--tools/testing/selftests/powerpc/switch_endian/Makefile18
-rw-r--r--tools/testing/selftests/powerpc/switch_endian/check.S101
-rw-r--r--tools/testing/selftests/powerpc/switch_endian/common.h7
-rw-r--r--tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S82
-rw-r--r--tools/testing/selftests/powerpc/syscalls/.gitignore1
-rw-r--r--tools/testing/selftests/powerpc/syscalls/Makefile8
-rw-r--r--tools/testing/selftests/powerpc/syscalls/ipc.h48
-rw-r--r--tools/testing/selftests/powerpc/syscalls/ipc_unmuxed.c61
-rw-r--r--tools/testing/selftests/powerpc/tm/.gitignore17
-rw-r--r--tools/testing/selftests/powerpc/tm/Makefile26
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-exec.c70
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-fork.c42
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-resched-dscr.c100
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-signal-context-chk-fpu.c92
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-signal-context-chk-gpr.c90
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vmx.c110
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vsx.c125
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-signal-msr-resv.c74
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-signal-stack.c76
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-signal.S114
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-sigreturn.c93
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-syscall-asm.S28
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-syscall.c106
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-tar.c91
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-tmspr.c150
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-trap.c331
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-unavailable.c407
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c118
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-vmxcopy.c104
-rw-r--r--tools/testing/selftests/powerpc/tm/tm.h96
-rw-r--r--tools/testing/selftests/powerpc/utils.c136
-rw-r--r--tools/testing/selftests/powerpc/vphn/.gitignore1
-rw-r--r--tools/testing/selftests/powerpc/vphn/Makefile9
-rw-r--r--tools/testing/selftests/powerpc/vphn/test-vphn.c411
l---------tools/testing/selftests/powerpc/vphn/vphn.c1
l---------tools/testing/selftests/powerpc/vphn/vphn.h1
-rw-r--r--tools/testing/selftests/prctl/.gitignore3
-rw-r--r--tools/testing/selftests/prctl/Makefile16
-rw-r--r--tools/testing/selftests/prctl/disable-tsc-ctxt-sw-stress-test.c98
-rw-r--r--tools/testing/selftests/prctl/disable-tsc-on-off-stress-test.c97
-rw-r--r--tools/testing/selftests/prctl/disable-tsc-test.c96
-rw-r--r--tools/testing/selftests/proc/.gitignore14
-rw-r--r--tools/testing/selftests/proc/Makefile20
-rw-r--r--tools/testing/selftests/proc/config1
-rw-r--r--tools/testing/selftests/proc/fd-001-lookup.c168
-rw-r--r--tools/testing/selftests/proc/fd-002-posix-eq.c57
-rw-r--r--tools/testing/selftests/proc/fd-003-kthread.c178
-rw-r--r--tools/testing/selftests/proc/proc-loadavg-001.c82
-rw-r--r--tools/testing/selftests/proc/proc-self-map-files-001.c82
-rw-r--r--tools/testing/selftests/proc/proc-self-map-files-002.c90
-rw-r--r--tools/testing/selftests/proc/proc-self-syscall.c59
-rw-r--r--tools/testing/selftests/proc/proc-self-wchan.c40
-rw-r--r--tools/testing/selftests/proc/proc-uptime-001.c45
-rw-r--r--tools/testing/selftests/proc/proc-uptime-002.c78
-rw-r--r--tools/testing/selftests/proc/proc-uptime.h60
-rw-r--r--tools/testing/selftests/proc/proc.h51
-rw-r--r--tools/testing/selftests/proc/read.c132
-rw-r--r--tools/testing/selftests/proc/self.c39
-rw-r--r--tools/testing/selftests/proc/setns-dcache.c129
-rw-r--r--tools/testing/selftests/proc/thread-self.c64
-rw-r--r--tools/testing/selftests/pstore/.gitignore2
-rw-r--r--tools/testing/selftests/pstore/Makefile14
-rwxr-xr-xtools/testing/selftests/pstore/common_tests83
-rw-r--r--tools/testing/selftests/pstore/config5
-rwxr-xr-xtools/testing/selftests/pstore/pstore_crash_test30
-rwxr-xr-xtools/testing/selftests/pstore/pstore_post_reboot_tests80
-rwxr-xr-xtools/testing/selftests/pstore/pstore_tests30
-rw-r--r--tools/testing/selftests/ptp/.gitignore1
-rw-r--r--tools/testing/selftests/ptp/Makefile10
-rw-r--r--tools/testing/selftests/ptp/testptp.c521
-rw-r--r--tools/testing/selftests/ptp/testptp.mk33
-rw-r--r--tools/testing/selftests/ptrace/.gitignore1
-rw-r--r--tools/testing/selftests/ptrace/Makefile5
-rw-r--r--tools/testing/selftests/ptrace/peeksiginfo.c219
-rw-r--r--tools/testing/selftests/rcutorture/.gitignore4
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/configNR_CPUS.sh45
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/config_override.sh61
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/configcheck.sh54
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/configinit.sh72
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/cpus2use.sh41
-rw-r--r--tools/testing/selftests/rcutorture/bin/functions.sh279
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/jitter.sh90
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-build.sh60
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-find-errors.sh56
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh51
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh74
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh122
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh96
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck.sh73
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh271
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm.sh471
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/parse-build.sh62
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/parse-console.sh176
-rw-r--r--tools/testing/selftests/rcutorture/configs/lock/BUSTED6
-rw-r--r--tools/testing/selftests/rcutorture/configs/lock/BUSTED.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/lock/CFLIST7
-rw-r--r--tools/testing/selftests/rcutorture/configs/lock/CFcommon2
-rw-r--r--tools/testing/selftests/rcutorture/configs/lock/LOCK016
-rw-r--r--tools/testing/selftests/rcutorture/configs/lock/LOCK026
-rw-r--r--tools/testing/selftests/rcutorture/configs/lock/LOCK02.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/lock/LOCK036
-rw-r--r--tools/testing/selftests/rcutorture/configs/lock/LOCK03.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/lock/LOCK046
-rw-r--r--tools/testing/selftests/rcutorture/configs/lock/LOCK04.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/lock/LOCK056
-rw-r--r--tools/testing/selftests/rcutorture/configs/lock/LOCK05.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/lock/LOCK066
-rw-r--r--tools/testing/selftests/rcutorture/configs/lock/LOCK06.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/lock/LOCK076
-rw-r--r--tools/testing/selftests/rcutorture/configs/lock/LOCK07.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/lock/ver_functions.sh42
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/BUSTED7
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/BUSTED.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/CFLIST18
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/CFcommon2
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/SRCU-N8
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/SRCU-N.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/SRCU-P12
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/SRCU-t10
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/SRCU-t.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/SRCU-u10
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/SRCU-u.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TASKS0110
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TASKS01.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TASKS024
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TASKS02.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TASKS0312
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TASKS03.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TINY0113
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TINY0214
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TINY02.boot3
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE0118
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot5
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE0223
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE0318
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot5
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE0420
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE0520
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot5
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE0623
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot7
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE0717
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE07.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE0823
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot5
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE0918
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh56
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcuperf/CFLIST1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcuperf/CFcommon2
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcuperf/TINY16
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcuperf/TREE19
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcuperf/TREE5422
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh29
-rw-r--r--tools/testing/selftests/rcutorture/doc/TINY_RCU.txt38
-rw-r--r--tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt81
-rw-r--r--tools/testing/selftests/rcutorture/doc/initrd.txt113
-rw-r--r--tools/testing/selftests/rcutorture/doc/rcu-test-image.txt42
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/.gitignore1
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/Makefile17
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/delay.h0
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/export.h0
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/mutex.h0
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/percpu.h0
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/preempt.h0
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/rcupdate.h0
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/sched.h0
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/smp.h0
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/workqueue.h0
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/uapi/linux/types.h0
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/.gitignore1
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/kconfig.h1
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/types.h156
-rwxr-xr-xtools/testing/selftests/rcutorture/formal/srcu-cbmc/modify_srcu.awk376
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/assume.h17
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/barriers.h41
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/bug_on.h14
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/combined_source.c14
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/config.h28
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/include_srcu.c32
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/int_typedefs.h34
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/locks.h221
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/misc.c12
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/misc.h58
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/percpu.h93
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/preempt.c79
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/preempt.h59
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/simple_sync_srcu.c51
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/workqueues.h103
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/.gitignore1
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/Makefile12
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/assert_end.fail1
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force.fail1
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force2.fail1
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force3.fail1
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/main.pass0
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/test.c73
-rwxr-xr-xtools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/test_script.sh103
-rw-r--r--tools/testing/selftests/rseq/.gitignore6
-rw-r--r--tools/testing/selftests/rseq/Makefile30
-rw-r--r--tools/testing/selftests/rseq/basic_percpu_ops_test.c312
-rw-r--r--tools/testing/selftests/rseq/basic_test.c56
-rw-r--r--tools/testing/selftests/rseq/param_test.c1331
-rw-r--r--tools/testing/selftests/rseq/rseq-arm.h716
-rw-r--r--tools/testing/selftests/rseq/rseq-arm64.h594
-rw-r--r--tools/testing/selftests/rseq/rseq-mips.h725
-rw-r--r--tools/testing/selftests/rseq/rseq-ppc.h671
-rw-r--r--tools/testing/selftests/rseq/rseq-s390.h513
-rw-r--r--tools/testing/selftests/rseq/rseq-skip.h65
-rw-r--r--tools/testing/selftests/rseq/rseq-x86.h1132
-rw-r--r--tools/testing/selftests/rseq/rseq.c117
-rw-r--r--tools/testing/selftests/rseq/rseq.h163
-rwxr-xr-xtools/testing/selftests/rseq/run_param_test.sh121
-rw-r--r--tools/testing/selftests/rseq/settings1
-rw-r--r--tools/testing/selftests/rtc/.gitignore2
-rw-r--r--tools/testing/selftests/rtc/Makefile9
-rw-r--r--tools/testing/selftests/rtc/rtctest.c337
-rw-r--r--tools/testing/selftests/rtc/setdate.c86
-rw-r--r--tools/testing/selftests/seccomp/.gitignore2
-rw-r--r--tools/testing/selftests/seccomp/Makefile17
-rw-r--r--tools/testing/selftests/seccomp/config2
-rw-r--r--tools/testing/selftests/seccomp/seccomp_benchmark.c99
-rw-r--r--tools/testing/selftests/seccomp/seccomp_bpf.c2997
-rw-r--r--tools/testing/selftests/sigaltstack/.gitignore1
-rw-r--r--tools/testing/selftests/sigaltstack/Makefile5
-rw-r--r--tools/testing/selftests/sigaltstack/sas.c191
-rw-r--r--tools/testing/selftests/size/.gitignore1
-rw-r--r--tools/testing/selftests/size/Makefile5
-rw-r--r--tools/testing/selftests/size/get_size.c117
-rw-r--r--tools/testing/selftests/sparc64/Makefile50
-rw-r--r--tools/testing/selftests/sparc64/drivers/.gitignore1
-rw-r--r--tools/testing/selftests/sparc64/drivers/Makefile15
-rw-r--r--tools/testing/selftests/sparc64/drivers/adi-test.c721
-rwxr-xr-xtools/testing/selftests/sparc64/drivers/drivers_test.sh30
-rwxr-xr-xtools/testing/selftests/sparc64/run.sh3
-rw-r--r--tools/testing/selftests/splice/.gitignore1
-rw-r--r--tools/testing/selftests/splice/Makefile5
-rw-r--r--tools/testing/selftests/splice/default_file_splice_read.c9
-rwxr-xr-xtools/testing/selftests/splice/default_file_splice_read.sh8
-rw-r--r--tools/testing/selftests/static_keys/Makefile8
-rw-r--r--tools/testing/selftests/static_keys/config1
-rwxr-xr-xtools/testing/selftests/static_keys/test_static_keys.sh30
-rw-r--r--tools/testing/selftests/sync/.gitignore1
-rw-r--r--tools/testing/selftests/sync/Makefile38
-rw-r--r--tools/testing/selftests/sync/config4
-rw-r--r--tools/testing/selftests/sync/sw_sync.h46
-rw-r--r--tools/testing/selftests/sync/sync.c221
-rw-r--r--tools/testing/selftests/sync/sync.h40
-rw-r--r--tools/testing/selftests/sync/sync_alloc.c74
-rw-r--r--tools/testing/selftests/sync/sync_fence.c132
-rw-r--r--tools/testing/selftests/sync/sync_merge.c60
-rw-r--r--tools/testing/selftests/sync/sync_stress_consumer.c185
-rw-r--r--tools/testing/selftests/sync/sync_stress_merge.c115
-rw-r--r--tools/testing/selftests/sync/sync_stress_parallelism.c111
-rw-r--r--tools/testing/selftests/sync/sync_test.c113
-rw-r--r--tools/testing/selftests/sync/sync_wait.c91
-rw-r--r--tools/testing/selftests/sync/synctest.h67
-rw-r--r--tools/testing/selftests/sysctl/Makefile12
-rw-r--r--tools/testing/selftests/sysctl/config1
-rwxr-xr-xtools/testing/selftests/sysctl/sysctl.sh780
-rw-r--r--tools/testing/selftests/tc-testing/.gitignore2
-rw-r--r--tools/testing/selftests/tc-testing/README247
-rw-r--r--tools/testing/selftests/tc-testing/TODO.txt31
-rw-r--r--tools/testing/selftests/tc-testing/TdcPlugin.py74
-rw-r--r--tools/testing/selftests/tc-testing/bpf/Makefile29
-rw-r--r--tools/testing/selftests/tc-testing/bpf/action.c23
-rw-r--r--tools/testing/selftests/tc-testing/config48
-rw-r--r--tools/testing/selftests/tc-testing/creating-plugins/AddingPlugins.txt104
-rw-r--r--tools/testing/selftests/tc-testing/creating-testcases/AddingTestCases.txt100
-rw-r--r--tools/testing/selftests/tc-testing/creating-testcases/example.json55
-rw-r--r--tools/testing/selftests/tc-testing/creating-testcases/template.json51
-rw-r--r--tools/testing/selftests/tc-testing/plugin-lib/README-PLUGINS27
-rw-r--r--tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py141
-rw-r--r--tools/testing/selftests/tc-testing/plugin-lib/rootPlugin.py19
-rw-r--r--tools/testing/selftests/tc-testing/plugin-lib/valgrindPlugin.py142
-rw-r--r--tools/testing/selftests/tc-testing/plugins/__init__.py0
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json294
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/connmark.json291
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/csum.json504
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/gact.json540
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/ife.json1064
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json438
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/nat.json593
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/police.json719
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/sample.json612
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/simple.json130
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json488
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json396
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json888
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json692
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/filters/fw.json1049
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/filters/tests.json42
-rwxr-xr-xtools/testing/selftests/tc-testing/tdc.py663
-rwxr-xr-xtools/testing/selftests/tc-testing/tdc_batch.py62
-rw-r--r--tools/testing/selftests/tc-testing/tdc_config.py36
-rw-r--r--tools/testing/selftests/tc-testing/tdc_config_local_template.py23
-rw-r--r--tools/testing/selftests/tc-testing/tdc_helper.py67
-rw-r--r--tools/testing/selftests/timers/.gitignore21
-rw-r--r--tools/testing/selftests/timers/Makefile23
-rw-r--r--tools/testing/selftests/timers/adjtick.c211
-rw-r--r--tools/testing/selftests/timers/alarmtimer-suspend.c178
-rw-r--r--tools/testing/selftests/timers/change_skew.c96
-rw-r--r--tools/testing/selftests/timers/clocksource-switch.c168
-rw-r--r--tools/testing/selftests/timers/freq-step.c271
-rw-r--r--tools/testing/selftests/timers/inconsistency-check.c193
-rw-r--r--tools/testing/selftests/timers/leap-a-day.c378
-rw-r--r--tools/testing/selftests/timers/leapcrash.c108
-rw-r--r--tools/testing/selftests/timers/mqueue-lat.c114
-rw-r--r--tools/testing/selftests/timers/nanosleep.c165
-rw-r--r--tools/testing/selftests/timers/nsleep-lat.c180
-rw-r--r--tools/testing/selftests/timers/posix_timers.c222
-rw-r--r--tools/testing/selftests/timers/raw_skew.c148
-rw-r--r--tools/testing/selftests/timers/rtcpie.c142
-rw-r--r--tools/testing/selftests/timers/set-2038.c133
-rw-r--r--tools/testing/selftests/timers/set-tai.c69
-rw-r--r--tools/testing/selftests/timers/set-timer-lat.c283
-rw-r--r--tools/testing/selftests/timers/set-tz.c110
-rw-r--r--tools/testing/selftests/timers/skew_consistency.c78
-rw-r--r--tools/testing/selftests/timers/threadtest.c193
-rw-r--r--tools/testing/selftests/timers/valid-adjtimex.c330
-rw-r--r--tools/testing/selftests/uevent/Makefile17
-rw-r--r--tools/testing/selftests/uevent/config2
-rw-r--r--tools/testing/selftests/uevent/uevent_filtering.c486
-rw-r--r--tools/testing/selftests/user/Makefile8
-rw-r--r--tools/testing/selftests/user/config1
-rwxr-xr-xtools/testing/selftests/user/test_user_copy.sh18
-rw-r--r--tools/testing/selftests/vDSO/.gitignore2
-rw-r--r--tools/testing/selftests/vDSO/Makefile26
-rw-r--r--tools/testing/selftests/vDSO/parse_vdso.c269
-rw-r--r--tools/testing/selftests/vDSO/vdso_standalone_test_x86.c128
-rw-r--r--tools/testing/selftests/vDSO/vdso_test.c68
-rw-r--r--tools/testing/selftests/vm/.gitignore15
-rw-r--r--tools/testing/selftests/vm/Makefile34
-rw-r--r--tools/testing/selftests/vm/compaction_test.c230
-rw-r--r--tools/testing/selftests/vm/config2
-rw-r--r--tools/testing/selftests/vm/gup_benchmark.c93
-rw-r--r--tools/testing/selftests/vm/hugepage-mmap.c93
-rw-r--r--tools/testing/selftests/vm/hugepage-shm.c101
-rw-r--r--tools/testing/selftests/vm/map_hugetlb.c84
-rw-r--r--tools/testing/selftests/vm/map_populate.c113
-rw-r--r--tools/testing/selftests/vm/mlock-random-test.c294
-rw-r--r--tools/testing/selftests/vm/mlock2-tests.c520
-rw-r--r--tools/testing/selftests/vm/mlock2.h63
-rw-r--r--tools/testing/selftests/vm/on-fault-limit.c48
-rwxr-xr-xtools/testing/selftests/vm/run_vmtests214
-rw-r--r--tools/testing/selftests/vm/thuge-gen.c257
-rw-r--r--tools/testing/selftests/vm/transhuge-stress.c144
-rw-r--r--tools/testing/selftests/vm/userfaultfd.c1333
-rw-r--r--tools/testing/selftests/vm/va_128TBswitch.c297
-rw-r--r--tools/testing/selftests/vm/virtual_address_range.c139
-rw-r--r--tools/testing/selftests/watchdog/.gitignore1
-rw-r--r--tools/testing/selftests/watchdog/Makefile4
-rw-r--r--tools/testing/selftests/watchdog/watchdog-test.c167
-rw-r--r--tools/testing/selftests/x86/.gitignore15
-rw-r--r--tools/testing/selftests/x86/Makefile105
-rwxr-xr-xtools/testing/selftests/x86/check_cc.sh16
-rw-r--r--tools/testing/selftests/x86/check_initial_reg_state.c109
-rw-r--r--tools/testing/selftests/x86/entry_from_vm86.c349
-rw-r--r--tools/testing/selftests/x86/fsgsbase.c427
-rw-r--r--tools/testing/selftests/x86/ioperm.c171
-rw-r--r--tools/testing/selftests/x86/iopl.c136
-rw-r--r--tools/testing/selftests/x86/ldt_gdt.c927
-rw-r--r--tools/testing/selftests/x86/mov_ss_trap.c285
-rw-r--r--tools/testing/selftests/x86/mpx-debug.h15
-rw-r--r--tools/testing/selftests/x86/mpx-dig.c499
-rw-r--r--tools/testing/selftests/x86/mpx-hw.h124
-rw-r--r--tools/testing/selftests/x86/mpx-mini-test.c1616
-rw-r--r--tools/testing/selftests/x86/mpx-mm.h10
-rw-r--r--tools/testing/selftests/x86/pkey-helpers.h219
-rw-r--r--tools/testing/selftests/x86/protection_keys.c1508
-rw-r--r--tools/testing/selftests/x86/ptrace_syscall.c430
-rw-r--r--tools/testing/selftests/x86/raw_syscall_helper_32.S47
-rw-r--r--tools/testing/selftests/x86/sigreturn.c871
-rw-r--r--tools/testing/selftests/x86/single_step_syscall.c187
-rw-r--r--tools/testing/selftests/x86/syscall_arg_fault.c130
-rw-r--r--tools/testing/selftests/x86/syscall_nt.c96
-rw-r--r--tools/testing/selftests/x86/sysret_rip.c195
-rw-r--r--tools/testing/selftests/x86/sysret_ss_attrs.c112
-rw-r--r--tools/testing/selftests/x86/test_FCMOV.c94
-rw-r--r--tools/testing/selftests/x86/test_FCOMI.c332
-rw-r--r--tools/testing/selftests/x86/test_FISTTP.c138
-rw-r--r--tools/testing/selftests/x86/test_mremap_vdso.c115
-rw-r--r--tools/testing/selftests/x86/test_syscall_vdso.c408
-rw-r--r--tools/testing/selftests/x86/test_vdso.c337
-rw-r--r--tools/testing/selftests/x86/test_vsyscall.c506
-rw-r--r--tools/testing/selftests/x86/thunks.S67
-rw-r--r--tools/testing/selftests/x86/thunks_32.S55
-rw-r--r--tools/testing/selftests/x86/trivial_32bit_program.c18
-rw-r--r--tools/testing/selftests/x86/trivial_64bit_program.c18
-rw-r--r--tools/testing/selftests/x86/trivial_program.c10
-rw-r--r--tools/testing/selftests/x86/unwind_vdso.c211
-rw-r--r--tools/testing/selftests/x86/vdso_restorer.c88
-rw-r--r--tools/testing/selftests/zram/Makefile9
-rw-r--r--tools/testing/selftests/zram/README40
-rw-r--r--tools/testing/selftests/zram/config2
-rwxr-xr-xtools/testing/selftests/zram/zram.sh18
-rwxr-xr-xtools/testing/selftests/zram/zram01.sh84
-rwxr-xr-xtools/testing/selftests/zram/zram02.sh53
-rwxr-xr-xtools/testing/selftests/zram/zram_lib.sh278
-rw-r--r--tools/testing/vsock/.gitignore2
-rw-r--r--tools/testing/vsock/Makefile9
-rw-r--r--tools/testing/vsock/README36
-rw-r--r--tools/testing/vsock/control.c219
-rw-r--r--tools/testing/vsock/control.h13
-rw-r--r--tools/testing/vsock/timeout.c64
-rw-r--r--tools/testing/vsock/timeout.h14
-rw-r--r--tools/testing/vsock/vsock_diag_test.c681
1068 files changed, 179540 insertions, 0 deletions
diff --git a/tools/testing/fault-injection/failcmd.sh b/tools/testing/fault-injection/failcmd.sh
new file mode 100644
index 000000000..29a6c63c5
--- /dev/null
+++ b/tools/testing/fault-injection/failcmd.sh
@@ -0,0 +1,220 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# NAME
+# failcmd.sh - run a command with injecting slab/page allocation failures
+#
+# SYNOPSIS
+# failcmd.sh --help
+# failcmd.sh [<options>] command [arguments]
+#
+# DESCRIPTION
+# Run command with injecting slab/page allocation failures by fault
+# injection.
+#
+# NOTE: you need to run this script as root.
+#
+
+usage()
+{
+ cat >&2 <<EOF
+Usage: $0 [options] command [arguments]
+
+OPTIONS
+ -p percent
+ --probability=percent
+ likelihood of failure injection, in percent.
+ Default value is 1
+
+ -t value
+ --times=value
+ specifies how many times failures may happen at most.
+ Default value is 1
+
+ --oom-kill-allocating-task=value
+ set /proc/sys/vm/oom_kill_allocating_task to specified value
+ before running the command.
+ Default value is 1
+
+ -h, --help
+ Display a usage message and exit
+
+ --interval=value, --space=value, --verbose=value, --task-filter=value,
+ --stacktrace-depth=value, --require-start=value, --require-end=value,
+ --reject-start=value, --reject-end=value, --ignore-gfp-wait=value
+ See Documentation/fault-injection/fault-injection.txt for more
+ information
+
+ failslab options:
+ --cache-filter=value
+
+ fail_page_alloc options:
+ --ignore-gfp-highmem=value, --min-order=value
+
+ENVIRONMENT
+ FAILCMD_TYPE
+ The following values for FAILCMD_TYPE are recognized:
+
+ failslab
+ inject slab allocation failures
+ fail_page_alloc
+ inject page allocation failures
+
+ If FAILCMD_TYPE is not defined, then failslab is used.
+EOF
+}
+
+if [ $UID != 0 ]; then
+ echo must be run as root >&2
+ exit 1
+fi
+
+DEBUGFS=`mount -t debugfs | head -1 | awk '{ print $3}'`
+
+if [ ! -d "$DEBUGFS" ]; then
+ echo debugfs is not mounted >&2
+ exit 1
+fi
+
+FAILCMD_TYPE=${FAILCMD_TYPE:-failslab}
+FAULTATTR=$DEBUGFS/$FAILCMD_TYPE
+
+if [ ! -d $FAULTATTR ]; then
+ echo $FAILCMD_TYPE is not available >&2
+ exit 1
+fi
+
+LONGOPTS=probability:,interval:,times:,space:,verbose:,task-filter:
+LONGOPTS=$LONGOPTS,stacktrace-depth:,require-start:,require-end:
+LONGOPTS=$LONGOPTS,reject-start:,reject-end:,oom-kill-allocating-task:,help
+
+if [ $FAILCMD_TYPE = failslab ]; then
+ LONGOPTS=$LONGOPTS,ignore-gfp-wait:,cache-filter:
+elif [ $FAILCMD_TYPE = fail_page_alloc ]; then
+ LONGOPTS=$LONGOPTS,ignore-gfp-wait:,ignore-gfp-highmem:,min-order:
+fi
+
+TEMP=`getopt -o p:i:t:s:v:h --long $LONGOPTS -n 'failcmd.sh' -- "$@"`
+
+if [ $? != 0 ]; then
+ usage
+ exit 1
+fi
+
+eval set -- "$TEMP"
+
+fault_attr_default()
+{
+ echo N > $FAULTATTR/task-filter
+ echo 0 > $FAULTATTR/probability
+ echo 1 > $FAULTATTR/times
+}
+
+fault_attr_default
+
+oom_kill_allocating_task_saved=`cat /proc/sys/vm/oom_kill_allocating_task`
+
+restore_values()
+{
+ fault_attr_default
+ echo $oom_kill_allocating_task_saved \
+ > /proc/sys/vm/oom_kill_allocating_task
+}
+
+#
+# Default options
+#
+declare -i oom_kill_allocating_task=1
+declare task_filter=Y
+declare -i probability=1
+declare -i times=1
+
+while true; do
+ case "$1" in
+ -p|--probability)
+ probability=$2
+ shift 2
+ ;;
+ -i|--interval)
+ echo $2 > $FAULTATTR/interval
+ shift 2
+ ;;
+ -t|--times)
+ times=$2
+ shift 2
+ ;;
+ -s|--space)
+ echo $2 > $FAULTATTR/space
+ shift 2
+ ;;
+ -v|--verbose)
+ echo $2 > $FAULTATTR/verbose
+ shift 2
+ ;;
+ --task-filter)
+ task_filter=$2
+ shift 2
+ ;;
+ --stacktrace-depth)
+ echo $2 > $FAULTATTR/stacktrace-depth
+ shift 2
+ ;;
+ --require-start)
+ echo $2 > $FAULTATTR/require-start
+ shift 2
+ ;;
+ --require-end)
+ echo $2 > $FAULTATTR/require-end
+ shift 2
+ ;;
+ --reject-start)
+ echo $2 > $FAULTATTR/reject-start
+ shift 2
+ ;;
+ --reject-end)
+ echo $2 > $FAULTATTR/reject-end
+ shift 2
+ ;;
+ --oom-kill-allocating-task)
+ oom_kill_allocating_task=$2
+ shift 2
+ ;;
+ --ignore-gfp-wait)
+ echo $2 > $FAULTATTR/ignore-gfp-wait
+ shift 2
+ ;;
+ --cache-filter)
+ echo $2 > $FAULTATTR/cache_filter
+ shift 2
+ ;;
+ --ignore-gfp-highmem)
+ echo $2 > $FAULTATTR/ignore-gfp-highmem
+ shift 2
+ ;;
+ --min-order)
+ echo $2 > $FAULTATTR/min-order
+ shift 2
+ ;;
+ -h|--help)
+ usage
+ exit 0
+ shift
+ ;;
+ --)
+ shift
+ break
+ ;;
+ esac
+done
+
+[ -z "$1" ] && exit 0
+
+echo $oom_kill_allocating_task > /proc/sys/vm/oom_kill_allocating_task
+echo $task_filter > $FAULTATTR/task-filter
+echo $probability > $FAULTATTR/probability
+echo $times > $FAULTATTR/times
+
+trap "restore_values" SIGINT SIGTERM EXIT
+
+cmd="echo 1 > /proc/self/make-it-fail && exec $@"
+bash -c "$cmd"
diff --git a/tools/testing/ktest/compare-ktest-sample.pl b/tools/testing/ktest/compare-ktest-sample.pl
new file mode 100755
index 000000000..ebea21d0a
--- /dev/null
+++ b/tools/testing/ktest/compare-ktest-sample.pl
@@ -0,0 +1,33 @@
+#!/usr/bin/env perl
+# SPDX-License-Identifier: GPL-2.0
+
+open (IN,"ktest.pl");
+while (<IN>) {
+ # hashes are now used
+ if (/\$opt\{"?([A-Z].*?)(\[.*\])?"?\}/ ||
+ /^\s*"?([A-Z].*?)"?\s*=>\s*/ ||
+ /set_test_option\("(.*?)"/) {
+ $opt{$1} = 1;
+ }
+}
+close IN;
+
+open (IN, "sample.conf");
+while (<IN>) {
+ if (/^\s*#?\s*([A-Z]\S*)\s*=/) {
+ $samp{$1} = 1;
+ }
+}
+close IN;
+
+foreach $opt (keys %opt) {
+ if (!defined($samp{$opt})) {
+ print "opt = $opt\n";
+ }
+}
+
+foreach $samp (keys %samp) {
+ if (!defined($opt{$samp})) {
+ print "samp = $samp\n";
+ }
+}
diff --git a/tools/testing/ktest/config-bisect.pl b/tools/testing/ktest/config-bisect.pl
new file mode 100755
index 000000000..b28feea7c
--- /dev/null
+++ b/tools/testing/ktest/config-bisect.pl
@@ -0,0 +1,770 @@
+#!/usr/bin/perl -w
+#
+# Copyright 2015 - Steven Rostedt, Red Hat Inc.
+# Copyright 2017 - Steven Rostedt, VMware, Inc.
+#
+# Licensed under the terms of the GNU GPL License version 2
+#
+
+# usage:
+# config-bisect.pl [options] good-config bad-config [good|bad]
+#
+
+# Compares a good config to a bad config, then takes half of the diffs
+# and produces a config that is somewhere between the good config and
+# the bad config. That is, the resulting config will start with the
+# good config and will try to make half of the differences of between
+# the good and bad configs match the bad config. It tries because of
+# dependencies between the two configs it may not be able to change
+# exactly half of the configs that are different between the two config
+# files.
+
+# Here's a normal way to use it:
+#
+# $ cd /path/to/linux/kernel
+# $ config-bisect.pl /path/to/good/config /path/to/bad/config
+
+# This will now pull in good config (blowing away .config in that directory
+# so do not make that be one of the good or bad configs), and then
+# build the config with "make oldconfig" to make sure it matches the
+# current kernel. It will then store the configs in that result for
+# the good config. It does the same for the bad config as well.
+# The algorithm will run, merging half of the differences between
+# the two configs and building them with "make oldconfig" to make sure
+# the result changes (dependencies may reset changes the tool had made).
+# It then copies the result of its good config to /path/to/good/config.tmp
+# and the bad config to /path/to/bad/config.tmp (just appends ".tmp" to the
+# files passed in). And the ".config" that you should test will be in
+# directory
+
+# After the first run, determine if the result is good or bad then
+# run the same command appending the result
+
+# For good results:
+# $ config-bisect.pl /path/to/good/config /path/to/bad/config good
+
+# For bad results:
+# $ config-bisect.pl /path/to/good/config /path/to/bad/config bad
+
+# Do not change the good-config or bad-config, config-bisect.pl will
+# copy the good-config to a temp file with the same name as good-config
+# but with a ".tmp" after it. It will do the same with the bad-config.
+
+# If "good" or "bad" is not stated at the end, it will copy the good and
+# bad configs to the .tmp versions. If a .tmp version already exists, it will
+# warn before writing over them (-r will not warn, and just write over them).
+# If the last config is labeled "good", then it will copy it to the good .tmp
+# version. If the last config is labeled "bad", it will copy it to the bad
+# .tmp version. It will continue this until it can not merge the two any more
+# without the result being equal to either the good or bad .tmp configs.
+
+my $start = 0;
+my $val = "";
+
+my $pwd = `pwd`;
+chomp $pwd;
+my $tree = $pwd;
+my $build;
+
+my $output_config;
+my $reset_bisect;
+
+sub usage {
+ print << "EOF"
+
+usage: config-bisect.pl [-l linux-tree][-b build-dir] good-config bad-config [good|bad]
+ -l [optional] define location of linux-tree (default is current directory)
+ -b [optional] define location to build (O=build-dir) (default is linux-tree)
+ good-config the config that is considered good
+ bad-config the config that does not work
+ "good" add this if the last run produced a good config
+ "bad" add this if the last run produced a bad config
+ If "good" or "bad" is not specified, then it is the start of a new bisect
+
+ Note, each run will create copy of good and bad configs with ".tmp" appended.
+
+EOF
+;
+
+ exit(-1);
+}
+
+sub doprint {
+ print @_;
+}
+
+sub dodie {
+ doprint "CRITICAL FAILURE... ", @_, "\n";
+
+ die @_, "\n";
+}
+
+sub expand_path {
+ my ($file) = @_;
+
+ if ($file =~ m,^/,) {
+ return $file;
+ }
+ return "$pwd/$file";
+}
+
+sub read_prompt {
+ my ($cancel, $prompt) = @_;
+
+ my $ans;
+
+ for (;;) {
+ if ($cancel) {
+ print "$prompt [y/n/C] ";
+ } else {
+ print "$prompt [y/N] ";
+ }
+ $ans = <STDIN>;
+ chomp $ans;
+ if ($ans =~ /^\s*$/) {
+ if ($cancel) {
+ $ans = "c";
+ } else {
+ $ans = "n";
+ }
+ }
+ last if ($ans =~ /^y$/i || $ans =~ /^n$/i);
+ if ($cancel) {
+ last if ($ans =~ /^c$/i);
+ print "Please answer either 'y', 'n' or 'c'.\n";
+ } else {
+ print "Please answer either 'y' or 'n'.\n";
+ }
+ }
+ if ($ans =~ /^c/i) {
+ exit;
+ }
+ if ($ans !~ /^y$/i) {
+ return 0;
+ }
+ return 1;
+}
+
+sub read_yn {
+ my ($prompt) = @_;
+
+ return read_prompt 0, $prompt;
+}
+
+sub read_ync {
+ my ($prompt) = @_;
+
+ return read_prompt 1, $prompt;
+}
+
+sub run_command {
+ my ($command, $redirect) = @_;
+ my $start_time;
+ my $end_time;
+ my $dord = 0;
+ my $pid;
+
+ $start_time = time;
+
+ doprint("$command ... ");
+
+ $pid = open(CMD, "$command 2>&1 |") or
+ dodie "unable to exec $command";
+
+ if (defined($redirect)) {
+ open (RD, ">$redirect") or
+ dodie "failed to write to redirect $redirect";
+ $dord = 1;
+ }
+
+ while (<CMD>) {
+ print RD if ($dord);
+ }
+
+ waitpid($pid, 0);
+ my $failed = $?;
+
+ close(CMD);
+ close(RD) if ($dord);
+
+ $end_time = time;
+ my $delta = $end_time - $start_time;
+
+ if ($delta == 1) {
+ doprint "[1 second] ";
+ } else {
+ doprint "[$delta seconds] ";
+ }
+
+ if ($failed) {
+ doprint "FAILED!\n";
+ } else {
+ doprint "SUCCESS\n";
+ }
+
+ return !$failed;
+}
+
+###### CONFIG BISECT ######
+
+# config_ignore holds the configs that were set (or unset) for
+# a good config and we will ignore these configs for the rest
+# of a config bisect. These configs stay as they were.
+my %config_ignore;
+
+# config_set holds what all configs were set as.
+my %config_set;
+
+# config_off holds the set of configs that the bad config had disabled.
+# We need to record them and set them in the .config when running
+# olddefconfig, because olddefconfig keeps the defaults.
+my %config_off;
+
+# config_off_tmp holds a set of configs to turn off for now
+my @config_off_tmp;
+
+# config_list is the set of configs that are being tested
+my %config_list;
+my %null_config;
+
+my %dependency;
+
+my $make;
+
+sub make_oldconfig {
+
+ if (!run_command "$make olddefconfig") {
+ # Perhaps olddefconfig doesn't exist in this version of the kernel
+ # try oldnoconfig
+ doprint "olddefconfig failed, trying make oldnoconfig\n";
+ if (!run_command "$make oldnoconfig") {
+ doprint "oldnoconfig failed, trying yes '' | make oldconfig\n";
+ # try a yes '' | oldconfig
+ run_command "yes '' | $make oldconfig" or
+ dodie "failed make config oldconfig";
+ }
+ }
+}
+
+sub assign_configs {
+ my ($hash, $config) = @_;
+
+ doprint "Reading configs from $config\n";
+
+ open (IN, $config)
+ or dodie "Failed to read $config";
+
+ while (<IN>) {
+ chomp;
+ if (/^((CONFIG\S*)=.*)/) {
+ ${$hash}{$2} = $1;
+ } elsif (/^(# (CONFIG\S*) is not set)/) {
+ ${$hash}{$2} = $1;
+ }
+ }
+
+ close(IN);
+}
+
+sub process_config_ignore {
+ my ($config) = @_;
+
+ assign_configs \%config_ignore, $config;
+}
+
+sub get_dependencies {
+ my ($config) = @_;
+
+ my $arr = $dependency{$config};
+ if (!defined($arr)) {
+ return ();
+ }
+
+ my @deps = @{$arr};
+
+ foreach my $dep (@{$arr}) {
+ print "ADD DEP $dep\n";
+ @deps = (@deps, get_dependencies $dep);
+ }
+
+ return @deps;
+}
+
+sub save_config {
+ my ($pc, $file) = @_;
+
+ my %configs = %{$pc};
+
+ doprint "Saving configs into $file\n";
+
+ open(OUT, ">$file") or dodie "Can not write to $file";
+
+ foreach my $config (keys %configs) {
+ print OUT "$configs{$config}\n";
+ }
+ close(OUT);
+}
+
+sub create_config {
+ my ($name, $pc) = @_;
+
+ doprint "Creating old config from $name configs\n";
+
+ save_config $pc, $output_config;
+
+ make_oldconfig;
+}
+
+# compare two config hashes, and return configs with different vals.
+# It returns B's config values, but you can use A to see what A was.
+sub diff_config_vals {
+ my ($pa, $pb) = @_;
+
+ # crappy Perl way to pass in hashes.
+ my %a = %{$pa};
+ my %b = %{$pb};
+
+ my %ret;
+
+ foreach my $item (keys %a) {
+ if (defined($b{$item}) && $b{$item} ne $a{$item}) {
+ $ret{$item} = $b{$item};
+ }
+ }
+
+ return %ret;
+}
+
+# compare two config hashes and return the configs in B but not A
+sub diff_configs {
+ my ($pa, $pb) = @_;
+
+ my %ret;
+
+ # crappy Perl way to pass in hashes.
+ my %a = %{$pa};
+ my %b = %{$pb};
+
+ foreach my $item (keys %b) {
+ if (!defined($a{$item})) {
+ $ret{$item} = $b{$item};
+ }
+ }
+
+ return %ret;
+}
+
+# return if two configs are equal or not
+# 0 is equal +1 b has something a does not
+# +1 if a and b have a different item.
+# -1 if a has something b does not
+sub compare_configs {
+ my ($pa, $pb) = @_;
+
+ my %ret;
+
+ # crappy Perl way to pass in hashes.
+ my %a = %{$pa};
+ my %b = %{$pb};
+
+ foreach my $item (keys %b) {
+ if (!defined($a{$item})) {
+ return 1;
+ }
+ if ($a{$item} ne $b{$item}) {
+ return 1;
+ }
+ }
+
+ foreach my $item (keys %a) {
+ if (!defined($b{$item})) {
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+sub process_failed {
+ my ($config) = @_;
+
+ doprint "\n\n***************************************\n";
+ doprint "Found bad config: $config\n";
+ doprint "***************************************\n\n";
+}
+
+sub process_new_config {
+ my ($tc, $nc, $gc, $bc) = @_;
+
+ my %tmp_config = %{$tc};
+ my %good_configs = %{$gc};
+ my %bad_configs = %{$bc};
+
+ my %new_configs;
+
+ my $runtest = 1;
+ my $ret;
+
+ create_config "tmp_configs", \%tmp_config;
+ assign_configs \%new_configs, $output_config;
+
+ $ret = compare_configs \%new_configs, \%bad_configs;
+ if (!$ret) {
+ doprint "New config equals bad config, try next test\n";
+ $runtest = 0;
+ }
+
+ if ($runtest) {
+ $ret = compare_configs \%new_configs, \%good_configs;
+ if (!$ret) {
+ doprint "New config equals good config, try next test\n";
+ $runtest = 0;
+ }
+ }
+
+ %{$nc} = %new_configs;
+
+ return $runtest;
+}
+
+sub convert_config {
+ my ($config) = @_;
+
+ if ($config =~ /^# (.*) is not set/) {
+ $config = "$1=n";
+ }
+
+ $config =~ s/^CONFIG_//;
+ return $config;
+}
+
+sub print_config {
+ my ($sym, $config) = @_;
+
+ $config = convert_config $config;
+ doprint "$sym$config\n";
+}
+
+sub print_config_compare {
+ my ($good_config, $bad_config) = @_;
+
+ $good_config = convert_config $good_config;
+ $bad_config = convert_config $bad_config;
+
+ my $good_value = $good_config;
+ my $bad_value = $bad_config;
+ $good_value =~ s/(.*)=//;
+ my $config = $1;
+
+ $bad_value =~ s/.*=//;
+
+ doprint " $config $good_value -> $bad_value\n";
+}
+
+# Pass in:
+# $phalf: half of the configs names you want to add
+# $oconfigs: The orginial configs to start with
+# $sconfigs: The source to update $oconfigs with (from $phalf)
+# $which: The name of which half that is updating (top / bottom)
+# $type: The name of the source type (good / bad)
+sub make_half {
+ my ($phalf, $oconfigs, $sconfigs, $which, $type) = @_;
+
+ my @half = @{$phalf};
+ my %orig_configs = %{$oconfigs};
+ my %source_configs = %{$sconfigs};
+
+ my %tmp_config = %orig_configs;
+
+ doprint "Settings bisect with $which half of $type configs:\n";
+ foreach my $item (@half) {
+ doprint "Updating $item to $source_configs{$item}\n";
+ $tmp_config{$item} = $source_configs{$item};
+ }
+
+ return %tmp_config;
+}
+
+sub run_config_bisect {
+ my ($pgood, $pbad) = @_;
+
+ my %good_configs = %{$pgood};
+ my %bad_configs = %{$pbad};
+
+ my %diff_configs = diff_config_vals \%good_configs, \%bad_configs;
+ my %b_configs = diff_configs \%good_configs, \%bad_configs;
+ my %g_configs = diff_configs \%bad_configs, \%good_configs;
+
+ # diff_arr is what is in both good and bad but are different (y->n)
+ my @diff_arr = keys %diff_configs;
+ my $len_diff = $#diff_arr + 1;
+
+ # b_arr is what is in bad but not in good (has depends)
+ my @b_arr = keys %b_configs;
+ my $len_b = $#b_arr + 1;
+
+ # g_arr is what is in good but not in bad
+ my @g_arr = keys %g_configs;
+ my $len_g = $#g_arr + 1;
+
+ my $runtest = 0;
+ my %new_configs;
+ my $ret;
+
+ # Look at the configs that are different between good and bad.
+ # This does not include those that depend on other configs
+ # (configs depending on other configs that are not set would
+ # not show up even as a "# CONFIG_FOO is not set"
+
+
+ doprint "# of configs to check: $len_diff\n";
+ doprint "# of configs showing only in good: $len_g\n";
+ doprint "# of configs showing only in bad: $len_b\n";
+
+ if ($len_diff > 0) {
+ # Now test for different values
+
+ doprint "Configs left to check:\n";
+ doprint " Good Config\t\t\tBad Config\n";
+ doprint " -----------\t\t\t----------\n";
+ foreach my $item (@diff_arr) {
+ doprint " $good_configs{$item}\t$bad_configs{$item}\n";
+ }
+
+ my $half = int($#diff_arr / 2);
+ my @tophalf = @diff_arr[0 .. $half];
+
+ doprint "Set tmp config to be good config with some bad config values\n";
+
+ my %tmp_config = make_half \@tophalf, \%good_configs,
+ \%bad_configs, "top", "bad";
+
+ $runtest = process_new_config \%tmp_config, \%new_configs,
+ \%good_configs, \%bad_configs;
+
+ if (!$runtest) {
+ doprint "Set tmp config to be bad config with some good config values\n";
+
+ my %tmp_config = make_half \@tophalf, \%bad_configs,
+ \%good_configs, "top", "good";
+
+ $runtest = process_new_config \%tmp_config, \%new_configs,
+ \%good_configs, \%bad_configs;
+ }
+ }
+
+ if (!$runtest && $len_diff > 0) {
+ # do the same thing, but this time with bottom half
+
+ my $half = int($#diff_arr / 2);
+ my @bottomhalf = @diff_arr[$half+1 .. $#diff_arr];
+
+ doprint "Set tmp config to be good config with some bad config values\n";
+
+ my %tmp_config = make_half \@bottomhalf, \%good_configs,
+ \%bad_configs, "bottom", "bad";
+
+ $runtest = process_new_config \%tmp_config, \%new_configs,
+ \%good_configs, \%bad_configs;
+
+ if (!$runtest) {
+ doprint "Set tmp config to be bad config with some good config values\n";
+
+ my %tmp_config = make_half \@bottomhalf, \%bad_configs,
+ \%good_configs, "bottom", "good";
+
+ $runtest = process_new_config \%tmp_config, \%new_configs,
+ \%good_configs, \%bad_configs;
+ }
+ }
+
+ if ($runtest) {
+ make_oldconfig;
+ doprint "READY TO TEST .config IN $build\n";
+ return 0;
+ }
+
+ doprint "\n%%%%%%%% FAILED TO FIND SINGLE BAD CONFIG %%%%%%%%\n";
+ doprint "Hmm, can't make any more changes without making good == bad?\n";
+ doprint "Difference between good (+) and bad (-)\n";
+
+ foreach my $item (keys %bad_configs) {
+ if (!defined($good_configs{$item})) {
+ print_config "-", $bad_configs{$item};
+ }
+ }
+
+ foreach my $item (keys %good_configs) {
+ next if (!defined($bad_configs{$item}));
+ if ($good_configs{$item} ne $bad_configs{$item}) {
+ print_config_compare $good_configs{$item}, $bad_configs{$item};
+ }
+ }
+
+ foreach my $item (keys %good_configs) {
+ if (!defined($bad_configs{$item})) {
+ print_config "+", $good_configs{$item};
+ }
+ }
+ return -1;
+}
+
+sub config_bisect {
+ my ($good_config, $bad_config) = @_;
+ my $ret;
+
+ my %good_configs;
+ my %bad_configs;
+ my %tmp_configs;
+
+ doprint "Run good configs through make oldconfig\n";
+ assign_configs \%tmp_configs, $good_config;
+ create_config "$good_config", \%tmp_configs;
+ assign_configs \%good_configs, $output_config;
+
+ doprint "Run bad configs through make oldconfig\n";
+ assign_configs \%tmp_configs, $bad_config;
+ create_config "$bad_config", \%tmp_configs;
+ assign_configs \%bad_configs, $output_config;
+
+ save_config \%good_configs, $good_config;
+ save_config \%bad_configs, $bad_config;
+
+ return run_config_bisect \%good_configs, \%bad_configs;
+}
+
+while ($#ARGV >= 0) {
+ if ($ARGV[0] !~ m/^-/) {
+ last;
+ }
+ my $opt = shift @ARGV;
+
+ if ($opt eq "-b") {
+ $val = shift @ARGV;
+ if (!defined($val)) {
+ die "-b requires value\n";
+ }
+ $build = $val;
+ }
+
+ elsif ($opt eq "-l") {
+ $val = shift @ARGV;
+ if (!defined($val)) {
+ die "-l requires value\n";
+ }
+ $tree = $val;
+ }
+
+ elsif ($opt eq "-r") {
+ $reset_bisect = 1;
+ }
+
+ elsif ($opt eq "-h") {
+ usage;
+ }
+
+ else {
+ die "Unknow option $opt\n";
+ }
+}
+
+$build = $tree if (!defined($build));
+
+$tree = expand_path $tree;
+$build = expand_path $build;
+
+if ( ! -d $tree ) {
+ die "$tree not a directory\n";
+}
+
+if ( ! -d $build ) {
+ die "$build not a directory\n";
+}
+
+usage if $#ARGV < 1;
+
+if ($#ARGV == 1) {
+ $start = 1;
+} elsif ($#ARGV == 2) {
+ $val = $ARGV[2];
+ if ($val ne "good" && $val ne "bad") {
+ die "Unknown command '$val', bust be either \"good\" or \"bad\"\n";
+ }
+} else {
+ usage;
+}
+
+my $good_start = expand_path $ARGV[0];
+my $bad_start = expand_path $ARGV[1];
+
+my $good = "$good_start.tmp";
+my $bad = "$bad_start.tmp";
+
+$make = "make";
+
+if ($build ne $tree) {
+ $make = "make O=$build"
+}
+
+$output_config = "$build/.config";
+
+if ($start) {
+ if ( ! -f $good_start ) {
+ die "$good_start not found\n";
+ }
+ if ( ! -f $bad_start ) {
+ die "$bad_start not found\n";
+ }
+ if ( -f $good || -f $bad ) {
+ my $p = "";
+
+ if ( -f $good ) {
+ $p = "$good exists\n";
+ }
+
+ if ( -f $bad ) {
+ $p = "$p$bad exists\n";
+ }
+
+ if (!defined($reset_bisect)) {
+ if (!read_yn "${p}Overwrite and start new bisect anyway?") {
+ exit (-1);
+ }
+ }
+ }
+ run_command "cp $good_start $good" or die "failed to copy to $good\n";
+ run_command "cp $bad_start $bad" or die "faield to copy to $bad\n";
+} else {
+ if ( ! -f $good ) {
+ die "Can not find file $good\n";
+ }
+ if ( ! -f $bad ) {
+ die "Can not find file $bad\n";
+ }
+ if ($val eq "good") {
+ run_command "cp $output_config $good" or die "failed to copy $config to $good\n";
+ } elsif ($val eq "bad") {
+ run_command "cp $output_config $bad" or die "failed to copy $config to $bad\n";
+ }
+}
+
+chdir $tree || die "can't change directory to $tree";
+
+my $ret = config_bisect $good, $bad;
+
+if (!$ret) {
+ exit(0);
+}
+
+if ($ret > 0) {
+ doprint "Cleaning temp files\n";
+ run_command "rm $good";
+ run_command "rm $bad";
+ exit(1);
+} else {
+ doprint "See good and bad configs for details:\n";
+ doprint "good: $good\n";
+ doprint "bad: $bad\n";
+ doprint "%%%%%%%% FAILED TO FIND SINGLE BAD CONFIG %%%%%%%%\n";
+}
+exit(2);
diff --git a/tools/testing/ktest/examples/README b/tools/testing/ktest/examples/README
new file mode 100644
index 000000000..a12d295a0
--- /dev/null
+++ b/tools/testing/ktest/examples/README
@@ -0,0 +1,32 @@
+This directory contains example configs to use ktest for various tasks.
+The configs still need to be customized for your environment, but it
+is broken up by task which makes it easier to understand how to set up
+ktest.
+
+The configs are based off of real working configs but have been modified
+and commented to show more generic use cases that are more helpful for
+developers.
+
+crosstests.conf - this config shows an example of testing a git repo against
+ lots of different architectures. It only does build tests, but makes
+ it easy to compile test different archs. You can download the arch
+ cross compilers from:
+ http://kernel.org/pub/tools/crosstool/files/bin/x86_64/
+
+test.conf - A generic example of a config. This is based on an actual config
+ used to perform real testing.
+
+kvm.conf - A example of a config that is used to test a virtual guest running
+ on a host.
+
+snowball.conf - An example config that was used to demo ktest.pl against
+ a snowball ARM board.
+
+include/ - The include directory holds default configs that can be
+ included into other configs. This is a real use example that shows how
+ to reuse configs for various machines or set ups. The files here
+ are included by other config files, where the other config files define
+ options and variables that will make the included config work for the
+ given environment.
+
+
diff --git a/tools/testing/ktest/examples/crosstests.conf b/tools/testing/ktest/examples/crosstests.conf
new file mode 100644
index 000000000..6907f3259
--- /dev/null
+++ b/tools/testing/ktest/examples/crosstests.conf
@@ -0,0 +1,225 @@
+#
+# Example config for cross compiling
+#
+# In this config, it is expected that the tool chains from:
+#
+# http://kernel.org/pub/tools/crosstool/files/bin/x86_64/
+#
+# running on a x86_64 system have been downloaded and installed into:
+#
+# /usr/local/
+#
+# such that the compiler binaries are something like:
+#
+# /usr/local/gcc-4.5.2-nolibc/mips-linux/bin/mips-linux-gcc
+#
+# Some of the archs will use gcc-4.5.1 instead of gcc-4.5.2
+# this config uses variables to differentiate them.
+#
+# Comments describe some of the options, but full descriptions of
+# options are described in the samples.conf file.
+
+# ${PWD} is defined by ktest.pl to be the directory that the user
+# was in when they executed ktest.pl. It may be better to hardcode the
+# path name here. THIS_DIR is the variable used through out the config file
+# in case you want to change it.
+
+THIS_DIR := ${PWD}
+
+# Update the BUILD_DIR option to the location of your git repo you want to test.
+BUILD_DIR = ${THIS_DIR}/linux.git
+
+# The build will go into this directory. It will be created when you run the test.
+OUTPUT_DIR = ${THIS_DIR}/cross-compile
+
+# The build will be compiled with -j8
+BUILD_OPTIONS = -j8
+
+# The test will not stop when it hits a failure.
+DIE_ON_FAILURE = 0
+
+# If you want to have ktest.pl store the failure somewhere, uncomment this option
+# and change the directory where ktest should store the failures.
+#STORE_FAILURES = ${THIS_DIR}/failures
+
+# The log file is stored in the OUTPUT_DIR called cross.log
+# If you enable this, you need to create the OUTPUT_DIR. It wont be created for you.
+LOG_FILE = ${OUTPUT_DIR}/cross.log
+
+# The log file will be cleared each time you run ktest.
+CLEAR_LOG = 1
+
+# As some archs do not build with the defconfig, they have been marked
+# to be ignored. If you want to test them anyway, change DO_FAILED to 1.
+# If a test that has been marked as DO_FAILED passes, then you should change
+# that test to be DO_DEFAULT
+
+DO_FAILED := 0
+DO_DEFAULT := 1
+
+# By setting both DO_FAILED and DO_DEFAULT to zero, you can pick a single
+# arch that you want to test. (uncomment RUN and chose your arch)
+#RUN := arm
+
+# At the bottom of the config file exists a bisect test. You can update that
+# test and set DO_FAILED and DO_DEFAULT to zero, and uncomment this variable
+# to run the bisect on the arch.
+#RUN := bisect
+
+# By default all tests will be running gcc 4.5.2. Some tests are using 4.5.1
+# and they select that in the test.
+# Note: GCC_VER is declared as on option and not a variable ('=' instead of ':=')
+# This is important. A variable is used only in the config file and if it is set
+# it stays that way for the rest of the config file until it is change again.
+# Here we want GCC_VER to remain persistent and change for each test, as it is used in
+# the MAKE_CMD. By using '=' instead of ':=' we achieve our goal.
+
+GCC_VER = 4.5.2
+MAKE_CMD = PATH=/usr/local/gcc-${GCC_VER}-nolibc/${CROSS}/bin:$PATH CROSS_COMPILE=${CROSS}- make ARCH=${ARCH}
+
+# all tests are only doing builds.
+TEST_TYPE = build
+
+# If you want to add configs on top of the defconfig, you can add those configs into
+# the add-config file and uncomment this option. This is useful if you want to test
+# all cross compiles with PREEMPT set, or TRACING on, etc.
+#ADD_CONFIG = ${THIS_DIR}/add-config
+
+# All tests are using defconfig
+BUILD_TYPE = defconfig
+
+# The test names will have the arch and cross compiler used. This will be shown in
+# the results.
+TEST_NAME = ${ARCH} ${CROSS}
+
+# alpha
+TEST_START IF ${RUN} == alpha || ${DO_DEFAULT}
+# Notice that CROSS and ARCH are also options and not variables (again '=' instead
+# of ':='). This is because TEST_NAME and MAKE_CMD wil use them for each test.
+# Only options are available during runs. Variables are only present in parsing the
+# config file.
+CROSS = alpha-linux
+ARCH = alpha
+
+# arm
+TEST_START IF ${RUN} == arm || ${DO_DEFAULT}
+CROSS = arm-unknown-linux-gnueabi
+ARCH = arm
+
+# ia64
+TEST_START IF ${RUN} == ia64 || ${DO_DEFAULT}
+CROSS = ia64-linux
+ARCH = ia64
+
+# m68k fails with error?
+TEST_START IF ${RUN} == m68k || ${DO_DEFAULT}
+CROSS = m68k-linux
+ARCH = m68k
+
+# mips64
+TEST_START IF ${RUN} == mips || ${RUN} == mips64 || ${DO_DEFAULT}
+CROSS = mips64-linux
+ARCH = mips
+
+# mips32
+TEST_START IF ${RUN} == mips || ${RUN} == mips32 || ${DO_DEFAULT}
+CROSS = mips-linux
+ARCH = mips
+
+# parisc64 failed?
+TEST_START IF ${RUN} == hppa || ${RUN} == hppa64 || ${DO_FAILED}
+CROSS = hppa64-linux
+ARCH = parisc
+
+# parisc
+TEST_START IF ${RUN} == hppa || ${RUN} == hppa32 || ${DO_FAILED}
+CROSS = hppa-linux
+ARCH = parisc
+
+# ppc
+TEST_START IF ${RUN} == ppc || ${RUN} == ppc32 || ${DO_DEFAULT}
+CROSS = powerpc-linux
+ARCH = powerpc
+
+# ppc64
+TEST_START IF ${RUN} == ppc || ${RUN} == ppc64 || ${DO_DEFAULT}
+CROSS = powerpc64-linux
+ARCH = powerpc
+
+# s390
+TEST_START IF ${RUN} == s390 || ${DO_DEFAULT}
+CROSS = s390x-linux
+ARCH = s390
+
+# sh
+TEST_START IF ${RUN} == sh || ${DO_DEFAULT}
+CROSS = sh4-linux
+ARCH = sh
+
+# sparc64
+TEST_START IF ${RUN} == sparc || ${RUN} == sparc64 || ${DO_DEFAULT}
+CROSS = sparc64-linux
+ARCH = sparc64
+
+# sparc
+TEST_START IF ${RUN} == sparc || ${RUN} == sparc32 || ${DO_DEFAULT}
+CROSS = sparc-linux
+ARCH = sparc
+
+# xtensa failed
+TEST_START IF ${RUN} == xtensa || ${DO_FAILED}
+CROSS = xtensa-linux
+ARCH = xtensa
+
+# UML
+TEST_START IF ${RUN} == uml || ${DO_DEFAULT}
+MAKE_CMD = make ARCH=um SUBARCH=x86_64
+ARCH = uml
+CROSS =
+
+TEST_START IF ${RUN} == x86 || ${RUN} == i386 || ${DO_DEFAULT}
+MAKE_CMD = make ARCH=i386
+ARCH = i386
+CROSS =
+
+TEST_START IF ${RUN} == x86 || ${RUN} == x86_64 || ${DO_DEFAULT}
+MAKE_CMD = make ARCH=x86_64
+ARCH = x86_64
+CROSS =
+
+#################################
+
+# This is a bisect if needed. You need to give it a MIN_CONFIG that
+# will be the config file it uses. Basically, just copy the created defconfig
+# for the arch someplace and point MIN_CONFIG to it.
+TEST_START IF ${RUN} == bisect
+MIN_CONFIG = ${THIS_DIR}/min-config
+CROSS = s390x-linux
+ARCH = s390
+TEST_TYPE = bisect
+BISECT_TYPE = build
+BISECT_GOOD = v3.1
+BISECT_BAD = v3.2
+CHECKOUT = v3.2
+
+#################################
+
+# These defaults are needed to keep ktest.pl from complaining. They are
+# ignored because the test does not go pass the build. No install or
+# booting of the target images.
+
+DEFAULTS
+MACHINE = crosstest
+SSH_USER = root
+BUILD_TARGET = cross
+TARGET_IMAGE = image
+POWER_CYCLE = cycle
+CONSOLE = console
+LOCALVERSION = version
+GRUB_MENU = grub
+
+REBOOT_ON_ERROR = 0
+POWEROFF_ON_ERROR = 0
+POWEROFF_ON_SUCCESS = 0
+REBOOT_ON_SUCCESS = 0
+
diff --git a/tools/testing/ktest/examples/include/bisect.conf b/tools/testing/ktest/examples/include/bisect.conf
new file mode 100644
index 000000000..009bea65b
--- /dev/null
+++ b/tools/testing/ktest/examples/include/bisect.conf
@@ -0,0 +1,90 @@
+#
+# This example shows the bisect tests (git bisect and config bisect)
+#
+
+
+# The config that includes this file may define a RUN_TEST
+# variable that will tell this config what test to run.
+# (what to set the TEST option to).
+#
+DEFAULTS IF NOT DEFINED RUN_TEST
+# Requires that hackbench is in the PATH
+RUN_TEST := ${SSH} hackbench 50
+
+
+# Set TEST to 'bisect' to do a normal git bisect. You need
+# to modify the options below to make it bisect the exact
+# commits you are interested in.
+#
+TEST_START IF ${TEST} == bisect
+TEST_TYPE = bisect
+# You must set the commit that was considered good (git bisect good)
+BISECT_GOOD = v3.3
+# You must set the commit that was considered bad (git bisect bad)
+BISECT_BAD = HEAD
+# It's best to specify the branch to checkout before starting the bisect.
+CHECKOUT = origin/master
+# This can be build, boot, or test. Here we are doing a bisect
+# that requires to run a test to know if the bisect was good or bad.
+# The test should exit with 0 on good, non-zero for bad. But see
+# the BISECT_RET_* options in samples.conf to override this.
+BISECT_TYPE = test
+TEST = ${RUN_TEST}
+# It is usually a good idea to confirm that the GOOD and the BAD
+# commits are truly good and bad respectively. Having BISECT_CHECK
+# set to 1 will check both that the good commit works and the bad
+# commit fails. If you only want to check one or the other,
+# set BISECT_CHECK to 'good' or to 'bad'.
+BISECT_CHECK = 1
+#BISECT_CHECK = good
+#BISECT_CHECK = bad
+
+# Usually it's a good idea to specify the exact config you
+# want to use throughout the entire bisect. Here we placed
+# it in the directory we called ktest.pl from and named it
+# 'config-bisect'.
+MIN_CONFIG = ${THIS_DIR}/config-bisect
+# By default, if we are doing a BISECT_TYPE = test run but the
+# build or boot fails, ktest.pl will do a 'git bisect skip'.
+# Uncomment the below option to make ktest stop testing on such
+# an error.
+#BISECT_SKIP = 0
+# Now if you had BISECT_SKIP = 0 and the test fails, you can
+# examine what happened and then do 'git bisect log > /tmp/replay'
+# Set BISECT_REPLAY to /tmp/replay and ktest.pl will run the
+# 'git bisect replay /tmp/replay' before continuing the bisect test.
+#BISECT_REPLAY = /tmp/replay
+# If you used BISECT_REPLAY after the bisect test failed, you may
+# not want to continue the bisect on that commit that failed.
+# By setting BISECT_START to a new commit. ktest.pl will checkout
+# that commit after it has performed the 'git bisect replay' but
+# before it continues running the bisect test.
+#BISECT_START = 2545eb6198e7e1ec50daa0cfc64a4cdfecf24ec9
+
+# Now if you don't trust ktest.pl to make the decisions for you, then
+# set BISECT_MANUAL to 1. This will cause ktest.pl not to decide
+# if the commit was good or bad. Instead, it will ask you to tell
+# it if the current commit was good. In the mean time, you could
+# take the result, load it on any machine you want. Run several tests,
+# or whatever you feel like. Then, when you are happy, you can tell
+# ktest if you think it was good or not and ktest.pl will continue
+# the git bisect. You can even change what commit it is currently at.
+#BISECT_MANUAL = 1
+
+
+# One of the unique tests that ktest does is the config bisect.
+# Currently (which hopefully will be fixed soon), the bad config
+# must be a superset of the good config. This is because it only
+# searches for a config that causes the target to fail. If the
+# good config is not a subset of the bad config, or if the target
+# fails because of a lack of a config, then it will not find
+# the config for you.
+TEST_START IF ${TEST} == config-bisect
+TEST_TYPE = config_bisect
+# set to build, boot, test
+CONFIG_BISECT_TYPE = boot
+# Set the config that is considered bad.
+CONFIG_BISECT = ${THIS_DIR}/config-bad
+# This config is optional. By default it uses the
+# MIN_CONFIG as the good config.
+CONFIG_BISECT_GOOD = ${THIS_DIR}/config-good
diff --git a/tools/testing/ktest/examples/include/defaults.conf b/tools/testing/ktest/examples/include/defaults.conf
new file mode 100644
index 000000000..63a1a83f4
--- /dev/null
+++ b/tools/testing/ktest/examples/include/defaults.conf
@@ -0,0 +1,157 @@
+# This file holds defaults for most the tests. It defines the options that
+# are most common to tests that are likely to be shared.
+#
+# Note, after including this file, a config file may override any option
+# with a DEFAULTS OVERRIDE section.
+#
+
+# For those cases that use the same machine to boot a 64 bit
+# and a 32 bit version. The MACHINE is the DNS name to get to the
+# box (usually different if it was 64 bit or 32 bit) but the
+# BOX here is defined as a variable that will be the name of the box
+# itself. It is useful for calling scripts that will power cycle
+# the box, as only one script needs to be created to power cycle
+# even though the box itself has multiple operating systems on it.
+# By default, BOX and MACHINE are the same.
+
+DEFAULTS IF NOT DEFINED BOX
+BOX := ${MACHINE}
+
+
+# Consider each box as 64 bit box, unless the config including this file
+# has defined BITS = 32
+
+DEFAULTS IF NOT DEFINED BITS
+BITS := 64
+
+
+DEFAULTS
+
+# THIS_DIR is used through out the configs and defaults to ${PWD} which
+# is the directory that ktest.pl was called from.
+
+THIS_DIR := ${PWD}
+
+
+# to organize your configs, having each machine save their configs
+# into a separate directly is useful.
+CONFIG_DIR := ${THIS_DIR}/configs/${MACHINE}
+
+# Reset the log before running each test.
+CLEAR_LOG = 1
+
+# As installing kernels usually requires root privilege, default the
+# user on the target as root. It is also required that the target
+# allows ssh to root from the host without asking for a password.
+
+SSH_USER = root
+
+# For accesing the machine, we will ssh to root@machine.
+SSH := ssh ${SSH_USER}@${MACHINE}
+
+# Update this. The default here is ktest will ssh to the target box
+# and run a script called 'run-test' located on that box.
+TEST = ${SSH} run-test
+
+# Point build dir to the git repo you use
+BUILD_DIR = ${THIS_DIR}/linux.git
+
+# Each machine will have its own output build directory.
+OUTPUT_DIR = ${THIS_DIR}/build/${MACHINE}
+
+# Yes this config is focused on x86 (but ktest works for other archs too)
+BUILD_TARGET = arch/x86/boot/bzImage
+TARGET_IMAGE = /boot/vmlinuz-test
+
+# have directory for the scripts to reboot and power cycle the boxes
+SCRIPTS_DIR := ${THIS_DIR}/scripts
+
+# You can have each box/machine have a script to power cycle it.
+# Name your script <box>-cycle.
+POWER_CYCLE = ${SCRIPTS_DIR}/${BOX}-cycle
+
+# This script is used to power off the box.
+POWER_OFF = ${SCRIPTS_DIR}/${BOX}-poweroff
+
+# Keep your test kernels separate from your other kernels.
+LOCALVERSION = -test
+
+# The /boot/grub/menu.lst is searched for the line:
+# title Test Kernel
+# and ktest will use that kernel to reboot into.
+# For grub2 or other boot loaders, you need to set BOOT_TYPE
+# to 'script' and define other ways to load the kernel.
+# See snowball.conf example.
+#
+GRUB_MENU = Test Kernel
+
+# The kernel build will use this option.
+BUILD_OPTIONS = -j8
+
+# Keeping the log file with the output dir is convenient.
+LOG_FILE = ${OUTPUT_DIR}/${MACHINE}.log
+
+# Each box should have their own minum configuration
+# See min-config.conf
+MIN_CONFIG = ${CONFIG_DIR}/config-min
+
+# For things like randconfigs, there may be configs you find that
+# are already broken, or there may be some configs that you always
+# want set. Uncomment ADD_CONFIG and point it to the make config files
+# that set the configs you want to keep on (or off) in your build.
+# ADD_CONFIG is usually something to add configs to all machines,
+# where as, MIN_CONFIG is specific per machine.
+#ADD_CONFIG = ${THIS_DIR}/config-broken ${THIS_DIR}/config-general
+
+# To speed up reboots for bisects and patchcheck, instead of
+# waiting 60 seconds for the console to be idle, if this line is
+# seen in the console output, ktest will know the good kernel has
+# finished rebooting and it will be able to continue the tests.
+REBOOT_SUCCESS_LINE = ${MACHINE} login:
+
+# The following is different ways to end the test.
+# by setting the variable REBOOT to: none, error, fail or
+# something else, ktest will power cycle or reboot the target box
+# at the end of the tests.
+#
+# REBOOT := none
+# Don't do anything at the end of the test.
+#
+# REBOOT := error
+# Reboot the box if ktest detects an error
+#
+# REBOOT := fail
+# Do not stop on failure, and after all tests are complete
+# power off the box (for both success and error)
+# This is good to run over a weekend and you don't want to waste
+# electricity.
+#
+
+DEFAULTS IF ${REBOOT} == none
+REBOOT_ON_SUCCESS = 0
+REBOOT_ON_ERROR = 0
+POWEROFF_ON_ERROR = 0
+POWEROFF_ON_SUCCESS = 0
+
+DEFAULTS ELSE IF ${REBOOT} == error
+REBOOT_ON_SUCCESS = 0
+REBOOT_ON_ERROR = 1
+POWEROFF_ON_ERROR = 0
+POWEROFF_ON_SUCCESS = 0
+
+DEFAULTS ELSE IF ${REBOOT} == fail
+REBOOT_ON_SUCCESS = 0
+POWEROFF_ON_ERROR = 1
+POWEROFF_ON_SUCCESS = 1
+POWEROFF_AFTER_HALT = 120
+DIE_ON_FAILURE = 0
+
+# Store the failure information into this directory
+# such as the .config, dmesg, and build log.
+STORE_FAILURES = ${THIS_DIR}/failures
+
+DEFAULTS ELSE
+REBOOT_ON_SUCCESS = 1
+REBOOT_ON_ERROR = 1
+POWEROFF_ON_ERROR = 0
+POWEROFF_ON_SUCCESS = 0
diff --git a/tools/testing/ktest/examples/include/min-config.conf b/tools/testing/ktest/examples/include/min-config.conf
new file mode 100644
index 000000000..c703cc46d
--- /dev/null
+++ b/tools/testing/ktest/examples/include/min-config.conf
@@ -0,0 +1,60 @@
+#
+# This file has some examples for creating a MIN_CONFIG.
+# (A .config file that is the minimum for a machine to boot, or
+# to boot and make a network connection.)
+#
+# A MIN_CONFIG is very useful as it is the minimum configuration
+# needed to boot a given machine. You can debug someone else's
+# .config by only setting the configs in your MIN_CONFIG. The closer
+# your MIN_CONFIG is to the true minimum set of configs needed to
+# boot your machine, the closer the config you test with will be
+# to the users config that had the failure.
+#
+# The make_min_config test allows you to create a MIN_CONFIG that
+# is truly the minimum set of configs needed to boot a box.
+#
+# In this example, the final config will reside in
+# ${CONFIG_DIR}/config-new-min and ${CONFIG_DIR}/config-new-min-net.
+# Just move one to the location you have set for MIN_CONFIG.
+#
+# The first test creates a MIN_CONFIG that will be the minimum
+# configuration to boot ${MACHINE} and be able to ssh to it.
+#
+# The second test creates a MIN_CONFIG that will only boot
+# the target and most likely will not let you ssh to it. (Notice
+# how the second test uses the first test's result to continue with.
+# This is because the second test config is a subset of the first).
+#
+# The ${CONFIG_DIR}/config-skip (and -net) will hold the configs
+# that ktest.pl found would not boot the target without them set.
+# The config-new-min holds configs that ktest.pl could not test
+# directly because another config that was needed to boot the box
+# selected them. Sometimes it is possible that this file will hold
+# the true minimum configuration. You can test to see if this is
+# the case by running the boot test with BOOT_TYPE = allnoconfig and
+# setting setting the MIN_CONFIG to ${CONFIG_DIR}/config-skip. If the
+# machine still boots, then you can use the config-skip as your MIN_CONFIG.
+#
+# These tests can run for several hours (and perhaps days).
+# It's OK to kill the test with a Ctrl^C. By restarting without
+# modifying this config, ktest.pl will notice that the config-new-min(-net)
+# exists, and will use that instead as the starting point.
+# The USE_OUTPUT_MIN_CONFIG is set to 1 to keep ktest.pl from asking
+# you if you want to use the OUTPUT_MIN_CONFIG as the starting point.
+# By using the OUTPUT_MIN_CONFIG as the starting point will allow ktest.pl to
+# start almost where it left off.
+#
+TEST_START IF ${TEST} == min-config
+TEST_TYPE = make_min_config
+OUTPUT_MIN_CONFIG = ${CONFIG_DIR}/config-new-min-net
+IGNORE_CONFIG = ${CONFIG_DIR}/config-skip-net
+MIN_CONFIG_TYPE = test
+TEST = ${SSH} echo hi
+USE_OUTPUT_MIN_CONFIG = 1
+
+TEST_START IF ${TEST} == min-config && ${MULTI}
+TEST_TYPE = make_min_config
+OUTPUT_MIN_CONFIG = ${CONFIG_DIR}/config-new-min
+IGNORE_CONFIG = ${CONFIG_DIR}/config-skip
+MIN_CONFIG = ${CONFIG_DIR}/config-new-min-net
+USE_OUTPUT_MIN_CONFIG = 1
diff --git a/tools/testing/ktest/examples/include/patchcheck.conf b/tools/testing/ktest/examples/include/patchcheck.conf
new file mode 100644
index 000000000..0eb0a5ac7
--- /dev/null
+++ b/tools/testing/ktest/examples/include/patchcheck.conf
@@ -0,0 +1,111 @@
+# patchcheck.conf
+#
+# This contains a test that takes two git commits and will test each
+# commit between the two. The build test will look at what files the
+# commit has touched, and if any of those files produce a warning, then
+# the build will fail.
+
+
+# PATCH_START is the commit to begin with and PATCH_END is the commit
+# to end with (inclusive). This is similar to doing a git rebase -i PATCH_START~1
+# and then testing each commit and doing a git rebase --continue.
+# You can use a SHA1, a git tag, or anything that git will accept for a checkout
+
+PATCH_START := HEAD~3
+PATCH_END := HEAD
+
+# Use the oldconfig if build_type wasn't defined
+DEFAULTS IF NOT DEFINED BUILD_TYPE
+DO_BUILD_TYPE := oldconfig
+
+DEFAULTS ELSE
+DO_BUILD_TYPE := ${BUILD_TYPE}
+
+DEFAULTS
+
+
+# Change PATCH_CHECKOUT to be the branch you want to test. The test will
+# do a git checkout of this branch before starting. Obviously both
+# PATCH_START and PATCH_END must be in this branch (and PATCH_START must
+# be contained by PATCH_END).
+
+PATCH_CHECKOUT := test/branch
+
+# Usually it's a good idea to have a set config to use for testing individual
+# patches.
+PATCH_CONFIG := ${CONFIG_DIR}/config-patchcheck
+
+# Change PATCH_TEST to run some test for each patch. Each commit that is
+# tested, after it is built and installed on the test machine, this command
+# will be executed. Usually what is done is to ssh to the target box and
+# run some test scripts. If you just want to boot test your patches
+# comment PATCH_TEST out.
+PATCH_TEST := ${SSH} "/usr/local/bin/ktest-test-script"
+
+DEFAULTS IF DEFINED PATCH_TEST
+PATCH_TEST_TYPE := test
+
+DEFAULTS ELSE
+PATCH_TEST_TYPE := boot
+
+# If for some reason a file has a warning that one of your patches touch
+# but you do not care about it, set IGNORE_WARNINGS to that commit(s)
+# (space delimited)
+#IGNORE_WARNINGS = 39eaf7ef884dcc44f7ff1bac803ca2a1dcf43544 6edb2a8a385f0cdef51dae37ff23e74d76d8a6ce
+
+# Instead of just checking for warnings to files that are changed
+# it can be advantageous to check for any new warnings. If a
+# header file is changed, it could cause a warning in a file not
+# touched by the commit. To detect these kinds of warnings, you
+# can use the WARNINGS_FILE option.
+#
+# If the variable CREATE_WARNINGS_FILE is set, this config will
+# enable the WARNINGS_FILE during the patchcheck test. Also,
+# before running the patchcheck test, it will create the
+# warnings file.
+#
+DEFAULTS IF DEFINED CREATE_WARNINGS_FILE
+WARNINGS_FILE = ${OUTPUT_DIR}/warnings_file
+
+TEST_START IF DEFINED CREATE_WARNINGS_FILE
+# WARNINGS_FILE is already set by the DEFAULTS above
+TEST_TYPE = make_warnings_file
+# Checkout the commit before the patches to test,
+# and record all the warnings that exist before the patches
+# to test are added
+CHECKOUT = ${PATCHCHECK_START}~1
+# Force a full build
+BUILD_NOCLEAN = 0
+BUILD_TYPE = ${DO_BUILD_TYPE}
+
+# If you are running a multi test, and the test failed on the first
+# test but on, say the 5th patch. If you want to restart on the
+# fifth patch, set PATCH_START1. This will make the first test start
+# from this commit instead of the PATCH_START commit.
+# Note, do not change this option. Just define PATCH_START1 in the
+# top config (the one you pass to ktest.pl), and this will use it,
+# otherwise it will just use PATCH_START if PATCH_START1 is not defined.
+DEFAULTS IF NOT DEFINED PATCH_START1
+PATCH_START1 := ${PATCH_START}
+
+TEST_START IF ${TEST} == patchcheck
+TEST_TYPE = patchcheck
+MIN_CONFIG = ${PATCH_CONFIG}
+TEST = ${PATCH_TEST}
+PATCHCHECK_TYPE = ${PATCH_TEST_TYPE}
+PATCHCHECK_START = ${PATCH_START1}
+PATCHCHECK_END = ${PATCH_END}
+CHECKOUT = ${PATCH_CHECKOUT}
+BUILD_TYPE = ${DO_BUILD_TYPE}
+
+TEST_START IF ${TEST} == patchcheck && ${MULTI}
+TEST_TYPE = patchcheck
+MIN_CONFIG = ${PATCH_CONFIG}
+TEST = ${PATCH_TEST}
+PATCHCHECK_TYPE = ${PATCH_TEST_TYPE}
+PATCHCHECK_START = ${PATCH_START}
+PATCHCHECK_END = ${PATCH_END}
+CHECKOUT = ${PATCH_CHECKOUT}
+# Use multi to test different compilers?
+MAKE_CMD = CC=gcc-4.5.1 make
+BUILD_TYPE = ${DO_BUILD_TYPE}
diff --git a/tools/testing/ktest/examples/include/tests.conf b/tools/testing/ktest/examples/include/tests.conf
new file mode 100644
index 000000000..60cedb1a1
--- /dev/null
+++ b/tools/testing/ktest/examples/include/tests.conf
@@ -0,0 +1,74 @@
+#
+# This is an example of various tests that you can run
+#
+# The variable TEST can be of boot, build, randconfig, or test.
+#
+# Note that TEST is a variable created with ':=' and only exists
+# throughout the config processing (not during the tests itself).
+#
+# The TEST option (defined with '=') is used to tell ktest.pl
+# what test to run after a successful boot. The TEST option is
+# persistent into the test runs.
+#
+
+# The config that includes this file may define a BOOT_TYPE
+# variable that tells this config what type of boot test to run.
+# If it's not defined, the below DEFAULTS will set the default
+# to 'oldconfig'.
+#
+DEFAULTS IF NOT DEFINED BOOT_TYPE
+BOOT_TYPE := oldconfig
+
+# The config that includes this file may define a RUN_TEST
+# variable that will tell this config what test to run.
+# (what to set the TEST option to).
+#
+DEFAULTS IF NOT DEFINED RUN_TEST
+# Requires that hackbench is in the PATH
+RUN_TEST := ${SSH} hackbench 50
+
+
+# If TEST is set to 'boot' then just build a kernel and boot
+# the target.
+TEST_START IF ${TEST} == boot
+TEST_TYPE = boot
+# Notice how we set the BUILD_TYPE option to the BOOT_TYPE variable.
+BUILD_TYPE = ${BOOT_TYPE}
+# Do not do a make mrproper.
+BUILD_NOCLEAN = 1
+
+# If you only want to build the kernel, and perhaps install
+# and test it yourself, then just set TEST to build.
+TEST_START IF ${TEST} == build
+TEST_TYPE = build
+BUILD_TYPE = ${BOOT_TYPE}
+BUILD_NOCLEAN = 1
+
+# Build, install, boot and test with a randconfg 10 times.
+# It is important that you have set MIN_CONFIG in the config
+# that includes this file otherwise it is likely that the
+# randconfig will not have the necessary configs needed to
+# boot your box. This version of the test requires a min
+# config that has enough to make sure the target has network
+# working.
+TEST_START ITERATE 10 IF ${TEST} == randconfig
+MIN_CONFIG = ${CONFIG_DIR}/config-min-net
+TEST_TYPE = test
+BUILD_TYPE = randconfig
+TEST = ${RUN_TEST}
+
+# This is the same as above, but only tests to a boot prompt.
+# The MIN_CONFIG used here does not need to have networking
+# working.
+TEST_START ITERATE 10 IF ${TEST} == randconfig && ${MULTI}
+TEST_TYPE = boot
+BUILD_TYPE = randconfig
+MIN_CONFIG = ${CONFIG_DIR}/config-min
+MAKE_CMD = make
+
+# This builds, installs, boots and tests the target.
+TEST_START IF ${TEST} == test
+TEST_TYPE = test
+BUILD_TYPE = ${BOOT_TYPE}
+TEST = ${RUN_TEST}
+BUILD_NOCLEAN = 1
diff --git a/tools/testing/ktest/examples/kvm.conf b/tools/testing/ktest/examples/kvm.conf
new file mode 100644
index 000000000..fbc134f9a
--- /dev/null
+++ b/tools/testing/ktest/examples/kvm.conf
@@ -0,0 +1,92 @@
+#
+# This config is an example usage of ktest.pl with a kvm guest
+#
+# The guest is called 'Guest' and this would be something that
+# could be run on the host to test a virtual machine target.
+
+MACHINE = Guest
+
+
+# Use virsh to read the serial console of the guest
+CONSOLE = virsh console ${MACHINE}
+
+# Use SIGKILL to terminate virsh console. We can't kill virsh console
+# by the default signal, SIGINT.
+CLOSE_CONSOLE_SIGNAL = KILL
+
+#*************************************#
+# This part is the same as test.conf #
+#*************************************#
+
+# The include files will set up the type of test to run. Just set TEST to
+# which test you want to run.
+#
+# TESTS = patchcheck, randconfig, boot, test, config-bisect, bisect, min-config
+#
+# See the include/*.conf files that define these tests
+#
+TEST := patchcheck
+
+# Some tests may have more than one test to run. Define MULTI := 1 to run
+# the extra tests.
+MULTI := 0
+
+# In case you want to differentiate which type of system you are testing
+BITS := 64
+
+# REBOOT = none, error, fail, empty
+# See include/defaults.conf
+REBOOT := empty
+
+
+# The defaults file will set up various settings that can be used by all
+# machine configs.
+INCLUDE include/defaults.conf
+
+
+#*************************************#
+# Now we are different from test.conf #
+#*************************************#
+
+
+# The example here assumes that Guest is running a Fedora release
+# that uses dracut for its initfs. The POST_INSTALL will be executed
+# after the install of the kernel and modules are complete.
+#
+POST_INSTALL = ${SSH} /sbin/dracut -f /boot/initramfs-test.img $KERNEL_VERSION
+
+# Guests sometimes get stuck on reboot. We wait 3 seconds after running
+# the reboot command and then do a full power-cycle of the guest.
+# This forces the guest to restart.
+#
+POWERCYCLE_AFTER_REBOOT = 3
+
+# We do the same after the halt command, but this time we wait 20 seconds.
+POWEROFF_AFTER_HALT = 20
+
+
+# As the defaults.conf file has a POWER_CYCLE option already defined,
+# and options can not be defined in the same section more than once
+# (all DEFAULTS sections are considered the same). We use the
+# DEFAULTS OVERRIDE to tell ktest.pl to ignore the previous defined
+# options, for the options set in the OVERRIDE section.
+#
+DEFAULTS OVERRIDE
+
+# Instead of using the default POWER_CYCLE option defined in
+# defaults.conf, we use virsh to cycle it. To do so, we destroy
+# the guest, wait 5 seconds, and then start it up again.
+# Crude, but effective.
+#
+POWER_CYCLE = virsh destroy ${MACHINE}; sleep 5; virsh start ${MACHINE}
+
+
+DEFAULTS
+
+# The following files each handle a different test case.
+# Having them included allows you to set up more than one machine and share
+# the same tests.
+INCLUDE include/patchcheck.conf
+INCLUDE include/tests.conf
+INCLUDE include/bisect.conf
+INCLUDE include/min-config.conf
diff --git a/tools/testing/ktest/examples/snowball.conf b/tools/testing/ktest/examples/snowball.conf
new file mode 100644
index 000000000..a82a3c5bc
--- /dev/null
+++ b/tools/testing/ktest/examples/snowball.conf
@@ -0,0 +1,53 @@
+# This example was used to boot the snowball ARM board.
+# See http://people.redhat.com/srostedt/ktest-embedded-2012/
+
+# PWD is a ktest.pl variable that will result in the process working
+# directory that ktest.pl is executed in.
+
+# THIS_DIR is automatically assigned the PWD of the path that generated
+# the config file. It is best to use this variable when assigning other
+# directory paths within this directory. This allows you to easily
+# move the test cases to other locations or to other machines.
+#
+THIS_DIR := /home/rostedt/work/demo/ktest-embed
+LOG_FILE = ${OUTPUT_DIR}/snowball.log
+CLEAR_LOG = 1
+MAKE_CMD = PATH=/usr/local/gcc-4.5.2-nolibc/arm-unknown-linux-gnueabi/bin:$PATH CROSS_COMPILE=arm-unknown-linux-gnueabi- make ARCH=arm
+ADD_CONFIG = ${THIS_DIR}/addconfig
+
+SCP_TO_TARGET = echo "don't do scp"
+
+TFTPBOOT := /var/lib/tftpboot
+TFTPDEF := ${TFTPBOOT}/snowball-default
+TFTPTEST := ${OUTPUT_DIR}/${BUILD_TARGET}
+
+SWITCH_TO_GOOD = cp ${TFTPDEF} ${TARGET_IMAGE}
+SWITCH_TO_TEST = cp ${TFTPTEST} ${TARGET_IMAGE}
+
+# Define each test with TEST_START
+# The config options below it will override the defaults
+TEST_START SKIP
+TEST_TYPE = boot
+BUILD_TYPE = u8500_defconfig
+BUILD_NOCLEAN = 1
+
+TEST_START
+TEST_TYPE = make_min_config
+OUTPUT_MIN_CONFIG = ${THIS_DIR}/config.newmin
+START_MIN_CONFIG = ${THIS_DIR}/config.orig
+IGNORE_CONFIG = ${THIS_DIR}/config.ignore
+BUILD_NOCLEAN = 1
+
+
+DEFAULTS
+LOCALVERSION = -test
+POWER_CYCLE = echo use the thumb luke; read a
+CONSOLE = cat ${THIS_DIR}/snowball-cat
+REBOOT_TYPE = script
+SSH_USER = root
+BUILD_OPTIONS = -j8 uImage
+BUILD_DIR = ${THIS_DIR}/linux.git
+OUTPUT_DIR = ${THIS_DIR}/snowball-build
+MACHINE = snowball
+TARGET_IMAGE = /var/lib/tftpboot/snowball-image
+BUILD_TARGET = arch/arm/boot/uImage
diff --git a/tools/testing/ktest/examples/test.conf b/tools/testing/ktest/examples/test.conf
new file mode 100644
index 000000000..b725210ef
--- /dev/null
+++ b/tools/testing/ktest/examples/test.conf
@@ -0,0 +1,62 @@
+#
+# Generic config for a machine
+#
+
+# Name your machine (the DNS name, what you ssh to)
+MACHINE = foo
+
+# BOX can be different than foo, if the machine BOX has
+# multiple partitions with different systems installed. For example,
+# you may have a i386 and x86_64 installation on a test box.
+# If this is the case, MACHINE defines the way to connect to the
+# machine, which may be different between which system the machine
+# is booting into. BOX is used for the scripts to reboot and power cycle
+# the machine, where it does not matter which system the machine boots into.
+#
+#BOX := bar
+
+# Define a way to read the console
+CONSOLE = stty -F /dev/ttyS0 115200 parodd; cat /dev/ttyS0
+
+# The include files will set up the type of test to run. Just set TEST to
+# which test you want to run.
+#
+# TESTS = patchcheck, randconfig, boot, test, config-bisect, bisect, min-config
+#
+# See the include/*.conf files that define these tests
+#
+TEST := patchcheck
+
+# Some tests may have more than one test to run. Define MULTI := 1 to run
+# the extra tests.
+MULTI := 0
+
+# In case you want to differentiate which type of system you are testing
+BITS := 64
+
+# REBOOT = none, error, fail, empty
+# See include/defaults.conf
+REBOOT := empty
+
+# The defaults file will set up various settings that can be used by all
+# machine configs.
+INCLUDE include/defaults.conf
+
+# In case you need to add a patch for a bisect or something
+#PRE_BUILD = patch -p1 < ${THIS_DIR}/fix.patch
+
+# Reset the repo after the build and remove all 'test' modules from the target
+# Notice that DO_POST_BUILD is a variable (defined by ':=') and POST_BUILD
+# is the option (defined by '=')
+
+DO_POST_BUILD := git reset --hard
+POST_BUILD = ${SSH} 'rm -rf /lib/modules/*-test*'; ${DO_POST_BUILD}
+
+# The following files each handle a different test case.
+# Having them included allows you to set up more than one machine and share
+# the same tests.
+INCLUDE include/patchcheck.conf
+INCLUDE include/tests.conf
+INCLUDE include/bisect.conf
+INCLUDE include/min-config.conf
+
diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl
new file mode 100755
index 000000000..406401f1a
--- /dev/null
+++ b/tools/testing/ktest/ktest.pl
@@ -0,0 +1,4425 @@
+#!/usr/bin/perl -w
+#
+# Copyright 2010 - Steven Rostedt <srostedt@redhat.com>, Red Hat Inc.
+# Licensed under the terms of the GNU GPL License version 2
+#
+
+use strict;
+use IPC::Open2;
+use Fcntl qw(F_GETFL F_SETFL O_NONBLOCK);
+use File::Path qw(mkpath);
+use File::Copy qw(cp);
+use FileHandle;
+use FindBin;
+
+my $VERSION = "0.2";
+
+$| = 1;
+
+my %opt;
+my %repeat_tests;
+my %repeats;
+my %evals;
+
+#default opts
+my %default = (
+ "MAILER" => "sendmail", # default mailer
+ "EMAIL_ON_ERROR" => 1,
+ "EMAIL_WHEN_FINISHED" => 1,
+ "EMAIL_WHEN_CANCELED" => 0,
+ "EMAIL_WHEN_STARTED" => 0,
+ "NUM_TESTS" => 1,
+ "TEST_TYPE" => "build",
+ "BUILD_TYPE" => "randconfig",
+ "MAKE_CMD" => "make",
+ "CLOSE_CONSOLE_SIGNAL" => "INT",
+ "TIMEOUT" => 120,
+ "TMP_DIR" => "/tmp/ktest/\${MACHINE}",
+ "SLEEP_TIME" => 60, # sleep time between tests
+ "BUILD_NOCLEAN" => 0,
+ "REBOOT_ON_ERROR" => 0,
+ "POWEROFF_ON_ERROR" => 0,
+ "REBOOT_ON_SUCCESS" => 1,
+ "POWEROFF_ON_SUCCESS" => 0,
+ "BUILD_OPTIONS" => "",
+ "BISECT_SLEEP_TIME" => 60, # sleep time between bisects
+ "PATCHCHECK_SLEEP_TIME" => 60, # sleep time between patch checks
+ "CLEAR_LOG" => 0,
+ "BISECT_MANUAL" => 0,
+ "BISECT_SKIP" => 1,
+ "BISECT_TRIES" => 1,
+ "MIN_CONFIG_TYPE" => "boot",
+ "SUCCESS_LINE" => "login:",
+ "DETECT_TRIPLE_FAULT" => 1,
+ "NO_INSTALL" => 0,
+ "BOOTED_TIMEOUT" => 1,
+ "DIE_ON_FAILURE" => 1,
+ "SSH_EXEC" => "ssh \$SSH_USER\@\$MACHINE \$SSH_COMMAND",
+ "SCP_TO_TARGET" => "scp \$SRC_FILE \$SSH_USER\@\$MACHINE:\$DST_FILE",
+ "SCP_TO_TARGET_INSTALL" => "\${SCP_TO_TARGET}",
+ "REBOOT" => "ssh \$SSH_USER\@\$MACHINE reboot",
+ "STOP_AFTER_SUCCESS" => 10,
+ "STOP_AFTER_FAILURE" => 60,
+ "STOP_TEST_AFTER" => 600,
+ "MAX_MONITOR_WAIT" => 1800,
+ "GRUB_REBOOT" => "grub2-reboot",
+ "SYSLINUX" => "extlinux",
+ "SYSLINUX_PATH" => "/boot/extlinux",
+ "CONNECT_TIMEOUT" => 25,
+
+# required, and we will ask users if they don't have them but we keep the default
+# value something that is common.
+ "REBOOT_TYPE" => "grub",
+ "LOCALVERSION" => "-test",
+ "SSH_USER" => "root",
+ "BUILD_TARGET" => "arch/x86/boot/bzImage",
+ "TARGET_IMAGE" => "/boot/vmlinuz-test",
+
+ "LOG_FILE" => undef,
+ "IGNORE_UNUSED" => 0,
+);
+
+my $ktest_config = "ktest.conf";
+my $version;
+my $have_version = 0;
+my $machine;
+my $last_machine;
+my $ssh_user;
+my $tmpdir;
+my $builddir;
+my $outputdir;
+my $output_config;
+my $test_type;
+my $build_type;
+my $build_options;
+my $final_post_ktest;
+my $pre_ktest;
+my $post_ktest;
+my $pre_test;
+my $post_test;
+my $pre_build;
+my $post_build;
+my $pre_build_die;
+my $post_build_die;
+my $reboot_type;
+my $reboot_script;
+my $power_cycle;
+my $reboot;
+my $reboot_on_error;
+my $switch_to_good;
+my $switch_to_test;
+my $poweroff_on_error;
+my $reboot_on_success;
+my $die_on_failure;
+my $powercycle_after_reboot;
+my $poweroff_after_halt;
+my $max_monitor_wait;
+my $ssh_exec;
+my $scp_to_target;
+my $scp_to_target_install;
+my $power_off;
+my $grub_menu;
+my $last_grub_menu;
+my $grub_file;
+my $grub_number;
+my $grub_reboot;
+my $syslinux;
+my $syslinux_path;
+my $syslinux_label;
+my $target;
+my $make;
+my $pre_install;
+my $post_install;
+my $no_install;
+my $noclean;
+my $minconfig;
+my $start_minconfig;
+my $start_minconfig_defined;
+my $output_minconfig;
+my $minconfig_type;
+my $use_output_minconfig;
+my $warnings_file;
+my $ignore_config;
+my $ignore_errors;
+my $addconfig;
+my $in_bisect = 0;
+my $bisect_bad_commit = "";
+my $reverse_bisect;
+my $bisect_manual;
+my $bisect_skip;
+my $bisect_tries;
+my $config_bisect_good;
+my $bisect_ret_good;
+my $bisect_ret_bad;
+my $bisect_ret_skip;
+my $bisect_ret_abort;
+my $bisect_ret_default;
+my $in_patchcheck = 0;
+my $run_test;
+my $buildlog;
+my $testlog;
+my $dmesg;
+my $monitor_fp;
+my $monitor_pid;
+my $monitor_cnt = 0;
+my $sleep_time;
+my $bisect_sleep_time;
+my $patchcheck_sleep_time;
+my $ignore_warnings;
+my $store_failures;
+my $store_successes;
+my $test_name;
+my $timeout;
+my $connect_timeout;
+my $config_bisect_exec;
+my $booted_timeout;
+my $detect_triplefault;
+my $console;
+my $close_console_signal;
+my $reboot_success_line;
+my $success_line;
+my $stop_after_success;
+my $stop_after_failure;
+my $stop_test_after;
+my $build_target;
+my $target_image;
+my $checkout;
+my $localversion;
+my $iteration = 0;
+my $successes = 0;
+my $stty_orig;
+my $run_command_status = 0;
+
+my $bisect_good;
+my $bisect_bad;
+my $bisect_type;
+my $bisect_start;
+my $bisect_replay;
+my $bisect_files;
+my $bisect_reverse;
+my $bisect_check;
+
+my $config_bisect;
+my $config_bisect_type;
+my $config_bisect_check;
+
+my $patchcheck_type;
+my $patchcheck_start;
+my $patchcheck_cherry;
+my $patchcheck_end;
+
+my $build_time;
+my $install_time;
+my $reboot_time;
+my $test_time;
+
+my $pwd;
+my $dirname = $FindBin::Bin;
+
+my $mailto;
+my $mailer;
+my $mail_path;
+my $mail_command;
+my $email_on_error;
+my $email_when_finished;
+my $email_when_started;
+my $email_when_canceled;
+
+my $script_start_time = localtime();
+
+# set when a test is something other that just building or install
+# which would require more options.
+my $buildonly = 1;
+
+# tell build not to worry about warnings, even when WARNINGS_FILE is set
+my $warnings_ok = 0;
+
+# set when creating a new config
+my $newconfig = 0;
+
+my %entered_configs;
+my %config_help;
+my %variable;
+
+# force_config is the list of configs that we force enabled (or disabled)
+# in a .config file. The MIN_CONFIG and ADD_CONFIG configs.
+my %force_config;
+
+# do not force reboots on config problems
+my $no_reboot = 1;
+
+# reboot on success
+my $reboot_success = 0;
+
+my %option_map = (
+ "MAILTO" => \$mailto,
+ "MAILER" => \$mailer,
+ "MAIL_PATH" => \$mail_path,
+ "MAIL_COMMAND" => \$mail_command,
+ "EMAIL_ON_ERROR" => \$email_on_error,
+ "EMAIL_WHEN_FINISHED" => \$email_when_finished,
+ "EMAIL_WHEN_STARTED" => \$email_when_started,
+ "EMAIL_WHEN_CANCELED" => \$email_when_canceled,
+ "MACHINE" => \$machine,
+ "SSH_USER" => \$ssh_user,
+ "TMP_DIR" => \$tmpdir,
+ "OUTPUT_DIR" => \$outputdir,
+ "BUILD_DIR" => \$builddir,
+ "TEST_TYPE" => \$test_type,
+ "PRE_KTEST" => \$pre_ktest,
+ "POST_KTEST" => \$post_ktest,
+ "PRE_TEST" => \$pre_test,
+ "POST_TEST" => \$post_test,
+ "BUILD_TYPE" => \$build_type,
+ "BUILD_OPTIONS" => \$build_options,
+ "PRE_BUILD" => \$pre_build,
+ "POST_BUILD" => \$post_build,
+ "PRE_BUILD_DIE" => \$pre_build_die,
+ "POST_BUILD_DIE" => \$post_build_die,
+ "POWER_CYCLE" => \$power_cycle,
+ "REBOOT" => \$reboot,
+ "BUILD_NOCLEAN" => \$noclean,
+ "MIN_CONFIG" => \$minconfig,
+ "OUTPUT_MIN_CONFIG" => \$output_minconfig,
+ "START_MIN_CONFIG" => \$start_minconfig,
+ "MIN_CONFIG_TYPE" => \$minconfig_type,
+ "USE_OUTPUT_MIN_CONFIG" => \$use_output_minconfig,
+ "WARNINGS_FILE" => \$warnings_file,
+ "IGNORE_CONFIG" => \$ignore_config,
+ "TEST" => \$run_test,
+ "ADD_CONFIG" => \$addconfig,
+ "REBOOT_TYPE" => \$reboot_type,
+ "GRUB_MENU" => \$grub_menu,
+ "GRUB_FILE" => \$grub_file,
+ "GRUB_REBOOT" => \$grub_reboot,
+ "SYSLINUX" => \$syslinux,
+ "SYSLINUX_PATH" => \$syslinux_path,
+ "SYSLINUX_LABEL" => \$syslinux_label,
+ "PRE_INSTALL" => \$pre_install,
+ "POST_INSTALL" => \$post_install,
+ "NO_INSTALL" => \$no_install,
+ "REBOOT_SCRIPT" => \$reboot_script,
+ "REBOOT_ON_ERROR" => \$reboot_on_error,
+ "SWITCH_TO_GOOD" => \$switch_to_good,
+ "SWITCH_TO_TEST" => \$switch_to_test,
+ "POWEROFF_ON_ERROR" => \$poweroff_on_error,
+ "REBOOT_ON_SUCCESS" => \$reboot_on_success,
+ "DIE_ON_FAILURE" => \$die_on_failure,
+ "POWER_OFF" => \$power_off,
+ "POWERCYCLE_AFTER_REBOOT" => \$powercycle_after_reboot,
+ "POWEROFF_AFTER_HALT" => \$poweroff_after_halt,
+ "MAX_MONITOR_WAIT" => \$max_monitor_wait,
+ "SLEEP_TIME" => \$sleep_time,
+ "BISECT_SLEEP_TIME" => \$bisect_sleep_time,
+ "PATCHCHECK_SLEEP_TIME" => \$patchcheck_sleep_time,
+ "IGNORE_WARNINGS" => \$ignore_warnings,
+ "IGNORE_ERRORS" => \$ignore_errors,
+ "BISECT_MANUAL" => \$bisect_manual,
+ "BISECT_SKIP" => \$bisect_skip,
+ "BISECT_TRIES" => \$bisect_tries,
+ "CONFIG_BISECT_GOOD" => \$config_bisect_good,
+ "BISECT_RET_GOOD" => \$bisect_ret_good,
+ "BISECT_RET_BAD" => \$bisect_ret_bad,
+ "BISECT_RET_SKIP" => \$bisect_ret_skip,
+ "BISECT_RET_ABORT" => \$bisect_ret_abort,
+ "BISECT_RET_DEFAULT" => \$bisect_ret_default,
+ "STORE_FAILURES" => \$store_failures,
+ "STORE_SUCCESSES" => \$store_successes,
+ "TEST_NAME" => \$test_name,
+ "TIMEOUT" => \$timeout,
+ "CONNECT_TIMEOUT" => \$connect_timeout,
+ "CONFIG_BISECT_EXEC" => \$config_bisect_exec,
+ "BOOTED_TIMEOUT" => \$booted_timeout,
+ "CONSOLE" => \$console,
+ "CLOSE_CONSOLE_SIGNAL" => \$close_console_signal,
+ "DETECT_TRIPLE_FAULT" => \$detect_triplefault,
+ "SUCCESS_LINE" => \$success_line,
+ "REBOOT_SUCCESS_LINE" => \$reboot_success_line,
+ "STOP_AFTER_SUCCESS" => \$stop_after_success,
+ "STOP_AFTER_FAILURE" => \$stop_after_failure,
+ "STOP_TEST_AFTER" => \$stop_test_after,
+ "BUILD_TARGET" => \$build_target,
+ "SSH_EXEC" => \$ssh_exec,
+ "SCP_TO_TARGET" => \$scp_to_target,
+ "SCP_TO_TARGET_INSTALL" => \$scp_to_target_install,
+ "CHECKOUT" => \$checkout,
+ "TARGET_IMAGE" => \$target_image,
+ "LOCALVERSION" => \$localversion,
+
+ "BISECT_GOOD" => \$bisect_good,
+ "BISECT_BAD" => \$bisect_bad,
+ "BISECT_TYPE" => \$bisect_type,
+ "BISECT_START" => \$bisect_start,
+ "BISECT_REPLAY" => \$bisect_replay,
+ "BISECT_FILES" => \$bisect_files,
+ "BISECT_REVERSE" => \$bisect_reverse,
+ "BISECT_CHECK" => \$bisect_check,
+
+ "CONFIG_BISECT" => \$config_bisect,
+ "CONFIG_BISECT_TYPE" => \$config_bisect_type,
+ "CONFIG_BISECT_CHECK" => \$config_bisect_check,
+
+ "PATCHCHECK_TYPE" => \$patchcheck_type,
+ "PATCHCHECK_START" => \$patchcheck_start,
+ "PATCHCHECK_CHERRY" => \$patchcheck_cherry,
+ "PATCHCHECK_END" => \$patchcheck_end,
+);
+
+# Options may be used by other options, record them.
+my %used_options;
+
+# default variables that can be used
+chomp ($variable{"PWD"} = `pwd`);
+$pwd = $variable{"PWD"};
+
+$config_help{"MACHINE"} = << "EOF"
+ The machine hostname that you will test.
+ For build only tests, it is still needed to differentiate log files.
+EOF
+ ;
+$config_help{"SSH_USER"} = << "EOF"
+ The box is expected to have ssh on normal bootup, provide the user
+ (most likely root, since you need privileged operations)
+EOF
+ ;
+$config_help{"BUILD_DIR"} = << "EOF"
+ The directory that contains the Linux source code (full path).
+ You can use \${PWD} that will be the path where ktest.pl is run, or use
+ \${THIS_DIR} which is assigned \${PWD} but may be changed later.
+EOF
+ ;
+$config_help{"OUTPUT_DIR"} = << "EOF"
+ The directory that the objects will be built (full path).
+ (can not be same as BUILD_DIR)
+ You can use \${PWD} that will be the path where ktest.pl is run, or use
+ \${THIS_DIR} which is assigned \${PWD} but may be changed later.
+EOF
+ ;
+$config_help{"BUILD_TARGET"} = << "EOF"
+ The location of the compiled file to copy to the target.
+ (relative to OUTPUT_DIR)
+EOF
+ ;
+$config_help{"BUILD_OPTIONS"} = << "EOF"
+ Options to add to \"make\" when building.
+ i.e. -j20
+EOF
+ ;
+$config_help{"TARGET_IMAGE"} = << "EOF"
+ The place to put your image on the test machine.
+EOF
+ ;
+$config_help{"POWER_CYCLE"} = << "EOF"
+ A script or command to reboot the box.
+
+ Here is a digital loggers power switch example
+ POWER_CYCLE = wget --no-proxy -O /dev/null -q --auth-no-challenge 'http://admin:admin\@power/outlet?5=CCL'
+
+ Here is an example to reboot a virtual box on the current host
+ with the name "Guest".
+ POWER_CYCLE = virsh destroy Guest; sleep 5; virsh start Guest
+EOF
+ ;
+$config_help{"CONSOLE"} = << "EOF"
+ The script or command that reads the console
+
+ If you use ttywatch server, something like the following would work.
+CONSOLE = nc -d localhost 3001
+
+ For a virtual machine with guest name "Guest".
+CONSOLE = virsh console Guest
+EOF
+ ;
+$config_help{"LOCALVERSION"} = << "EOF"
+ Required version ending to differentiate the test
+ from other linux builds on the system.
+EOF
+ ;
+$config_help{"REBOOT_TYPE"} = << "EOF"
+ Way to reboot the box to the test kernel.
+ Only valid options so far are "grub", "grub2", "syslinux", and "script".
+
+ If you specify grub, it will assume grub version 1
+ and will search in /boot/grub/menu.lst for the title \$GRUB_MENU
+ and select that target to reboot to the kernel. If this is not
+ your setup, then specify "script" and have a command or script
+ specified in REBOOT_SCRIPT to boot to the target.
+
+ The entry in /boot/grub/menu.lst must be entered in manually.
+ The test will not modify that file.
+
+ If you specify grub2, then you also need to specify both \$GRUB_MENU
+ and \$GRUB_FILE.
+
+ If you specify syslinux, then you may use SYSLINUX to define the syslinux
+ command (defaults to extlinux), and SYSLINUX_PATH to specify the path to
+ the syslinux install (defaults to /boot/extlinux). But you have to specify
+ SYSLINUX_LABEL to define the label to boot to for the test kernel.
+EOF
+ ;
+$config_help{"GRUB_MENU"} = << "EOF"
+ The grub title name for the test kernel to boot
+ (Only mandatory if REBOOT_TYPE = grub or grub2)
+
+ Note, ktest.pl will not update the grub menu.lst, you need to
+ manually add an option for the test. ktest.pl will search
+ the grub menu.lst for this option to find what kernel to
+ reboot into.
+
+ For example, if in the /boot/grub/menu.lst the test kernel title has:
+ title Test Kernel
+ kernel vmlinuz-test
+ GRUB_MENU = Test Kernel
+
+ For grub2, a search of \$GRUB_FILE is performed for the lines
+ that begin with "menuentry". It will not detect submenus. The
+ menu must be a non-nested menu. Add the quotes used in the menu
+ to guarantee your selection, as the first menuentry with the content
+ of \$GRUB_MENU that is found will be used.
+EOF
+ ;
+$config_help{"GRUB_FILE"} = << "EOF"
+ If grub2 is used, the full path for the grub.cfg file is placed
+ here. Use something like /boot/grub2/grub.cfg to search.
+EOF
+ ;
+$config_help{"SYSLINUX_LABEL"} = << "EOF"
+ If syslinux is used, the label that boots the target kernel must
+ be specified with SYSLINUX_LABEL.
+EOF
+ ;
+$config_help{"REBOOT_SCRIPT"} = << "EOF"
+ A script to reboot the target into the test kernel
+ (Only mandatory if REBOOT_TYPE = script)
+EOF
+ ;
+
+sub _logit {
+ if (defined($opt{"LOG_FILE"})) {
+ open(OUT, ">> $opt{LOG_FILE}") or die "Can't write to $opt{LOG_FILE}";
+ print OUT @_;
+ close(OUT);
+ }
+}
+
+sub logit {
+ if (defined($opt{"LOG_FILE"})) {
+ _logit @_;
+ } else {
+ print @_;
+ }
+}
+
+sub doprint {
+ print @_;
+ _logit @_;
+}
+
+sub read_prompt {
+ my ($cancel, $prompt) = @_;
+
+ my $ans;
+
+ for (;;) {
+ if ($cancel) {
+ print "$prompt [y/n/C] ";
+ } else {
+ print "$prompt [Y/n] ";
+ }
+ $ans = <STDIN>;
+ chomp $ans;
+ if ($ans =~ /^\s*$/) {
+ if ($cancel) {
+ $ans = "c";
+ } else {
+ $ans = "y";
+ }
+ }
+ last if ($ans =~ /^y$/i || $ans =~ /^n$/i);
+ if ($cancel) {
+ last if ($ans =~ /^c$/i);
+ print "Please answer either 'y', 'n' or 'c'.\n";
+ } else {
+ print "Please answer either 'y' or 'n'.\n";
+ }
+ }
+ if ($ans =~ /^c/i) {
+ exit;
+ }
+ if ($ans !~ /^y$/i) {
+ return 0;
+ }
+ return 1;
+}
+
+sub read_yn {
+ my ($prompt) = @_;
+
+ return read_prompt 0, $prompt;
+}
+
+sub read_ync {
+ my ($prompt) = @_;
+
+ return read_prompt 1, $prompt;
+}
+
+sub get_mandatory_config {
+ my ($config) = @_;
+ my $ans;
+
+ return if (defined($opt{$config}));
+
+ if (defined($config_help{$config})) {
+ print "\n";
+ print $config_help{$config};
+ }
+
+ for (;;) {
+ print "$config = ";
+ if (defined($default{$config}) && length($default{$config})) {
+ print "\[$default{$config}\] ";
+ }
+ $ans = <STDIN>;
+ $ans =~ s/^\s*(.*\S)\s*$/$1/;
+ if ($ans =~ /^\s*$/) {
+ if ($default{$config}) {
+ $ans = $default{$config};
+ } else {
+ print "Your answer can not be blank\n";
+ next;
+ }
+ }
+ $entered_configs{$config} = ${ans};
+ last;
+ }
+}
+
+sub show_time {
+ my ($time) = @_;
+
+ my $hours = 0;
+ my $minutes = 0;
+
+ if ($time > 3600) {
+ $hours = int($time / 3600);
+ $time -= $hours * 3600;
+ }
+ if ($time > 60) {
+ $minutes = int($time / 60);
+ $time -= $minutes * 60;
+ }
+
+ if ($hours > 0) {
+ doprint "$hours hour";
+ doprint "s" if ($hours > 1);
+ doprint " ";
+ }
+
+ if ($minutes > 0) {
+ doprint "$minutes minute";
+ doprint "s" if ($minutes > 1);
+ doprint " ";
+ }
+
+ doprint "$time second";
+ doprint "s" if ($time != 1);
+}
+
+sub print_times {
+ doprint "\n";
+ if ($build_time) {
+ doprint "Build time: ";
+ show_time($build_time);
+ doprint "\n";
+ }
+ if ($install_time) {
+ doprint "Install time: ";
+ show_time($install_time);
+ doprint "\n";
+ }
+ if ($reboot_time) {
+ doprint "Reboot time: ";
+ show_time($reboot_time);
+ doprint "\n";
+ }
+ if ($test_time) {
+ doprint "Test time: ";
+ show_time($test_time);
+ doprint "\n";
+ }
+ # reset for iterations like bisect
+ $build_time = 0;
+ $install_time = 0;
+ $reboot_time = 0;
+ $test_time = 0;
+}
+
+sub get_mandatory_configs {
+ get_mandatory_config("MACHINE");
+ get_mandatory_config("BUILD_DIR");
+ get_mandatory_config("OUTPUT_DIR");
+
+ if ($newconfig) {
+ get_mandatory_config("BUILD_OPTIONS");
+ }
+
+ # options required for other than just building a kernel
+ if (!$buildonly) {
+ get_mandatory_config("POWER_CYCLE");
+ get_mandatory_config("CONSOLE");
+ }
+
+ # options required for install and more
+ if ($buildonly != 1) {
+ get_mandatory_config("SSH_USER");
+ get_mandatory_config("BUILD_TARGET");
+ get_mandatory_config("TARGET_IMAGE");
+ }
+
+ get_mandatory_config("LOCALVERSION");
+
+ return if ($buildonly);
+
+ my $rtype = $opt{"REBOOT_TYPE"};
+
+ if (!defined($rtype)) {
+ if (!defined($opt{"GRUB_MENU"})) {
+ get_mandatory_config("REBOOT_TYPE");
+ $rtype = $entered_configs{"REBOOT_TYPE"};
+ } else {
+ $rtype = "grub";
+ }
+ }
+
+ if ($rtype eq "grub") {
+ get_mandatory_config("GRUB_MENU");
+ }
+
+ if ($rtype eq "grub2") {
+ get_mandatory_config("GRUB_MENU");
+ get_mandatory_config("GRUB_FILE");
+ }
+
+ if ($rtype eq "syslinux") {
+ get_mandatory_config("SYSLINUX_LABEL");
+ }
+}
+
+sub process_variables {
+ my ($value, $remove_undef) = @_;
+ my $retval = "";
+
+ # We want to check for '\', and it is just easier
+ # to check the previous characet of '$' and not need
+ # to worry if '$' is the first character. By adding
+ # a space to $value, we can just check [^\\]\$ and
+ # it will still work.
+ $value = " $value";
+
+ while ($value =~ /(.*?[^\\])\$\{(.*?)\}(.*)/) {
+ my $begin = $1;
+ my $var = $2;
+ my $end = $3;
+ # append beginning of value to retval
+ $retval = "$retval$begin";
+ if (defined($variable{$var})) {
+ $retval = "$retval$variable{$var}";
+ } elsif (defined($remove_undef) && $remove_undef) {
+ # for if statements, any variable that is not defined,
+ # we simple convert to 0
+ $retval = "${retval}0";
+ } else {
+ # put back the origin piece.
+ $retval = "$retval\$\{$var\}";
+ # This could be an option that is used later, save
+ # it so we don't warn if this option is not one of
+ # ktests options.
+ $used_options{$var} = 1;
+ }
+ $value = $end;
+ }
+ $retval = "$retval$value";
+
+ # remove the space added in the beginning
+ $retval =~ s/ //;
+
+ return "$retval"
+}
+
+sub set_value {
+ my ($lvalue, $rvalue, $override, $overrides, $name) = @_;
+
+ my $prvalue = process_variables($rvalue);
+
+ if ($lvalue =~ /^(TEST|BISECT|CONFIG_BISECT)_TYPE(\[.*\])?$/ &&
+ $prvalue !~ /^(config_|)bisect$/ &&
+ $prvalue !~ /^build$/ &&
+ $buildonly) {
+
+ # Note if a test is something other than build, then we
+ # will need other mandatory options.
+ if ($prvalue ne "install") {
+ $buildonly = 0;
+ } else {
+ # install still limits some mandatory options.
+ $buildonly = 2;
+ }
+ }
+
+ if (defined($opt{$lvalue})) {
+ if (!$override || defined(${$overrides}{$lvalue})) {
+ my $extra = "";
+ if ($override) {
+ $extra = "In the same override section!\n";
+ }
+ die "$name: $.: Option $lvalue defined more than once!\n$extra";
+ }
+ ${$overrides}{$lvalue} = $prvalue;
+ }
+
+ $opt{$lvalue} = $prvalue;
+}
+
+sub set_eval {
+ my ($lvalue, $rvalue, $name) = @_;
+
+ my $prvalue = process_variables($rvalue);
+ my $arr;
+
+ if (defined($evals{$lvalue})) {
+ $arr = $evals{$lvalue};
+ } else {
+ $arr = [];
+ $evals{$lvalue} = $arr;
+ }
+
+ push @{$arr}, $rvalue;
+}
+
+sub set_variable {
+ my ($lvalue, $rvalue) = @_;
+
+ if ($rvalue =~ /^\s*$/) {
+ delete $variable{$lvalue};
+ } else {
+ $rvalue = process_variables($rvalue);
+ $variable{$lvalue} = $rvalue;
+ }
+}
+
+sub process_compare {
+ my ($lval, $cmp, $rval) = @_;
+
+ # remove whitespace
+
+ $lval =~ s/^\s*//;
+ $lval =~ s/\s*$//;
+
+ $rval =~ s/^\s*//;
+ $rval =~ s/\s*$//;
+
+ if ($cmp eq "==") {
+ return $lval eq $rval;
+ } elsif ($cmp eq "!=") {
+ return $lval ne $rval;
+ } elsif ($cmp eq "=~") {
+ return $lval =~ m/$rval/;
+ } elsif ($cmp eq "!~") {
+ return $lval !~ m/$rval/;
+ }
+
+ my $statement = "$lval $cmp $rval";
+ my $ret = eval $statement;
+
+ # $@ stores error of eval
+ if ($@) {
+ return -1;
+ }
+
+ return $ret;
+}
+
+sub value_defined {
+ my ($val) = @_;
+
+ return defined($variable{$2}) ||
+ defined($opt{$2});
+}
+
+my $d = 0;
+sub process_expression {
+ my ($name, $val) = @_;
+
+ my $c = $d++;
+
+ while ($val =~ s/\(([^\(]*?)\)/\&\&\&\&VAL\&\&\&\&/) {
+ my $express = $1;
+
+ if (process_expression($name, $express)) {
+ $val =~ s/\&\&\&\&VAL\&\&\&\&/ 1 /;
+ } else {
+ $val =~ s/\&\&\&\&VAL\&\&\&\&/ 0 /;
+ }
+ }
+
+ $d--;
+ my $OR = "\\|\\|";
+ my $AND = "\\&\\&";
+
+ while ($val =~ s/^(.*?)($OR|$AND)//) {
+ my $express = $1;
+ my $op = $2;
+
+ if (process_expression($name, $express)) {
+ if ($op eq "||") {
+ return 1;
+ }
+ } else {
+ if ($op eq "&&") {
+ return 0;
+ }
+ }
+ }
+
+ if ($val =~ /(.*)(==|\!=|>=|<=|>|<|=~|\!~)(.*)/) {
+ my $ret = process_compare($1, $2, $3);
+ if ($ret < 0) {
+ die "$name: $.: Unable to process comparison\n";
+ }
+ return $ret;
+ }
+
+ if ($val =~ /^\s*(NOT\s*)?DEFINED\s+(\S+)\s*$/) {
+ if (defined $1) {
+ return !value_defined($2);
+ } else {
+ return value_defined($2);
+ }
+ }
+
+ if ($val =~ /^\s*0\s*$/) {
+ return 0;
+ } elsif ($val =~ /^\s*\d+\s*$/) {
+ return 1;
+ }
+
+ die ("$name: $.: Undefined content $val in if statement\n");
+}
+
+sub process_if {
+ my ($name, $value) = @_;
+
+ # Convert variables and replace undefined ones with 0
+ my $val = process_variables($value, 1);
+ my $ret = process_expression $name, $val;
+
+ return $ret;
+}
+
+sub __read_config {
+ my ($config, $current_test_num) = @_;
+
+ my $in;
+ open($in, $config) || die "can't read file $config";
+
+ my $name = $config;
+ $name =~ s,.*/(.*),$1,;
+
+ my $test_num = $$current_test_num;
+ my $default = 1;
+ my $repeat = 1;
+ my $num_tests_set = 0;
+ my $skip = 0;
+ my $rest;
+ my $line;
+ my $test_case = 0;
+ my $if = 0;
+ my $if_set = 0;
+ my $override = 0;
+
+ my %overrides;
+
+ while (<$in>) {
+
+ # ignore blank lines and comments
+ next if (/^\s*$/ || /\s*\#/);
+
+ if (/^\s*(TEST_START|DEFAULTS)\b(.*)/) {
+
+ my $type = $1;
+ $rest = $2;
+ $line = $2;
+
+ my $old_test_num;
+ my $old_repeat;
+ $override = 0;
+
+ if ($type eq "TEST_START") {
+
+ if ($num_tests_set) {
+ die "$name: $.: Can not specify both NUM_TESTS and TEST_START\n";
+ }
+
+ $old_test_num = $test_num;
+ $old_repeat = $repeat;
+
+ $test_num += $repeat;
+ $default = 0;
+ $repeat = 1;
+ } else {
+ $default = 1;
+ }
+
+ # If SKIP is anywhere in the line, the command will be skipped
+ if ($rest =~ s/\s+SKIP\b//) {
+ $skip = 1;
+ } else {
+ $test_case = 1;
+ $skip = 0;
+ }
+
+ if ($rest =~ s/\sELSE\b//) {
+ if (!$if) {
+ die "$name: $.: ELSE found with out matching IF section\n$_";
+ }
+ $if = 0;
+
+ if ($if_set) {
+ $skip = 1;
+ } else {
+ $skip = 0;
+ }
+ }
+
+ if ($rest =~ s/\sIF\s+(.*)//) {
+ if (process_if($name, $1)) {
+ $if_set = 1;
+ } else {
+ $skip = 1;
+ }
+ $if = 1;
+ } else {
+ $if = 0;
+ $if_set = 0;
+ }
+
+ if (!$skip) {
+ if ($type eq "TEST_START") {
+ if ($rest =~ s/\s+ITERATE\s+(\d+)//) {
+ $repeat = $1;
+ $repeat_tests{"$test_num"} = $repeat;
+ }
+ } elsif ($rest =~ s/\sOVERRIDE\b//) {
+ # DEFAULT only
+ $override = 1;
+ # Clear previous overrides
+ %overrides = ();
+ }
+ }
+
+ if (!$skip && $rest !~ /^\s*$/) {
+ die "$name: $.: Gargbage found after $type\n$_";
+ }
+
+ if ($skip && $type eq "TEST_START") {
+ $test_num = $old_test_num;
+ $repeat = $old_repeat;
+ }
+
+ } elsif (/^\s*ELSE\b(.*)$/) {
+ if (!$if) {
+ die "$name: $.: ELSE found with out matching IF section\n$_";
+ }
+ $rest = $1;
+ if ($if_set) {
+ $skip = 1;
+ $rest = "";
+ } else {
+ $skip = 0;
+
+ if ($rest =~ /\sIF\s+(.*)/) {
+ # May be a ELSE IF section.
+ if (process_if($name, $1)) {
+ $if_set = 1;
+ } else {
+ $skip = 1;
+ }
+ $rest = "";
+ } else {
+ $if = 0;
+ }
+ }
+
+ if ($rest !~ /^\s*$/) {
+ die "$name: $.: Gargbage found after DEFAULTS\n$_";
+ }
+
+ } elsif (/^\s*INCLUDE\s+(\S+)/) {
+
+ next if ($skip);
+
+ if (!$default) {
+ die "$name: $.: INCLUDE can only be done in default sections\n$_";
+ }
+
+ my $file = process_variables($1);
+
+ if ($file !~ m,^/,) {
+ # check the path of the config file first
+ if ($config =~ m,(.*)/,) {
+ if (-f "$1/$file") {
+ $file = "$1/$file";
+ }
+ }
+ }
+
+ if ( ! -r $file ) {
+ die "$name: $.: Can't read file $file\n$_";
+ }
+
+ if (__read_config($file, \$test_num)) {
+ $test_case = 1;
+ }
+
+ } elsif (/^\s*([A-Z_\[\]\d]+)\s*=~\s*(.*?)\s*$/) {
+
+ next if ($skip);
+
+ my $lvalue = $1;
+ my $rvalue = $2;
+
+ if ($default || $lvalue =~ /\[\d+\]$/) {
+ set_eval($lvalue, $rvalue, $name);
+ } else {
+ my $val = "$lvalue\[$test_num\]";
+ set_eval($val, $rvalue, $name);
+ }
+
+ } elsif (/^\s*([A-Z_\[\]\d]+)\s*=\s*(.*?)\s*$/) {
+
+ next if ($skip);
+
+ my $lvalue = $1;
+ my $rvalue = $2;
+
+ if (!$default &&
+ ($lvalue eq "NUM_TESTS" ||
+ $lvalue eq "LOG_FILE" ||
+ $lvalue eq "CLEAR_LOG")) {
+ die "$name: $.: $lvalue must be set in DEFAULTS section\n";
+ }
+
+ if ($lvalue eq "NUM_TESTS") {
+ if ($test_num) {
+ die "$name: $.: Can not specify both NUM_TESTS and TEST_START\n";
+ }
+ if (!$default) {
+ die "$name: $.: NUM_TESTS must be set in default section\n";
+ }
+ $num_tests_set = 1;
+ }
+
+ if ($default || $lvalue =~ /\[\d+\]$/) {
+ set_value($lvalue, $rvalue, $override, \%overrides, $name);
+ } else {
+ my $val = "$lvalue\[$test_num\]";
+ set_value($val, $rvalue, $override, \%overrides, $name);
+
+ if ($repeat > 1) {
+ $repeats{$val} = $repeat;
+ }
+ }
+ } elsif (/^\s*([A-Z_\[\]\d]+)\s*:=\s*(.*?)\s*$/) {
+ next if ($skip);
+
+ my $lvalue = $1;
+ my $rvalue = $2;
+
+ # process config variables.
+ # Config variables are only active while reading the
+ # config and can be defined anywhere. They also ignore
+ # TEST_START and DEFAULTS, but are skipped if they are in
+ # on of these sections that have SKIP defined.
+ # The save variable can be
+ # defined multiple times and the new one simply overrides
+ # the prevous one.
+ set_variable($lvalue, $rvalue);
+
+ } else {
+ die "$name: $.: Garbage found in config\n$_";
+ }
+ }
+
+ if ($test_num) {
+ $test_num += $repeat - 1;
+ $opt{"NUM_TESTS"} = $test_num;
+ }
+
+ close($in);
+
+ $$current_test_num = $test_num;
+
+ return $test_case;
+}
+
+sub get_test_case {
+ print "What test case would you like to run?\n";
+ print " (build, install or boot)\n";
+ print " Other tests are available but require editing ktest.conf\n";
+ print " (see tools/testing/ktest/sample.conf)\n";
+ my $ans = <STDIN>;
+ chomp $ans;
+ $default{"TEST_TYPE"} = $ans;
+}
+
+sub read_config {
+ my ($config) = @_;
+
+ my $test_case;
+ my $test_num = 0;
+
+ $test_case = __read_config $config, \$test_num;
+
+ # make sure we have all mandatory configs
+ get_mandatory_configs;
+
+ # was a test specified?
+ if (!$test_case) {
+ print "No test case specified.\n";
+ get_test_case;
+ }
+
+ # set any defaults
+
+ foreach my $default (keys %default) {
+ if (!defined($opt{$default})) {
+ $opt{$default} = $default{$default};
+ }
+ }
+
+ if ($opt{"IGNORE_UNUSED"} == 1) {
+ return;
+ }
+
+ my %not_used;
+
+ # check if there are any stragglers (typos?)
+ foreach my $option (keys %opt) {
+ my $op = $option;
+ # remove per test labels.
+ $op =~ s/\[.*\]//;
+ if (!exists($option_map{$op}) &&
+ !exists($default{$op}) &&
+ !exists($used_options{$op})) {
+ $not_used{$op} = 1;
+ }
+ }
+
+ if (%not_used) {
+ my $s = "s are";
+ $s = " is" if (keys %not_used == 1);
+ print "The following option$s not used; could be a typo:\n";
+ foreach my $option (keys %not_used) {
+ print "$option\n";
+ }
+ print "Set IGRNORE_UNUSED = 1 to have ktest ignore unused variables\n";
+ if (!read_yn "Do you want to continue?") {
+ exit -1;
+ }
+ }
+}
+
+sub __eval_option {
+ my ($name, $option, $i) = @_;
+
+ # Add space to evaluate the character before $
+ $option = " $option";
+ my $retval = "";
+ my $repeated = 0;
+ my $parent = 0;
+
+ foreach my $test (keys %repeat_tests) {
+ if ($i >= $test &&
+ $i < $test + $repeat_tests{$test}) {
+
+ $repeated = 1;
+ $parent = $test;
+ last;
+ }
+ }
+
+ while ($option =~ /(.*?[^\\])\$\{(.*?)\}(.*)/) {
+ my $start = $1;
+ my $var = $2;
+ my $end = $3;
+
+ # Append beginning of line
+ $retval = "$retval$start";
+
+ # If the iteration option OPT[$i] exists, then use that.
+ # otherwise see if the default OPT (without [$i]) exists.
+
+ my $o = "$var\[$i\]";
+ my $parento = "$var\[$parent\]";
+
+ # If a variable contains itself, use the default var
+ if (($var eq $name) && defined($opt{$var})) {
+ $o = $opt{$var};
+ $retval = "$retval$o";
+ } elsif (defined($opt{$o})) {
+ $o = $opt{$o};
+ $retval = "$retval$o";
+ } elsif ($repeated && defined($opt{$parento})) {
+ $o = $opt{$parento};
+ $retval = "$retval$o";
+ } elsif (defined($opt{$var})) {
+ $o = $opt{$var};
+ $retval = "$retval$o";
+ } elsif ($var eq "KERNEL_VERSION" && defined($make)) {
+ # special option KERNEL_VERSION uses kernel version
+ get_version();
+ $retval = "$retval$version";
+ } else {
+ $retval = "$retval\$\{$var\}";
+ }
+
+ $option = $end;
+ }
+
+ $retval = "$retval$option";
+
+ $retval =~ s/^ //;
+
+ return $retval;
+}
+
+sub process_evals {
+ my ($name, $option, $i) = @_;
+
+ my $option_name = "$name\[$i\]";
+ my $ev;
+
+ my $old_option = $option;
+
+ if (defined($evals{$option_name})) {
+ $ev = $evals{$option_name};
+ } elsif (defined($evals{$name})) {
+ $ev = $evals{$name};
+ } else {
+ return $option;
+ }
+
+ for my $e (@{$ev}) {
+ eval "\$option =~ $e";
+ }
+
+ if ($option ne $old_option) {
+ doprint("$name changed from '$old_option' to '$option'\n");
+ }
+
+ return $option;
+}
+
+sub eval_option {
+ my ($name, $option, $i) = @_;
+
+ my $prev = "";
+
+ # Since an option can evaluate to another option,
+ # keep iterating until we do not evaluate any more
+ # options.
+ my $r = 0;
+ while ($prev ne $option) {
+ # Check for recursive evaluations.
+ # 100 deep should be more than enough.
+ if ($r++ > 100) {
+ die "Over 100 evaluations accurred with $option\n" .
+ "Check for recursive variables\n";
+ }
+ $prev = $option;
+ $option = __eval_option($name, $option, $i);
+ }
+
+ $option = process_evals($name, $option, $i);
+
+ return $option;
+}
+
+sub run_command;
+sub start_monitor;
+sub end_monitor;
+sub wait_for_monitor;
+
+sub reboot {
+ my ($time) = @_;
+ my $powercycle = 0;
+
+ # test if the machine can be connected to within a few seconds
+ my $stat = run_ssh("echo check machine status", $connect_timeout);
+ if (!$stat) {
+ doprint("power cycle\n");
+ $powercycle = 1;
+ }
+
+ if ($powercycle) {
+ run_command "$power_cycle";
+
+ start_monitor;
+ # flush out current monitor
+ # May contain the reboot success line
+ wait_for_monitor 1;
+
+ } else {
+ # Make sure everything has been written to disk
+ run_ssh("sync", 10);
+
+ if (defined($time)) {
+ start_monitor;
+ # flush out current monitor
+ # May contain the reboot success line
+ wait_for_monitor 1;
+ }
+
+ # try to reboot normally
+ if (run_command $reboot) {
+ if (defined($powercycle_after_reboot)) {
+ sleep $powercycle_after_reboot;
+ run_command "$power_cycle";
+ }
+ } else {
+ # nope? power cycle it.
+ run_command "$power_cycle";
+ }
+ }
+
+ if (defined($time)) {
+
+ # We only want to get to the new kernel, don't fail
+ # if we stumble over a call trace.
+ my $save_ignore_errors = $ignore_errors;
+ $ignore_errors = 1;
+
+ # Look for the good kernel to boot
+ if (wait_for_monitor($time, "Linux version")) {
+ # reboot got stuck?
+ doprint "Reboot did not finish. Forcing power cycle\n";
+ run_command "$power_cycle";
+ }
+
+ $ignore_errors = $save_ignore_errors;
+
+ # Still need to wait for the reboot to finish
+ wait_for_monitor($time, $reboot_success_line);
+
+ end_monitor;
+ }
+}
+
+sub reboot_to_good {
+ my ($time) = @_;
+
+ if (defined($switch_to_good)) {
+ run_command $switch_to_good;
+ }
+
+ reboot $time;
+}
+
+sub do_not_reboot {
+ my $i = $iteration;
+
+ return $test_type eq "build" || $no_reboot ||
+ ($test_type eq "patchcheck" && $opt{"PATCHCHECK_TYPE[$i]"} eq "build") ||
+ ($test_type eq "bisect" && $opt{"BISECT_TYPE[$i]"} eq "build") ||
+ ($test_type eq "config_bisect" && $opt{"CONFIG_BISECT_TYPE[$i]"} eq "build");
+}
+
+my $in_die = 0;
+
+sub dodie {
+
+ # avoid recusion
+ return if ($in_die);
+ $in_die = 1;
+
+ doprint "CRITICAL FAILURE... ", @_, "\n";
+
+ my $i = $iteration;
+
+ if ($reboot_on_error && !do_not_reboot) {
+
+ doprint "REBOOTING\n";
+ reboot_to_good;
+
+ } elsif ($poweroff_on_error && defined($power_off)) {
+ doprint "POWERING OFF\n";
+ `$power_off`;
+ }
+
+ if (defined($opt{"LOG_FILE"})) {
+ print " See $opt{LOG_FILE} for more info.\n";
+ }
+
+ if ($email_on_error) {
+ send_email("KTEST: critical failure for your [$test_type] test",
+ "Your test started at $script_start_time has failed with:\n@_\n");
+ }
+
+ if ($monitor_cnt) {
+ # restore terminal settings
+ system("stty $stty_orig");
+ }
+
+ if (defined($post_test)) {
+ run_command $post_test;
+ }
+
+ die @_, "\n";
+}
+
+sub create_pty {
+ my ($ptm, $pts) = @_;
+ my $tmp;
+ my $TIOCSPTLCK = 0x40045431;
+ my $TIOCGPTN = 0x80045430;
+
+ sysopen($ptm, "/dev/ptmx", O_RDWR | O_NONBLOCK) or
+ dodie "Cant open /dev/ptmx";
+
+ # unlockpt()
+ $tmp = pack("i", 0);
+ ioctl($ptm, $TIOCSPTLCK, $tmp) or
+ dodie "ioctl TIOCSPTLCK for /dev/ptmx failed";
+
+ # ptsname()
+ ioctl($ptm, $TIOCGPTN, $tmp) or
+ dodie "ioctl TIOCGPTN for /dev/ptmx failed";
+ $tmp = unpack("i", $tmp);
+
+ sysopen($pts, "/dev/pts/$tmp", O_RDWR | O_NONBLOCK) or
+ dodie "Can't open /dev/pts/$tmp";
+}
+
+sub exec_console {
+ my ($ptm, $pts) = @_;
+
+ close($ptm);
+
+ close(\*STDIN);
+ close(\*STDOUT);
+ close(\*STDERR);
+
+ open(\*STDIN, '<&', $pts);
+ open(\*STDOUT, '>&', $pts);
+ open(\*STDERR, '>&', $pts);
+
+ close($pts);
+
+ exec $console or
+ dodie "Can't open console $console";
+}
+
+sub open_console {
+ my ($ptm) = @_;
+ my $pts = \*PTSFD;
+ my $pid;
+
+ # save terminal settings
+ $stty_orig = `stty -g`;
+
+ # place terminal in cbreak mode so that stdin can be read one character at
+ # a time without having to wait for a newline
+ system("stty -icanon -echo -icrnl");
+
+ create_pty($ptm, $pts);
+
+ $pid = fork;
+
+ if (!$pid) {
+ # child
+ exec_console($ptm, $pts)
+ }
+
+ # parent
+ close($pts);
+
+ return $pid;
+
+ open(PTSFD, "Stop perl from warning about single use of PTSFD");
+}
+
+sub close_console {
+ my ($fp, $pid) = @_;
+
+ doprint "kill child process $pid\n";
+ kill $close_console_signal, $pid;
+
+ doprint "wait for child process $pid to exit\n";
+ waitpid($pid, 0);
+
+ print "closing!\n";
+ close($fp);
+
+ # restore terminal settings
+ system("stty $stty_orig");
+}
+
+sub start_monitor {
+ if ($monitor_cnt++) {
+ return;
+ }
+ $monitor_fp = \*MONFD;
+ $monitor_pid = open_console $monitor_fp;
+
+ return;
+
+ open(MONFD, "Stop perl from warning about single use of MONFD");
+}
+
+sub end_monitor {
+ return if (!defined $console);
+ if (--$monitor_cnt) {
+ return;
+ }
+ close_console($monitor_fp, $monitor_pid);
+}
+
+sub wait_for_monitor {
+ my ($time, $stop) = @_;
+ my $full_line = "";
+ my $line;
+ my $booted = 0;
+ my $start_time = time;
+ my $skip_call_trace = 0;
+ my $bug = 0;
+ my $bug_ignored = 0;
+ my $now;
+
+ doprint "** Wait for monitor to settle down **\n";
+
+ # read the monitor and wait for the system to calm down
+ while (!$booted) {
+ $line = wait_for_input($monitor_fp, $time);
+ last if (!defined($line));
+ print "$line";
+ $full_line .= $line;
+
+ if (defined($stop) && $full_line =~ /$stop/) {
+ doprint "wait for monitor detected $stop\n";
+ $booted = 1;
+ }
+
+ if ($full_line =~ /\[ backtrace testing \]/) {
+ $skip_call_trace = 1;
+ }
+
+ if ($full_line =~ /call trace:/i) {
+ if (!$bug && !$skip_call_trace) {
+ if ($ignore_errors) {
+ $bug_ignored = 1;
+ } else {
+ $bug = 1;
+ }
+ }
+ }
+
+ if ($full_line =~ /\[ end of backtrace testing \]/) {
+ $skip_call_trace = 0;
+ }
+
+ if ($full_line =~ /Kernel panic -/) {
+ $bug = 1;
+ }
+
+ if ($line =~ /\n/) {
+ $full_line = "";
+ }
+ $now = time;
+ if ($now - $start_time >= $max_monitor_wait) {
+ doprint "Exiting monitor flush due to hitting MAX_MONITOR_WAIT\n";
+ return 1;
+ }
+ }
+ print "** Monitor flushed **\n";
+
+ # if stop is defined but wasn't hit, return error
+ # used by reboot (which wants to see a reboot)
+ if (defined($stop) && !$booted) {
+ $bug = 1;
+ }
+ return $bug;
+}
+
+sub save_logs {
+ my ($result, $basedir) = @_;
+ my @t = localtime;
+ my $date = sprintf "%04d%02d%02d%02d%02d%02d",
+ 1900+$t[5],$t[4],$t[3],$t[2],$t[1],$t[0];
+
+ my $type = $build_type;
+ if ($type =~ /useconfig/) {
+ $type = "useconfig";
+ }
+
+ my $dir = "$machine-$test_type-$type-$result-$date";
+
+ $dir = "$basedir/$dir";
+
+ if (!-d $dir) {
+ mkpath($dir) or
+ dodie "can't create $dir";
+ }
+
+ my %files = (
+ "config" => $output_config,
+ "buildlog" => $buildlog,
+ "dmesg" => $dmesg,
+ "testlog" => $testlog,
+ );
+
+ while (my ($name, $source) = each(%files)) {
+ if (-f "$source") {
+ cp "$source", "$dir/$name" or
+ dodie "failed to copy $source";
+ }
+ }
+
+ doprint "*** Saved info to $dir ***\n";
+}
+
+sub fail {
+
+ if ($die_on_failure) {
+ dodie @_;
+ }
+
+ doprint "FAILED\n";
+
+ my $i = $iteration;
+
+ # no need to reboot for just building.
+ if (!do_not_reboot) {
+ doprint "REBOOTING\n";
+ reboot_to_good $sleep_time;
+ }
+
+ my $name = "";
+
+ if (defined($test_name)) {
+ $name = " ($test_name)";
+ }
+
+ print_times;
+
+ doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n";
+ doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n";
+ doprint "KTEST RESULT: TEST $i$name Failed: ", @_, "\n";
+ doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n";
+ doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n";
+
+ if (defined($store_failures)) {
+ save_logs "fail", $store_failures;
+ }
+
+ if (defined($post_test)) {
+ run_command $post_test;
+ }
+
+ return 1;
+}
+
+sub run_command {
+ my ($command, $redirect, $timeout) = @_;
+ my $start_time;
+ my $end_time;
+ my $dolog = 0;
+ my $dord = 0;
+ my $dostdout = 0;
+ my $pid;
+
+ $command =~ s/\$SSH_USER/$ssh_user/g;
+ $command =~ s/\$MACHINE/$machine/g;
+
+ doprint("$command ... ");
+ $start_time = time;
+
+ $pid = open(CMD, "$command 2>&1 |") or
+ (fail "unable to exec $command" and return 0);
+
+ if (defined($opt{"LOG_FILE"})) {
+ open(LOG, ">>$opt{LOG_FILE}") or
+ dodie "failed to write to log";
+ $dolog = 1;
+ }
+
+ if (defined($redirect)) {
+ if ($redirect eq 1) {
+ $dostdout = 1;
+ # Have the output of the command on its own line
+ doprint "\n";
+ } else {
+ open (RD, ">$redirect") or
+ dodie "failed to write to redirect $redirect";
+ $dord = 1;
+ }
+ }
+
+ my $hit_timeout = 0;
+
+ while (1) {
+ my $fp = \*CMD;
+ if (defined($timeout)) {
+ doprint "timeout = $timeout\n";
+ }
+ my $line = wait_for_input($fp, $timeout);
+ if (!defined($line)) {
+ my $now = time;
+ if (defined($timeout) && (($now - $start_time) >= $timeout)) {
+ doprint "Hit timeout of $timeout, killing process\n";
+ $hit_timeout = 1;
+ kill 9, $pid;
+ }
+ last;
+ }
+ print LOG $line if ($dolog);
+ print RD $line if ($dord);
+ print $line if ($dostdout);
+ }
+
+ waitpid($pid, 0);
+ # shift 8 for real exit status
+ $run_command_status = $? >> 8;
+
+ close(CMD);
+ close(LOG) if ($dolog);
+ close(RD) if ($dord);
+
+ $end_time = time;
+ my $delta = $end_time - $start_time;
+
+ if ($delta == 1) {
+ doprint "[1 second] ";
+ } else {
+ doprint "[$delta seconds] ";
+ }
+
+ if ($hit_timeout) {
+ $run_command_status = 1;
+ }
+
+ if ($run_command_status) {
+ doprint "FAILED!\n";
+ } else {
+ doprint "SUCCESS\n";
+ }
+
+ return !$run_command_status;
+}
+
+sub run_ssh {
+ my ($cmd, $timeout) = @_;
+ my $cp_exec = $ssh_exec;
+
+ $cp_exec =~ s/\$SSH_COMMAND/$cmd/g;
+ return run_command "$cp_exec", undef , $timeout;
+}
+
+sub run_scp {
+ my ($src, $dst, $cp_scp) = @_;
+
+ $cp_scp =~ s/\$SRC_FILE/$src/g;
+ $cp_scp =~ s/\$DST_FILE/$dst/g;
+
+ return run_command "$cp_scp";
+}
+
+sub run_scp_install {
+ my ($src, $dst) = @_;
+
+ my $cp_scp = $scp_to_target_install;
+
+ return run_scp($src, $dst, $cp_scp);
+}
+
+sub run_scp_mod {
+ my ($src, $dst) = @_;
+
+ my $cp_scp = $scp_to_target;
+
+ return run_scp($src, $dst, $cp_scp);
+}
+
+sub get_grub2_index {
+
+ return if (defined($grub_number) && defined($last_grub_menu) &&
+ $last_grub_menu eq $grub_menu && defined($last_machine) &&
+ $last_machine eq $machine);
+
+ doprint "Find grub2 menu ... ";
+ $grub_number = -1;
+
+ my $ssh_grub = $ssh_exec;
+ $ssh_grub =~ s,\$SSH_COMMAND,cat $grub_file,g;
+
+ open(IN, "$ssh_grub |")
+ or dodie "unable to get $grub_file";
+
+ my $found = 0;
+
+ while (<IN>) {
+ if (/^menuentry.*$grub_menu/) {
+ $grub_number++;
+ $found = 1;
+ last;
+ } elsif (/^menuentry\s|^submenu\s/) {
+ $grub_number++;
+ }
+ }
+ close(IN);
+
+ dodie "Could not find '$grub_menu' in $grub_file on $machine"
+ if (!$found);
+ doprint "$grub_number\n";
+ $last_grub_menu = $grub_menu;
+ $last_machine = $machine;
+}
+
+sub get_grub_index {
+
+ if ($reboot_type eq "grub2") {
+ get_grub2_index;
+ return;
+ }
+
+ if ($reboot_type ne "grub") {
+ return;
+ }
+ return if (defined($grub_number) && defined($last_grub_menu) &&
+ $last_grub_menu eq $grub_menu && defined($last_machine) &&
+ $last_machine eq $machine);
+
+ doprint "Find grub menu ... ";
+ $grub_number = -1;
+
+ my $ssh_grub = $ssh_exec;
+ $ssh_grub =~ s,\$SSH_COMMAND,cat /boot/grub/menu.lst,g;
+
+ open(IN, "$ssh_grub |")
+ or dodie "unable to get menu.lst";
+
+ my $found = 0;
+
+ while (<IN>) {
+ if (/^\s*title\s+$grub_menu\s*$/) {
+ $grub_number++;
+ $found = 1;
+ last;
+ } elsif (/^\s*title\s/) {
+ $grub_number++;
+ }
+ }
+ close(IN);
+
+ dodie "Could not find '$grub_menu' in /boot/grub/menu on $machine"
+ if (!$found);
+ doprint "$grub_number\n";
+ $last_grub_menu = $grub_menu;
+ $last_machine = $machine;
+}
+
+sub wait_for_input
+{
+ my ($fp, $time) = @_;
+ my $start_time;
+ my $rin;
+ my $rout;
+ my $nr;
+ my $buf;
+ my $line;
+ my $ch;
+
+ if (!defined($time)) {
+ $time = $timeout;
+ }
+
+ $rin = '';
+ vec($rin, fileno($fp), 1) = 1;
+ vec($rin, fileno(\*STDIN), 1) = 1;
+
+ $start_time = time;
+
+ while (1) {
+ $nr = select($rout=$rin, undef, undef, $time);
+
+ last if ($nr <= 0);
+
+ # copy data from stdin to the console
+ if (vec($rout, fileno(\*STDIN), 1) == 1) {
+ $nr = sysread(\*STDIN, $buf, 1000);
+ syswrite($fp, $buf, $nr) if ($nr > 0);
+ }
+
+ # The timeout is based on time waiting for the fp data
+ if (vec($rout, fileno($fp), 1) != 1) {
+ last if (defined($time) && (time - $start_time > $time));
+ next;
+ }
+
+ $line = "";
+
+ # try to read one char at a time
+ while (sysread $fp, $ch, 1) {
+ $line .= $ch;
+ last if ($ch eq "\n");
+ }
+
+ last if (!length($line));
+
+ return $line;
+ }
+ return undef;
+}
+
+sub reboot_to {
+ if (defined($switch_to_test)) {
+ run_command $switch_to_test;
+ }
+
+ if ($reboot_type eq "grub") {
+ run_ssh "'(echo \"savedefault --default=$grub_number --once\" | grub --batch)'";
+ } elsif ($reboot_type eq "grub2") {
+ run_ssh "$grub_reboot $grub_number";
+ } elsif ($reboot_type eq "syslinux") {
+ run_ssh "$syslinux --once \\\"$syslinux_label\\\" $syslinux_path";
+ } elsif (defined $reboot_script) {
+ run_command "$reboot_script";
+ }
+ reboot;
+}
+
+sub get_sha1 {
+ my ($commit) = @_;
+
+ doprint "git rev-list --max-count=1 $commit ... ";
+ my $sha1 = `git rev-list --max-count=1 $commit`;
+ my $ret = $?;
+
+ logit $sha1;
+
+ if ($ret) {
+ doprint "FAILED\n";
+ dodie "Failed to get git $commit";
+ }
+
+ print "SUCCESS\n";
+
+ chomp $sha1;
+
+ return $sha1;
+}
+
+sub monitor {
+ my $booted = 0;
+ my $bug = 0;
+ my $bug_ignored = 0;
+ my $skip_call_trace = 0;
+ my $loops;
+
+ my $start_time = time;
+
+ wait_for_monitor 5;
+
+ my $line;
+ my $full_line = "";
+
+ open(DMESG, "> $dmesg") or
+ dodie "unable to write to $dmesg";
+
+ reboot_to;
+
+ my $success_start;
+ my $failure_start;
+ my $monitor_start = time;
+ my $done = 0;
+ my $version_found = 0;
+
+ while (!$done) {
+
+ if ($bug && defined($stop_after_failure) &&
+ $stop_after_failure >= 0) {
+ my $time = $stop_after_failure - (time - $failure_start);
+ $line = wait_for_input($monitor_fp, $time);
+ if (!defined($line)) {
+ doprint "bug timed out after $booted_timeout seconds\n";
+ doprint "Test forced to stop after $stop_after_failure seconds after failure\n";
+ last;
+ }
+ } elsif ($booted) {
+ $line = wait_for_input($monitor_fp, $booted_timeout);
+ if (!defined($line)) {
+ my $s = $booted_timeout == 1 ? "" : "s";
+ doprint "Successful boot found: break after $booted_timeout second$s\n";
+ last;
+ }
+ } else {
+ $line = wait_for_input($monitor_fp);
+ if (!defined($line)) {
+ my $s = $timeout == 1 ? "" : "s";
+ doprint "Timed out after $timeout second$s\n";
+ last;
+ }
+ }
+
+ doprint $line;
+ print DMESG $line;
+
+ # we are not guaranteed to get a full line
+ $full_line .= $line;
+
+ if ($full_line =~ /$success_line/) {
+ $booted = 1;
+ $success_start = time;
+ }
+
+ if ($booted && defined($stop_after_success) &&
+ $stop_after_success >= 0) {
+ my $now = time;
+ if ($now - $success_start >= $stop_after_success) {
+ doprint "Test forced to stop after $stop_after_success seconds after success\n";
+ last;
+ }
+ }
+
+ if ($full_line =~ /\[ backtrace testing \]/) {
+ $skip_call_trace = 1;
+ }
+
+ if ($full_line =~ /call trace:/i) {
+ if (!$bug && !$skip_call_trace) {
+ if ($ignore_errors) {
+ $bug_ignored = 1;
+ } else {
+ $bug = 1;
+ $failure_start = time;
+ }
+ }
+ }
+
+ if ($bug && defined($stop_after_failure) &&
+ $stop_after_failure >= 0) {
+ my $now = time;
+ if ($now - $failure_start >= $stop_after_failure) {
+ doprint "Test forced to stop after $stop_after_failure seconds after failure\n";
+ last;
+ }
+ }
+
+ if ($full_line =~ /\[ end of backtrace testing \]/) {
+ $skip_call_trace = 0;
+ }
+
+ if ($full_line =~ /Kernel panic -/) {
+ $failure_start = time;
+ $bug = 1;
+ }
+
+ # Detect triple faults by testing the banner
+ if ($full_line =~ /\bLinux version (\S+).*\n/) {
+ if ($1 eq $version) {
+ $version_found = 1;
+ } elsif ($version_found && $detect_triplefault) {
+ # We already booted into the kernel we are testing,
+ # but now we booted into another kernel?
+ # Consider this a triple fault.
+ doprint "Already booted in Linux kernel $version, but now\n";
+ doprint "we booted into Linux kernel $1.\n";
+ doprint "Assuming that this is a triple fault.\n";
+ doprint "To disable this: set DETECT_TRIPLE_FAULT to 0\n";
+ last;
+ }
+ }
+
+ if ($line =~ /\n/) {
+ $full_line = "";
+ }
+
+ if ($stop_test_after > 0 && !$booted && !$bug) {
+ if (time - $monitor_start > $stop_test_after) {
+ doprint "STOP_TEST_AFTER ($stop_test_after seconds) timed out\n";
+ $done = 1;
+ }
+ }
+ }
+
+ my $end_time = time;
+ $reboot_time = $end_time - $start_time;
+
+ close(DMESG);
+
+ if ($bug) {
+ return 0 if ($in_bisect);
+ fail "failed - got a bug report" and return 0;
+ }
+
+ if (!$booted) {
+ return 0 if ($in_bisect);
+ fail "failed - never got a boot prompt." and return 0;
+ }
+
+ if ($bug_ignored) {
+ doprint "WARNING: Call Trace detected but ignored due to IGNORE_ERRORS=1\n";
+ }
+
+ return 1;
+}
+
+sub eval_kernel_version {
+ my ($option) = @_;
+
+ $option =~ s/\$KERNEL_VERSION/$version/g;
+
+ return $option;
+}
+
+sub do_post_install {
+
+ return if (!defined($post_install));
+
+ my $cp_post_install = eval_kernel_version $post_install;
+ run_command "$cp_post_install" or
+ dodie "Failed to run post install";
+}
+
+# Sometimes the reboot fails, and will hang. We try to ssh to the box
+# and if we fail, we force another reboot, that should powercycle it.
+sub test_booted {
+ if (!run_ssh "echo testing connection") {
+ reboot $sleep_time;
+ }
+}
+
+sub install {
+
+ return if ($no_install);
+
+ my $start_time = time;
+
+ if (defined($pre_install)) {
+ my $cp_pre_install = eval_kernel_version $pre_install;
+ run_command "$cp_pre_install" or
+ dodie "Failed to run pre install";
+ }
+
+ my $cp_target = eval_kernel_version $target_image;
+
+ test_booted;
+
+ run_scp_install "$outputdir/$build_target", "$cp_target" or
+ dodie "failed to copy image";
+
+ my $install_mods = 0;
+
+ # should we process modules?
+ $install_mods = 0;
+ open(IN, "$output_config") or dodie("Can't read config file");
+ while (<IN>) {
+ if (/CONFIG_MODULES(=y)?/) {
+ if (defined($1)) {
+ $install_mods = 1;
+ last;
+ }
+ }
+ }
+ close(IN);
+
+ if (!$install_mods) {
+ do_post_install;
+ doprint "No modules needed\n";
+ my $end_time = time;
+ $install_time = $end_time - $start_time;
+ return;
+ }
+
+ run_command "$make INSTALL_MOD_STRIP=1 INSTALL_MOD_PATH=$tmpdir modules_install" or
+ dodie "Failed to install modules";
+
+ my $modlib = "/lib/modules/$version";
+ my $modtar = "ktest-mods.tar.bz2";
+
+ run_ssh "rm -rf $modlib" or
+ dodie "failed to remove old mods: $modlib";
+
+ # would be nice if scp -r did not follow symbolic links
+ run_command "cd $tmpdir && tar -cjf $modtar lib/modules/$version" or
+ dodie "making tarball";
+
+ run_scp_mod "$tmpdir/$modtar", "/tmp" or
+ dodie "failed to copy modules";
+
+ unlink "$tmpdir/$modtar";
+
+ run_ssh "'(cd / && tar xjf /tmp/$modtar)'" or
+ dodie "failed to tar modules";
+
+ run_ssh "rm -f /tmp/$modtar";
+
+ do_post_install;
+
+ my $end_time = time;
+ $install_time = $end_time - $start_time;
+}
+
+sub get_version {
+ # get the release name
+ return if ($have_version);
+ doprint "$make kernelrelease ... ";
+ $version = `$make -s kernelrelease | tail -1`;
+ chomp($version);
+ doprint "$version\n";
+ $have_version = 1;
+}
+
+sub start_monitor_and_install {
+ # Make sure the stable kernel has finished booting
+
+ # Install bisects, don't need console
+ if (defined $console) {
+ start_monitor;
+ wait_for_monitor 5;
+ end_monitor;
+ }
+
+ get_grub_index;
+ get_version;
+ install;
+
+ start_monitor if (defined $console);
+ return monitor;
+}
+
+my $check_build_re = ".*:.*(warning|error|Error):.*";
+my $utf8_quote = "\\x{e2}\\x{80}(\\x{98}|\\x{99})";
+
+sub process_warning_line {
+ my ($line) = @_;
+
+ chomp $line;
+
+ # for distcc heterogeneous systems, some compilers
+ # do things differently causing warning lines
+ # to be slightly different. This makes an attempt
+ # to fixe those issues.
+
+ # chop off the index into the line
+ # using distcc, some compilers give different indexes
+ # depending on white space
+ $line =~ s/^(\s*\S+:\d+:)\d+/$1/;
+
+ # Some compilers use UTF-8 extended for quotes and some don't.
+ $line =~ s/$utf8_quote/'/g;
+
+ return $line;
+}
+
+# Read buildlog and check against warnings file for any
+# new warnings.
+#
+# Returns 1 if OK
+# 0 otherwise
+sub check_buildlog {
+ return 1 if (!defined $warnings_file);
+
+ my %warnings_list;
+
+ # Failed builds should not reboot the target
+ my $save_no_reboot = $no_reboot;
+ $no_reboot = 1;
+
+ if (-f $warnings_file) {
+ open(IN, $warnings_file) or
+ dodie "Error opening $warnings_file";
+
+ while (<IN>) {
+ if (/$check_build_re/) {
+ my $warning = process_warning_line $_;
+
+ $warnings_list{$warning} = 1;
+ }
+ }
+ close(IN);
+ }
+
+ # If warnings file didn't exist, and WARNINGS_FILE exist,
+ # then we fail on any warning!
+
+ open(IN, $buildlog) or dodie "Can't open $buildlog";
+ while (<IN>) {
+ if (/$check_build_re/) {
+ my $warning = process_warning_line $_;
+
+ if (!defined $warnings_list{$warning}) {
+ fail "New warning found (not in $warnings_file)\n$_\n";
+ $no_reboot = $save_no_reboot;
+ return 0;
+ }
+ }
+ }
+ $no_reboot = $save_no_reboot;
+ close(IN);
+}
+
+sub check_patch_buildlog {
+ my ($patch) = @_;
+
+ my @files = `git show $patch | diffstat -l`;
+
+ foreach my $file (@files) {
+ chomp $file;
+ }
+
+ open(IN, "git show $patch |") or
+ dodie "failed to show $patch";
+ while (<IN>) {
+ if (m,^--- a/(.*),) {
+ chomp $1;
+ $files[$#files] = $1;
+ }
+ }
+ close(IN);
+
+ open(IN, $buildlog) or dodie "Can't open $buildlog";
+ while (<IN>) {
+ if (/^\s*(.*?):.*(warning|error)/) {
+ my $err = $1;
+ foreach my $file (@files) {
+ my $fullpath = "$builddir/$file";
+ if ($file eq $err || $fullpath eq $err) {
+ fail "$file built with warnings" and return 0;
+ }
+ }
+ }
+ }
+ close(IN);
+
+ return 1;
+}
+
+sub apply_min_config {
+ my $outconfig = "$output_config.new";
+
+ # Read the config file and remove anything that
+ # is in the force_config hash (from minconfig and others)
+ # then add the force config back.
+
+ doprint "Applying minimum configurations into $output_config.new\n";
+
+ open (OUT, ">$outconfig") or
+ dodie "Can't create $outconfig";
+
+ if (-f $output_config) {
+ open (IN, $output_config) or
+ dodie "Failed to open $output_config";
+ while (<IN>) {
+ if (/^(# )?(CONFIG_[^\s=]*)/) {
+ next if (defined($force_config{$2}));
+ }
+ print OUT;
+ }
+ close IN;
+ }
+ foreach my $config (keys %force_config) {
+ print OUT "$force_config{$config}\n";
+ }
+ close OUT;
+
+ run_command "mv $outconfig $output_config";
+}
+
+sub make_oldconfig {
+
+ my @force_list = keys %force_config;
+
+ if ($#force_list >= 0) {
+ apply_min_config;
+ }
+
+ if (!run_command "$make olddefconfig") {
+ # Perhaps olddefconfig doesn't exist in this version of the kernel
+ # try oldnoconfig
+ doprint "olddefconfig failed, trying make oldnoconfig\n";
+ if (!run_command "$make oldnoconfig") {
+ doprint "oldnoconfig failed, trying yes '' | make oldconfig\n";
+ # try a yes '' | oldconfig
+ run_command "yes '' | $make oldconfig" or
+ dodie "failed make config oldconfig";
+ }
+ }
+}
+
+# read a config file and use this to force new configs.
+sub load_force_config {
+ my ($config) = @_;
+
+ doprint "Loading force configs from $config\n";
+ open(IN, $config) or
+ dodie "failed to read $config";
+ while (<IN>) {
+ chomp;
+ if (/^(CONFIG[^\s=]*)(\s*=.*)/) {
+ $force_config{$1} = $_;
+ } elsif (/^# (CONFIG_\S*) is not set/) {
+ $force_config{$1} = $_;
+ }
+ }
+ close IN;
+}
+
+sub build {
+ my ($type) = @_;
+
+ unlink $buildlog;
+
+ my $start_time = time;
+
+ # Failed builds should not reboot the target
+ my $save_no_reboot = $no_reboot;
+ $no_reboot = 1;
+
+ # Calculate a new version from here.
+ $have_version = 0;
+
+ if (defined($pre_build)) {
+ my $ret = run_command $pre_build;
+ if (!$ret && defined($pre_build_die) &&
+ $pre_build_die) {
+ dodie "failed to pre_build\n";
+ }
+ }
+
+ if ($type =~ /^useconfig:(.*)/) {
+ run_command "cp $1 $output_config" or
+ dodie "could not copy $1 to .config";
+
+ $type = "oldconfig";
+ }
+
+ # old config can ask questions
+ if ($type eq "oldconfig") {
+ $type = "olddefconfig";
+
+ # allow for empty configs
+ run_command "touch $output_config";
+
+ if (!$noclean) {
+ run_command "mv $output_config $outputdir/config_temp" or
+ dodie "moving .config";
+
+ run_command "$make mrproper" or dodie "make mrproper";
+
+ run_command "mv $outputdir/config_temp $output_config" or
+ dodie "moving config_temp";
+ }
+
+ } elsif (!$noclean) {
+ unlink "$output_config";
+ run_command "$make mrproper" or
+ dodie "make mrproper";
+ }
+
+ # add something to distinguish this build
+ open(OUT, "> $outputdir/localversion") or dodie("Can't make localversion file");
+ print OUT "$localversion\n";
+ close(OUT);
+
+ if (defined($minconfig)) {
+ load_force_config($minconfig);
+ }
+
+ if ($type ne "olddefconfig") {
+ run_command "$make $type" or
+ dodie "failed make config";
+ }
+ # Run old config regardless, to enforce min configurations
+ make_oldconfig;
+
+ my $build_ret = run_command "$make $build_options", $buildlog;
+
+ if (defined($post_build)) {
+ # Because a post build may change the kernel version
+ # do it now.
+ get_version;
+ my $ret = run_command $post_build;
+ if (!$ret && defined($post_build_die) &&
+ $post_build_die) {
+ dodie "failed to post_build\n";
+ }
+ }
+
+ if (!$build_ret) {
+ # bisect may need this to pass
+ if ($in_bisect) {
+ $no_reboot = $save_no_reboot;
+ return 0;
+ }
+ fail "failed build" and return 0;
+ }
+
+ $no_reboot = $save_no_reboot;
+
+ my $end_time = time;
+ $build_time = $end_time - $start_time;
+
+ return 1;
+}
+
+sub halt {
+ if (!run_ssh "halt" or defined($power_off)) {
+ if (defined($poweroff_after_halt)) {
+ sleep $poweroff_after_halt;
+ run_command "$power_off";
+ }
+ } else {
+ # nope? the zap it!
+ run_command "$power_off";
+ }
+}
+
+sub success {
+ my ($i) = @_;
+
+ $successes++;
+
+ my $name = "";
+
+ if (defined($test_name)) {
+ $name = " ($test_name)";
+ }
+
+ print_times;
+
+ doprint "\n\n*******************************************\n";
+ doprint "*******************************************\n";
+ doprint "KTEST RESULT: TEST $i$name SUCCESS!!!! **\n";
+ doprint "*******************************************\n";
+ doprint "*******************************************\n";
+
+ if (defined($store_successes)) {
+ save_logs "success", $store_successes;
+ }
+
+ if ($i != $opt{"NUM_TESTS"} && !do_not_reboot) {
+ doprint "Reboot and wait $sleep_time seconds\n";
+ reboot_to_good $sleep_time;
+ }
+
+ if (defined($post_test)) {
+ run_command $post_test;
+ }
+}
+
+sub answer_bisect {
+ for (;;) {
+ doprint "Pass, fail, or skip? [p/f/s]";
+ my $ans = <STDIN>;
+ chomp $ans;
+ if ($ans eq "p" || $ans eq "P") {
+ return 1;
+ } elsif ($ans eq "f" || $ans eq "F") {
+ return 0;
+ } elsif ($ans eq "s" || $ans eq "S") {
+ return -1;
+ } else {
+ print "Please answer 'p', 'f', or 's'\n";
+ }
+ }
+}
+
+sub child_run_test {
+
+ # child should have no power
+ $reboot_on_error = 0;
+ $poweroff_on_error = 0;
+ $die_on_failure = 1;
+
+ run_command $run_test, $testlog;
+
+ exit $run_command_status;
+}
+
+my $child_done;
+
+sub child_finished {
+ $child_done = 1;
+}
+
+sub do_run_test {
+ my $child_pid;
+ my $child_exit;
+ my $line;
+ my $full_line;
+ my $bug = 0;
+ my $bug_ignored = 0;
+
+ my $start_time = time;
+
+ wait_for_monitor 1;
+
+ doprint "run test $run_test\n";
+
+ $child_done = 0;
+
+ $SIG{CHLD} = qw(child_finished);
+
+ $child_pid = fork;
+
+ child_run_test if (!$child_pid);
+
+ $full_line = "";
+
+ do {
+ $line = wait_for_input($monitor_fp, 1);
+ if (defined($line)) {
+
+ # we are not guaranteed to get a full line
+ $full_line .= $line;
+ doprint $line;
+
+ if ($full_line =~ /call trace:/i) {
+ if ($ignore_errors) {
+ $bug_ignored = 1;
+ } else {
+ $bug = 1;
+ }
+ }
+
+ if ($full_line =~ /Kernel panic -/) {
+ $bug = 1;
+ }
+
+ if ($line =~ /\n/) {
+ $full_line = "";
+ }
+ }
+ } while (!$child_done && !$bug);
+
+ if (!$bug && $bug_ignored) {
+ doprint "WARNING: Call Trace detected but ignored due to IGNORE_ERRORS=1\n";
+ }
+
+ if ($bug) {
+ my $failure_start = time;
+ my $now;
+ do {
+ $line = wait_for_input($monitor_fp, 1);
+ if (defined($line)) {
+ doprint $line;
+ }
+ $now = time;
+ if ($now - $failure_start >= $stop_after_failure) {
+ last;
+ }
+ } while (defined($line));
+
+ doprint "Detected kernel crash!\n";
+ # kill the child with extreme prejudice
+ kill 9, $child_pid;
+ }
+
+ waitpid $child_pid, 0;
+ $child_exit = $? >> 8;
+
+ my $end_time = time;
+ $test_time = $end_time - $start_time;
+
+ if (!$bug && $in_bisect) {
+ if (defined($bisect_ret_good)) {
+ if ($child_exit == $bisect_ret_good) {
+ return 1;
+ }
+ }
+ if (defined($bisect_ret_skip)) {
+ if ($child_exit == $bisect_ret_skip) {
+ return -1;
+ }
+ }
+ if (defined($bisect_ret_abort)) {
+ if ($child_exit == $bisect_ret_abort) {
+ fail "test abort" and return -2;
+ }
+ }
+ if (defined($bisect_ret_bad)) {
+ if ($child_exit == $bisect_ret_skip) {
+ return 0;
+ }
+ }
+ if (defined($bisect_ret_default)) {
+ if ($bisect_ret_default eq "good") {
+ return 1;
+ } elsif ($bisect_ret_default eq "bad") {
+ return 0;
+ } elsif ($bisect_ret_default eq "skip") {
+ return -1;
+ } elsif ($bisect_ret_default eq "abort") {
+ return -2;
+ } else {
+ fail "unknown default action: $bisect_ret_default"
+ and return -2;
+ }
+ }
+ }
+
+ if ($bug || $child_exit) {
+ return 0 if $in_bisect;
+ fail "test failed" and return 0;
+ }
+ return 1;
+}
+
+sub run_git_bisect {
+ my ($command) = @_;
+
+ doprint "$command ... ";
+
+ my $output = `$command 2>&1`;
+ my $ret = $?;
+
+ logit $output;
+
+ if ($ret) {
+ doprint "FAILED\n";
+ dodie "Failed to git bisect";
+ }
+
+ doprint "SUCCESS\n";
+ if ($output =~ m/^(Bisecting: .*\(roughly \d+ steps?\))\s+\[([[:xdigit:]]+)\]/) {
+ doprint "$1 [$2]\n";
+ } elsif ($output =~ m/^([[:xdigit:]]+) is the first bad commit/) {
+ $bisect_bad_commit = $1;
+ doprint "Found bad commit... $1\n";
+ return 0;
+ } else {
+ # we already logged it, just print it now.
+ print $output;
+ }
+
+ return 1;
+}
+
+sub bisect_reboot {
+ doprint "Reboot and sleep $bisect_sleep_time seconds\n";
+ reboot_to_good $bisect_sleep_time;
+}
+
+# returns 1 on success, 0 on failure, -1 on skip
+sub run_bisect_test {
+ my ($type, $buildtype) = @_;
+
+ my $failed = 0;
+ my $result;
+ my $output;
+ my $ret;
+
+ $in_bisect = 1;
+
+ build $buildtype or $failed = 1;
+
+ if ($type ne "build") {
+ if ($failed && $bisect_skip) {
+ $in_bisect = 0;
+ return -1;
+ }
+ dodie "Failed on build" if $failed;
+
+ # Now boot the box
+ start_monitor_and_install or $failed = 1;
+
+ if ($type ne "boot") {
+ if ($failed && $bisect_skip) {
+ end_monitor;
+ bisect_reboot;
+ $in_bisect = 0;
+ return -1;
+ }
+ dodie "Failed on boot" if $failed;
+
+ do_run_test or $failed = 1;
+ }
+ end_monitor;
+ }
+
+ if ($failed) {
+ $result = 0;
+ } else {
+ $result = 1;
+ }
+
+ # reboot the box to a kernel we can ssh to
+ if ($type ne "build") {
+ bisect_reboot;
+ }
+ $in_bisect = 0;
+
+ return $result;
+}
+
+sub run_bisect {
+ my ($type) = @_;
+ my $buildtype = "oldconfig";
+
+ # We should have a minconfig to use?
+ if (defined($minconfig)) {
+ $buildtype = "useconfig:$minconfig";
+ }
+
+ # If the user sets bisect_tries to less than 1, then no tries
+ # is a success.
+ my $ret = 1;
+
+ # Still let the user manually decide that though.
+ if ($bisect_tries < 1 && $bisect_manual) {
+ $ret = answer_bisect;
+ }
+
+ for (my $i = 0; $i < $bisect_tries; $i++) {
+ if ($bisect_tries > 1) {
+ my $t = $i + 1;
+ doprint("Running bisect trial $t of $bisect_tries:\n");
+ }
+ $ret = run_bisect_test $type, $buildtype;
+
+ if ($bisect_manual) {
+ $ret = answer_bisect;
+ }
+
+ last if (!$ret);
+ }
+
+ # Are we looking for where it worked, not failed?
+ if ($reverse_bisect && $ret >= 0) {
+ $ret = !$ret;
+ }
+
+ if ($ret > 0) {
+ return "good";
+ } elsif ($ret == 0) {
+ return "bad";
+ } elsif ($bisect_skip) {
+ doprint "HIT A BAD COMMIT ... SKIPPING\n";
+ return "skip";
+ }
+}
+
+sub update_bisect_replay {
+ my $tmp_log = "$tmpdir/ktest_bisect_log";
+ run_command "git bisect log > $tmp_log" or
+ dodie "can't create bisect log";
+ return $tmp_log;
+}
+
+sub bisect {
+ my ($i) = @_;
+
+ my $result;
+
+ dodie "BISECT_GOOD[$i] not defined\n" if (!defined($bisect_good));
+ dodie "BISECT_BAD[$i] not defined\n" if (!defined($bisect_bad));
+ dodie "BISECT_TYPE[$i] not defined\n" if (!defined($bisect_type));
+
+ my $good = $bisect_good;
+ my $bad = $bisect_bad;
+ my $type = $bisect_type;
+ my $start = $bisect_start;
+ my $replay = $bisect_replay;
+ my $start_files = $bisect_files;
+
+ if (defined($start_files)) {
+ $start_files = " -- " . $start_files;
+ } else {
+ $start_files = "";
+ }
+
+ # convert to true sha1's
+ $good = get_sha1($good);
+ $bad = get_sha1($bad);
+
+ if (defined($bisect_reverse) && $bisect_reverse == 1) {
+ doprint "Performing a reverse bisect (bad is good, good is bad!)\n";
+ $reverse_bisect = 1;
+ } else {
+ $reverse_bisect = 0;
+ }
+
+ # Can't have a test without having a test to run
+ if ($type eq "test" && !defined($run_test)) {
+ $type = "boot";
+ }
+
+ # Check if a bisect was running
+ my $bisect_start_file = "$builddir/.git/BISECT_START";
+
+ my $check = $bisect_check;
+ my $do_check = defined($check) && $check ne "0";
+
+ if ( -f $bisect_start_file ) {
+ print "Bisect in progress found\n";
+ if ($do_check) {
+ print " If you say yes, then no checks of good or bad will be done\n";
+ }
+ if (defined($replay)) {
+ print "** BISECT_REPLAY is defined in config file **";
+ print " Ignore config option and perform new git bisect log?\n";
+ if (read_ync " (yes, no, or cancel) ") {
+ $replay = update_bisect_replay;
+ $do_check = 0;
+ }
+ } elsif (read_yn "read git log and continue?") {
+ $replay = update_bisect_replay;
+ $do_check = 0;
+ }
+ }
+
+ if ($do_check) {
+
+ # get current HEAD
+ my $head = get_sha1("HEAD");
+
+ if ($check ne "good") {
+ doprint "TESTING BISECT BAD [$bad]\n";
+ run_command "git checkout $bad" or
+ dodie "Failed to checkout $bad";
+
+ $result = run_bisect $type;
+
+ if ($result ne "bad") {
+ fail "Tested BISECT_BAD [$bad] and it succeeded" and return 0;
+ }
+ }
+
+ if ($check ne "bad") {
+ doprint "TESTING BISECT GOOD [$good]\n";
+ run_command "git checkout $good" or
+ dodie "Failed to checkout $good";
+
+ $result = run_bisect $type;
+
+ if ($result ne "good") {
+ fail "Tested BISECT_GOOD [$good] and it failed" and return 0;
+ }
+ }
+
+ # checkout where we started
+ run_command "git checkout $head" or
+ dodie "Failed to checkout $head";
+ }
+
+ run_command "git bisect start$start_files" or
+ dodie "could not start bisect";
+
+ if (defined($replay)) {
+ run_command "git bisect replay $replay" or
+ dodie "failed to run replay";
+ } else {
+
+ run_command "git bisect good $good" or
+ dodie "could not set bisect good to $good";
+
+ run_git_bisect "git bisect bad $bad" or
+ dodie "could not set bisect bad to $bad";
+
+ }
+
+ if (defined($start)) {
+ run_command "git checkout $start" or
+ dodie "failed to checkout $start";
+ }
+
+ my $test;
+ do {
+ $result = run_bisect $type;
+ $test = run_git_bisect "git bisect $result";
+ print_times;
+ } while ($test);
+
+ run_command "git bisect log" or
+ dodie "could not capture git bisect log";
+
+ run_command "git bisect reset" or
+ dodie "could not reset git bisect";
+
+ doprint "Bad commit was [$bisect_bad_commit]\n";
+
+ success $i;
+}
+
+# config_ignore holds the configs that were set (or unset) for
+# a good config and we will ignore these configs for the rest
+# of a config bisect. These configs stay as they were.
+my %config_ignore;
+
+# config_set holds what all configs were set as.
+my %config_set;
+
+# config_off holds the set of configs that the bad config had disabled.
+# We need to record them and set them in the .config when running
+# olddefconfig, because olddefconfig keeps the defaults.
+my %config_off;
+
+# config_off_tmp holds a set of configs to turn off for now
+my @config_off_tmp;
+
+# config_list is the set of configs that are being tested
+my %config_list;
+my %null_config;
+
+my %dependency;
+
+sub assign_configs {
+ my ($hash, $config) = @_;
+
+ doprint "Reading configs from $config\n";
+
+ open (IN, $config)
+ or dodie "Failed to read $config";
+
+ while (<IN>) {
+ chomp;
+ if (/^((CONFIG\S*)=.*)/) {
+ ${$hash}{$2} = $1;
+ } elsif (/^(# (CONFIG\S*) is not set)/) {
+ ${$hash}{$2} = $1;
+ }
+ }
+
+ close(IN);
+}
+
+sub process_config_ignore {
+ my ($config) = @_;
+
+ assign_configs \%config_ignore, $config;
+}
+
+sub get_dependencies {
+ my ($config) = @_;
+
+ my $arr = $dependency{$config};
+ if (!defined($arr)) {
+ return ();
+ }
+
+ my @deps = @{$arr};
+
+ foreach my $dep (@{$arr}) {
+ print "ADD DEP $dep\n";
+ @deps = (@deps, get_dependencies $dep);
+ }
+
+ return @deps;
+}
+
+sub save_config {
+ my ($pc, $file) = @_;
+
+ my %configs = %{$pc};
+
+ doprint "Saving configs into $file\n";
+
+ open(OUT, ">$file") or dodie "Can not write to $file";
+
+ foreach my $config (keys %configs) {
+ print OUT "$configs{$config}\n";
+ }
+ close(OUT);
+}
+
+sub create_config {
+ my ($name, $pc) = @_;
+
+ doprint "Creating old config from $name configs\n";
+
+ save_config $pc, $output_config;
+
+ make_oldconfig;
+}
+
+sub run_config_bisect_test {
+ my ($type) = @_;
+
+ my $ret = run_bisect_test $type, "oldconfig";
+
+ if ($bisect_manual) {
+ $ret = answer_bisect;
+ }
+
+ return $ret;
+}
+
+sub config_bisect_end {
+ my ($good, $bad) = @_;
+ my $diffexec = "diff -u";
+
+ if (-f "$builddir/scripts/diffconfig") {
+ $diffexec = "$builddir/scripts/diffconfig";
+ }
+ doprint "\n\n***************************************\n";
+ doprint "No more config bisecting possible.\n";
+ run_command "$diffexec $good $bad", 1;
+ doprint "***************************************\n\n";
+}
+
+sub run_config_bisect {
+ my ($good, $bad, $last_result) = @_;
+ my $reset = "";
+ my $cmd;
+ my $ret;
+
+ if (!length($last_result)) {
+ $reset = "-r";
+ }
+ run_command "$config_bisect_exec $reset -b $outputdir $good $bad $last_result", 1;
+
+ # config-bisect returns:
+ # 0 if there is more to bisect
+ # 1 for finding a good config
+ # 2 if it can not find any more configs
+ # -1 (255) on error
+ if ($run_command_status) {
+ return $run_command_status;
+ }
+
+ $ret = run_config_bisect_test $config_bisect_type;
+ if ($ret) {
+ doprint "NEW GOOD CONFIG\n";
+ # Return 3 for good config
+ return 3;
+ } else {
+ doprint "NEW BAD CONFIG\n";
+ # Return 4 for bad config
+ return 4;
+ }
+}
+
+sub config_bisect {
+ my ($i) = @_;
+
+ my $good_config;
+ my $bad_config;
+
+ my $type = $config_bisect_type;
+ my $ret;
+
+ $bad_config = $config_bisect;
+
+ if (defined($config_bisect_good)) {
+ $good_config = $config_bisect_good;
+ } elsif (defined($minconfig)) {
+ $good_config = $minconfig;
+ } else {
+ doprint "No config specified, checking if defconfig works";
+ $ret = run_bisect_test $type, "defconfig";
+ if (!$ret) {
+ fail "Have no good config to compare with, please set CONFIG_BISECT_GOOD";
+ return 1;
+ }
+ $good_config = $output_config;
+ }
+
+ if (!defined($config_bisect_exec)) {
+ # First check the location that ktest.pl ran
+ my @locations = ( "$pwd/config-bisect.pl",
+ "$dirname/config-bisect.pl",
+ "$builddir/tools/testing/ktest/config-bisect.pl",
+ undef );
+ foreach my $loc (@locations) {
+ doprint "loc = $loc\n";
+ $config_bisect_exec = $loc;
+ last if (defined($config_bisect_exec && -x $config_bisect_exec));
+ }
+ if (!defined($config_bisect_exec)) {
+ fail "Could not find an executable config-bisect.pl\n",
+ " Set CONFIG_BISECT_EXEC to point to config-bisect.pl";
+ return 1;
+ }
+ }
+
+ # we don't want min configs to cause issues here.
+ doprint "Disabling 'MIN_CONFIG' for this test\n";
+ undef $minconfig;
+
+ my %good_configs;
+ my %bad_configs;
+ my %tmp_configs;
+
+ if (-f "$tmpdir/good_config.tmp" || -f "$tmpdir/bad_config.tmp") {
+ if (read_yn "Interrupted config-bisect. Continue (n - will start new)?") {
+ if (-f "$tmpdir/good_config.tmp") {
+ $good_config = "$tmpdir/good_config.tmp";
+ } else {
+ $good_config = "$tmpdir/good_config";
+ }
+ if (-f "$tmpdir/bad_config.tmp") {
+ $bad_config = "$tmpdir/bad_config.tmp";
+ } else {
+ $bad_config = "$tmpdir/bad_config";
+ }
+ }
+ }
+ doprint "Run good configs through make oldconfig\n";
+ assign_configs \%tmp_configs, $good_config;
+ create_config "$good_config", \%tmp_configs;
+ $good_config = "$tmpdir/good_config";
+ system("cp $output_config $good_config") == 0 or dodie "cp good config";
+
+ doprint "Run bad configs through make oldconfig\n";
+ assign_configs \%tmp_configs, $bad_config;
+ create_config "$bad_config", \%tmp_configs;
+ $bad_config = "$tmpdir/bad_config";
+ system("cp $output_config $bad_config") == 0 or dodie "cp bad config";
+
+ if (defined($config_bisect_check) && $config_bisect_check ne "0") {
+ if ($config_bisect_check ne "good") {
+ doprint "Testing bad config\n";
+
+ $ret = run_bisect_test $type, "useconfig:$bad_config";
+ if ($ret) {
+ fail "Bad config succeeded when expected to fail!";
+ return 0;
+ }
+ }
+ if ($config_bisect_check ne "bad") {
+ doprint "Testing good config\n";
+
+ $ret = run_bisect_test $type, "useconfig:$good_config";
+ if (!$ret) {
+ fail "Good config failed when expected to succeed!";
+ return 0;
+ }
+ }
+ }
+
+ my $last_run = "";
+
+ do {
+ $ret = run_config_bisect $good_config, $bad_config, $last_run;
+ if ($ret == 3) {
+ $last_run = "good";
+ } elsif ($ret == 4) {
+ $last_run = "bad";
+ }
+ print_times;
+ } while ($ret == 3 || $ret == 4);
+
+ if ($ret == 2) {
+ config_bisect_end "$good_config.tmp", "$bad_config.tmp";
+ }
+
+ return $ret if ($ret < 0);
+
+ success $i;
+}
+
+sub patchcheck_reboot {
+ doprint "Reboot and sleep $patchcheck_sleep_time seconds\n";
+ reboot_to_good $patchcheck_sleep_time;
+}
+
+sub patchcheck {
+ my ($i) = @_;
+
+ dodie "PATCHCHECK_START[$i] not defined\n"
+ if (!defined($patchcheck_start));
+ dodie "PATCHCHECK_TYPE[$i] not defined\n"
+ if (!defined($patchcheck_type));
+
+ my $start = $patchcheck_start;
+
+ my $cherry = $patchcheck_cherry;
+ if (!defined($cherry)) {
+ $cherry = 0;
+ }
+
+ my $end = "HEAD";
+ if (defined($patchcheck_end)) {
+ $end = $patchcheck_end;
+ } elsif ($cherry) {
+ dodie "PATCHCHECK_END must be defined with PATCHCHECK_CHERRY\n";
+ }
+
+ # Get the true sha1's since we can use things like HEAD~3
+ $start = get_sha1($start);
+ $end = get_sha1($end);
+
+ my $type = $patchcheck_type;
+
+ # Can't have a test without having a test to run
+ if ($type eq "test" && !defined($run_test)) {
+ $type = "boot";
+ }
+
+ if ($cherry) {
+ open (IN, "git cherry -v $start $end|") or
+ dodie "could not get git list";
+ } else {
+ open (IN, "git log --pretty=oneline $end|") or
+ dodie "could not get git list";
+ }
+
+ my @list;
+
+ while (<IN>) {
+ chomp;
+ # git cherry adds a '+' we want to remove
+ s/^\+ //;
+ $list[$#list+1] = $_;
+ last if (/^$start/);
+ }
+ close(IN);
+
+ if (!$cherry) {
+ if ($list[$#list] !~ /^$start/) {
+ fail "SHA1 $start not found";
+ }
+
+ # go backwards in the list
+ @list = reverse @list;
+ }
+
+ doprint("Going to test the following commits:\n");
+ foreach my $l (@list) {
+ doprint "$l\n";
+ }
+
+ my $save_clean = $noclean;
+ my %ignored_warnings;
+
+ if (defined($ignore_warnings)) {
+ foreach my $sha1 (split /\s+/, $ignore_warnings) {
+ $ignored_warnings{$sha1} = 1;
+ }
+ }
+
+ $in_patchcheck = 1;
+ foreach my $item (@list) {
+ my $sha1 = $item;
+ $sha1 =~ s/^([[:xdigit:]]+).*/$1/;
+
+ doprint "\nProcessing commit \"$item\"\n\n";
+
+ run_command "git checkout $sha1" or
+ dodie "Failed to checkout $sha1";
+
+ # only clean on the first and last patch
+ if ($item eq $list[0] ||
+ $item eq $list[$#list]) {
+ $noclean = $save_clean;
+ } else {
+ $noclean = 1;
+ }
+
+ if (defined($minconfig)) {
+ build "useconfig:$minconfig" or return 0;
+ } else {
+ # ?? no config to use?
+ build "oldconfig" or return 0;
+ }
+
+ # No need to do per patch checking if warnings file exists
+ if (!defined($warnings_file) && !defined($ignored_warnings{$sha1})) {
+ check_patch_buildlog $sha1 or return 0;
+ }
+
+ check_buildlog or return 0;
+
+ next if ($type eq "build");
+
+ my $failed = 0;
+
+ start_monitor_and_install or $failed = 1;
+
+ if (!$failed && $type ne "boot"){
+ do_run_test or $failed = 1;
+ }
+ end_monitor;
+ if ($failed) {
+ print_times;
+ return 0;
+ }
+ patchcheck_reboot;
+ print_times;
+ }
+ $in_patchcheck = 0;
+ success $i;
+
+ return 1;
+}
+
+my %depends;
+my %depcount;
+my $iflevel = 0;
+my @ifdeps;
+
+# prevent recursion
+my %read_kconfigs;
+
+sub add_dep {
+ # $config depends on $dep
+ my ($config, $dep) = @_;
+
+ if (defined($depends{$config})) {
+ $depends{$config} .= " " . $dep;
+ } else {
+ $depends{$config} = $dep;
+ }
+
+ # record the number of configs depending on $dep
+ if (defined $depcount{$dep}) {
+ $depcount{$dep}++;
+ } else {
+ $depcount{$dep} = 1;
+ }
+}
+
+# taken from streamline_config.pl
+sub read_kconfig {
+ my ($kconfig) = @_;
+
+ my $state = "NONE";
+ my $config;
+ my @kconfigs;
+
+ my $cont = 0;
+ my $line;
+
+
+ if (! -f $kconfig) {
+ doprint "file $kconfig does not exist, skipping\n";
+ return;
+ }
+
+ open(KIN, "$kconfig")
+ or dodie "Can't open $kconfig";
+ while (<KIN>) {
+ chomp;
+
+ # Make sure that lines ending with \ continue
+ if ($cont) {
+ $_ = $line . " " . $_;
+ }
+
+ if (s/\\$//) {
+ $cont = 1;
+ $line = $_;
+ next;
+ }
+
+ $cont = 0;
+
+ # collect any Kconfig sources
+ if (/^source\s*"(.*)"/) {
+ $kconfigs[$#kconfigs+1] = $1;
+ }
+
+ # configs found
+ if (/^\s*(menu)?config\s+(\S+)\s*$/) {
+ $state = "NEW";
+ $config = $2;
+
+ for (my $i = 0; $i < $iflevel; $i++) {
+ add_dep $config, $ifdeps[$i];
+ }
+
+ # collect the depends for the config
+ } elsif ($state eq "NEW" && /^\s*depends\s+on\s+(.*)$/) {
+
+ add_dep $config, $1;
+
+ # Get the configs that select this config
+ } elsif ($state eq "NEW" && /^\s*select\s+(\S+)/) {
+
+ # selected by depends on config
+ add_dep $1, $config;
+
+ # Check for if statements
+ } elsif (/^if\s+(.*\S)\s*$/) {
+ my $deps = $1;
+ # remove beginning and ending non text
+ $deps =~ s/^[^a-zA-Z0-9_]*//;
+ $deps =~ s/[^a-zA-Z0-9_]*$//;
+
+ my @deps = split /[^a-zA-Z0-9_]+/, $deps;
+
+ $ifdeps[$iflevel++] = join ':', @deps;
+
+ } elsif (/^endif/) {
+
+ $iflevel-- if ($iflevel);
+
+ # stop on "help"
+ } elsif (/^\s*help\s*$/) {
+ $state = "NONE";
+ }
+ }
+ close(KIN);
+
+ # read in any configs that were found.
+ foreach $kconfig (@kconfigs) {
+ if (!defined($read_kconfigs{$kconfig})) {
+ $read_kconfigs{$kconfig} = 1;
+ read_kconfig("$builddir/$kconfig");
+ }
+ }
+}
+
+sub read_depends {
+ # find out which arch this is by the kconfig file
+ open (IN, $output_config)
+ or dodie "Failed to read $output_config";
+ my $arch;
+ while (<IN>) {
+ if (m,Linux/(\S+)\s+\S+\s+Kernel Configuration,) {
+ $arch = $1;
+ last;
+ }
+ }
+ close IN;
+
+ if (!defined($arch)) {
+ doprint "Could not find arch from config file\n";
+ doprint "no dependencies used\n";
+ return;
+ }
+
+ # arch is really the subarch, we need to know
+ # what directory to look at.
+ if ($arch eq "i386" || $arch eq "x86_64") {
+ $arch = "x86";
+ }
+
+ my $kconfig = "$builddir/arch/$arch/Kconfig";
+
+ if (! -f $kconfig && $arch =~ /\d$/) {
+ my $orig = $arch;
+ # some subarchs have numbers, truncate them
+ $arch =~ s/\d*$//;
+ $kconfig = "$builddir/arch/$arch/Kconfig";
+ if (! -f $kconfig) {
+ doprint "No idea what arch dir $orig is for\n";
+ doprint "no dependencies used\n";
+ return;
+ }
+ }
+
+ read_kconfig($kconfig);
+}
+
+sub make_new_config {
+ my @configs = @_;
+
+ open (OUT, ">$output_config")
+ or dodie "Failed to write $output_config";
+
+ foreach my $config (@configs) {
+ print OUT "$config\n";
+ }
+ close OUT;
+}
+
+sub chomp_config {
+ my ($config) = @_;
+
+ $config =~ s/CONFIG_//;
+
+ return $config;
+}
+
+sub get_depends {
+ my ($dep) = @_;
+
+ my $kconfig = chomp_config $dep;
+
+ $dep = $depends{"$kconfig"};
+
+ # the dep string we have saves the dependencies as they
+ # were found, including expressions like ! && ||. We
+ # want to split this out into just an array of configs.
+
+ my $valid = "A-Za-z_0-9";
+
+ my @configs;
+
+ while ($dep =~ /[$valid]/) {
+
+ if ($dep =~ /^[^$valid]*([$valid]+)/) {
+ my $conf = "CONFIG_" . $1;
+
+ $configs[$#configs + 1] = $conf;
+
+ $dep =~ s/^[^$valid]*[$valid]+//;
+ } else {
+ dodie "this should never happen";
+ }
+ }
+
+ return @configs;
+}
+
+my %min_configs;
+my %keep_configs;
+my %save_configs;
+my %processed_configs;
+my %nochange_config;
+
+sub test_this_config {
+ my ($config) = @_;
+
+ my $found;
+
+ # if we already processed this config, skip it
+ if (defined($processed_configs{$config})) {
+ return undef;
+ }
+ $processed_configs{$config} = 1;
+
+ # if this config failed during this round, skip it
+ if (defined($nochange_config{$config})) {
+ return undef;
+ }
+
+ my $kconfig = chomp_config $config;
+
+ # Test dependencies first
+ if (defined($depends{"$kconfig"})) {
+ my @parents = get_depends $config;
+ foreach my $parent (@parents) {
+ # if the parent is in the min config, check it first
+ next if (!defined($min_configs{$parent}));
+ $found = test_this_config($parent);
+ if (defined($found)) {
+ return $found;
+ }
+ }
+ }
+
+ # Remove this config from the list of configs
+ # do a make olddefconfig and then read the resulting
+ # .config to make sure it is missing the config that
+ # we had before
+ my %configs = %min_configs;
+ delete $configs{$config};
+ make_new_config ((values %configs), (values %keep_configs));
+ make_oldconfig;
+ undef %configs;
+ assign_configs \%configs, $output_config;
+
+ if (!defined($configs{$config}) || $configs{$config} =~ /^#/) {
+ return $config;
+ }
+
+ doprint "disabling config $config did not change .config\n";
+
+ $nochange_config{$config} = 1;
+
+ return undef;
+}
+
+sub make_min_config {
+ my ($i) = @_;
+
+ my $type = $minconfig_type;
+ if ($type ne "boot" && $type ne "test") {
+ fail "Invalid MIN_CONFIG_TYPE '$minconfig_type'\n" .
+ " make_min_config works only with 'boot' and 'test'\n" and return;
+ }
+
+ if (!defined($output_minconfig)) {
+ fail "OUTPUT_MIN_CONFIG not defined" and return;
+ }
+
+ # If output_minconfig exists, and the start_minconfig
+ # came from min_config, than ask if we should use
+ # that instead.
+ if (-f $output_minconfig && !$start_minconfig_defined) {
+ print "$output_minconfig exists\n";
+ if (!defined($use_output_minconfig)) {
+ if (read_yn " Use it as minconfig?") {
+ $start_minconfig = $output_minconfig;
+ }
+ } elsif ($use_output_minconfig > 0) {
+ doprint "Using $output_minconfig as MIN_CONFIG\n";
+ $start_minconfig = $output_minconfig;
+ } else {
+ doprint "Set to still use MIN_CONFIG as starting point\n";
+ }
+ }
+
+ if (!defined($start_minconfig)) {
+ fail "START_MIN_CONFIG or MIN_CONFIG not defined" and return;
+ }
+
+ my $temp_config = "$tmpdir/temp_config";
+
+ # First things first. We build an allnoconfig to find
+ # out what the defaults are that we can't touch.
+ # Some are selections, but we really can't handle selections.
+
+ my $save_minconfig = $minconfig;
+ undef $minconfig;
+
+ run_command "$make allnoconfig" or return 0;
+
+ read_depends;
+
+ process_config_ignore $output_config;
+
+ undef %save_configs;
+ undef %min_configs;
+
+ if (defined($ignore_config)) {
+ # make sure the file exists
+ `touch $ignore_config`;
+ assign_configs \%save_configs, $ignore_config;
+ }
+
+ %keep_configs = %save_configs;
+
+ doprint "Load initial configs from $start_minconfig\n";
+
+ # Look at the current min configs, and save off all the
+ # ones that were set via the allnoconfig
+ assign_configs \%min_configs, $start_minconfig;
+
+ my @config_keys = keys %min_configs;
+
+ # All configs need a depcount
+ foreach my $config (@config_keys) {
+ my $kconfig = chomp_config $config;
+ if (!defined $depcount{$kconfig}) {
+ $depcount{$kconfig} = 0;
+ }
+ }
+
+ # Remove anything that was set by the make allnoconfig
+ # we shouldn't need them as they get set for us anyway.
+ foreach my $config (@config_keys) {
+ # Remove anything in the ignore_config
+ if (defined($keep_configs{$config})) {
+ my $file = $ignore_config;
+ $file =~ s,.*/(.*?)$,$1,;
+ doprint "$config set by $file ... ignored\n";
+ delete $min_configs{$config};
+ next;
+ }
+ # But make sure the settings are the same. If a min config
+ # sets a selection, we do not want to get rid of it if
+ # it is not the same as what we have. Just move it into
+ # the keep configs.
+ if (defined($config_ignore{$config})) {
+ if ($config_ignore{$config} ne $min_configs{$config}) {
+ doprint "$config is in allnoconfig as '$config_ignore{$config}'";
+ doprint " but it is '$min_configs{$config}' in minconfig .. keeping\n";
+ $keep_configs{$config} = $min_configs{$config};
+ } else {
+ doprint "$config set by allnoconfig ... ignored\n";
+ }
+ delete $min_configs{$config};
+ }
+ }
+
+ my $done = 0;
+ my $take_two = 0;
+
+ while (!$done) {
+
+ my $config;
+ my $found;
+
+ # Now disable each config one by one and do a make oldconfig
+ # till we find a config that changes our list.
+
+ my @test_configs = keys %min_configs;
+
+ # Sort keys by who is most dependent on
+ @test_configs = sort { $depcount{chomp_config($b)} <=> $depcount{chomp_config($a)} }
+ @test_configs ;
+
+ # Put configs that did not modify the config at the end.
+ my $reset = 1;
+ for (my $i = 0; $i < $#test_configs; $i++) {
+ if (!defined($nochange_config{$test_configs[0]})) {
+ $reset = 0;
+ last;
+ }
+ # This config didn't change the .config last time.
+ # Place it at the end
+ my $config = shift @test_configs;
+ push @test_configs, $config;
+ }
+
+ # if every test config has failed to modify the .config file
+ # in the past, then reset and start over.
+ if ($reset) {
+ undef %nochange_config;
+ }
+
+ undef %processed_configs;
+
+ foreach my $config (@test_configs) {
+
+ $found = test_this_config $config;
+
+ last if (defined($found));
+
+ # oh well, try another config
+ }
+
+ if (!defined($found)) {
+ # we could have failed due to the nochange_config hash
+ # reset and try again
+ if (!$take_two) {
+ undef %nochange_config;
+ $take_two = 1;
+ next;
+ }
+ doprint "No more configs found that we can disable\n";
+ $done = 1;
+ last;
+ }
+ $take_two = 0;
+
+ $config = $found;
+
+ doprint "Test with $config disabled\n";
+
+ # set in_bisect to keep build and monitor from dieing
+ $in_bisect = 1;
+
+ my $failed = 0;
+ build "oldconfig" or $failed = 1;
+ if (!$failed) {
+ start_monitor_and_install or $failed = 1;
+
+ if ($type eq "test" && !$failed) {
+ do_run_test or $failed = 1;
+ }
+
+ end_monitor;
+ }
+
+ $in_bisect = 0;
+
+ if ($failed) {
+ doprint "$min_configs{$config} is needed to boot the box... keeping\n";
+ # this config is needed, add it to the ignore list.
+ $keep_configs{$config} = $min_configs{$config};
+ $save_configs{$config} = $min_configs{$config};
+ delete $min_configs{$config};
+
+ # update new ignore configs
+ if (defined($ignore_config)) {
+ open (OUT, ">$temp_config")
+ or dodie "Can't write to $temp_config";
+ foreach my $config (keys %save_configs) {
+ print OUT "$save_configs{$config}\n";
+ }
+ close OUT;
+ run_command "mv $temp_config $ignore_config" or
+ dodie "failed to copy update to $ignore_config";
+ }
+
+ } else {
+ # We booted without this config, remove it from the minconfigs.
+ doprint "$config is not needed, disabling\n";
+
+ delete $min_configs{$config};
+
+ # Also disable anything that is not enabled in this config
+ my %configs;
+ assign_configs \%configs, $output_config;
+ my @config_keys = keys %min_configs;
+ foreach my $config (@config_keys) {
+ if (!defined($configs{$config})) {
+ doprint "$config is not set, disabling\n";
+ delete $min_configs{$config};
+ }
+ }
+
+ # Save off all the current mandatory configs
+ open (OUT, ">$temp_config")
+ or dodie "Can't write to $temp_config";
+ foreach my $config (keys %keep_configs) {
+ print OUT "$keep_configs{$config}\n";
+ }
+ foreach my $config (keys %min_configs) {
+ print OUT "$min_configs{$config}\n";
+ }
+ close OUT;
+
+ run_command "mv $temp_config $output_minconfig" or
+ dodie "failed to copy update to $output_minconfig";
+ }
+
+ doprint "Reboot and wait $sleep_time seconds\n";
+ reboot_to_good $sleep_time;
+ }
+
+ success $i;
+ return 1;
+}
+
+sub make_warnings_file {
+ my ($i) = @_;
+
+ if (!defined($warnings_file)) {
+ dodie "Must define WARNINGS_FILE for make_warnings_file test";
+ }
+
+ if ($build_type eq "nobuild") {
+ dodie "BUILD_TYPE can not be 'nobuild' for make_warnings_file test";
+ }
+
+ build $build_type or dodie "Failed to build";
+
+ open(OUT, ">$warnings_file") or dodie "Can't create $warnings_file";
+
+ open(IN, $buildlog) or dodie "Can't open $buildlog";
+ while (<IN>) {
+
+ # Some compilers use UTF-8 extended for quotes
+ # for distcc heterogeneous systems, this causes issues
+ s/$utf8_quote/'/g;
+
+ if (/$check_build_re/) {
+ print OUT;
+ }
+ }
+ close(IN);
+
+ close(OUT);
+
+ success $i;
+}
+
+$#ARGV < 1 or die "ktest.pl version: $VERSION\n usage: ktest.pl [config-file]\n";
+
+if ($#ARGV == 0) {
+ $ktest_config = $ARGV[0];
+ if (! -f $ktest_config) {
+ print "$ktest_config does not exist.\n";
+ if (!read_yn "Create it?") {
+ exit 0;
+ }
+ }
+}
+
+if (! -f $ktest_config) {
+ $newconfig = 1;
+ get_test_case;
+ open(OUT, ">$ktest_config") or die "Can not create $ktest_config";
+ print OUT << "EOF"
+# Generated by ktest.pl
+#
+
+# PWD is a ktest.pl variable that will result in the process working
+# directory that ktest.pl is executed in.
+
+# THIS_DIR is automatically assigned the PWD of the path that generated
+# the config file. It is best to use this variable when assigning other
+# directory paths within this directory. This allows you to easily
+# move the test cases to other locations or to other machines.
+#
+THIS_DIR := $variable{"PWD"}
+
+# Define each test with TEST_START
+# The config options below it will override the defaults
+TEST_START
+TEST_TYPE = $default{"TEST_TYPE"}
+
+DEFAULTS
+EOF
+;
+ close(OUT);
+}
+read_config $ktest_config;
+
+if (defined($opt{"LOG_FILE"})) {
+ $opt{"LOG_FILE"} = eval_option("LOG_FILE", $opt{"LOG_FILE"}, -1);
+}
+
+# Append any configs entered in manually to the config file.
+my @new_configs = keys %entered_configs;
+if ($#new_configs >= 0) {
+ print "\nAppending entered in configs to $ktest_config\n";
+ open(OUT, ">>$ktest_config") or die "Can not append to $ktest_config";
+ foreach my $config (@new_configs) {
+ print OUT "$config = $entered_configs{$config}\n";
+ $opt{$config} = process_variables($entered_configs{$config});
+ }
+}
+
+if ($opt{"CLEAR_LOG"} && defined($opt{"LOG_FILE"})) {
+ unlink $opt{"LOG_FILE"};
+}
+
+doprint "\n\nSTARTING AUTOMATED TESTS\n\n";
+
+for (my $i = 0, my $repeat = 1; $i <= $opt{"NUM_TESTS"}; $i += $repeat) {
+
+ if (!$i) {
+ doprint "DEFAULT OPTIONS:\n";
+ } else {
+ doprint "\nTEST $i OPTIONS";
+ if (defined($repeat_tests{$i})) {
+ $repeat = $repeat_tests{$i};
+ doprint " ITERATE $repeat";
+ }
+ doprint "\n";
+ }
+
+ foreach my $option (sort keys %opt) {
+
+ if ($option =~ /\[(\d+)\]$/) {
+ next if ($i != $1);
+ } else {
+ next if ($i);
+ }
+
+ doprint "$option = $opt{$option}\n";
+ }
+}
+
+sub option_defined {
+ my ($option) = @_;
+
+ if (defined($opt{$option}) && $opt{$option} !~ /^\s*$/) {
+ return 1;
+ }
+
+ return 0;
+}
+
+sub __set_test_option {
+ my ($name, $i) = @_;
+
+ my $option = "$name\[$i\]";
+
+ if (option_defined($option)) {
+ return $opt{$option};
+ }
+
+ foreach my $test (keys %repeat_tests) {
+ if ($i >= $test &&
+ $i < $test + $repeat_tests{$test}) {
+ $option = "$name\[$test\]";
+ if (option_defined($option)) {
+ return $opt{$option};
+ }
+ }
+ }
+
+ if (option_defined($name)) {
+ return $opt{$name};
+ }
+
+ return undef;
+}
+
+sub set_test_option {
+ my ($name, $i) = @_;
+
+ my $option = __set_test_option($name, $i);
+ return $option if (!defined($option));
+
+ return eval_option($name, $option, $i);
+}
+
+sub find_mailer {
+ my ($mailer) = @_;
+
+ my @paths = split /:/, $ENV{PATH};
+
+ # sendmail is usually in /usr/sbin
+ $paths[$#paths + 1] = "/usr/sbin";
+
+ foreach my $path (@paths) {
+ if (-x "$path/$mailer") {
+ return $path;
+ }
+ }
+
+ return undef;
+}
+
+sub do_send_mail {
+ my ($subject, $message) = @_;
+
+ if (!defined($mail_path)) {
+ # find the mailer
+ $mail_path = find_mailer $mailer;
+ if (!defined($mail_path)) {
+ die "\nCan not find $mailer in PATH\n";
+ }
+ }
+
+ if (!defined($mail_command)) {
+ if ($mailer eq "mail" || $mailer eq "mailx") {
+ $mail_command = "\$MAIL_PATH/\$MAILER -s \'\$SUBJECT\' \$MAILTO <<< \'\$MESSAGE\'";
+ } elsif ($mailer eq "sendmail" ) {
+ $mail_command = "echo \'Subject: \$SUBJECT\n\n\$MESSAGE\' | \$MAIL_PATH/\$MAILER -t \$MAILTO";
+ } else {
+ die "\nYour mailer: $mailer is not supported.\n";
+ }
+ }
+
+ $mail_command =~ s/\$MAILER/$mailer/g;
+ $mail_command =~ s/\$MAIL_PATH/$mail_path/g;
+ $mail_command =~ s/\$MAILTO/$mailto/g;
+ $mail_command =~ s/\$SUBJECT/$subject/g;
+ $mail_command =~ s/\$MESSAGE/$message/g;
+
+ my $ret = run_command $mail_command;
+ if (!$ret && defined($file)) {
+ # try again without the file
+ $message .= "\n\n*** FAILED TO SEND LOG ***\n\n";
+ do_send_email($subject, $message);
+ }
+}
+
+sub send_email {
+
+ if (defined($mailto)) {
+ if (!defined($mailer)) {
+ doprint "No email sent: email or mailer not specified in config.\n";
+ return;
+ }
+ do_send_mail @_;
+ }
+}
+
+sub cancel_test {
+ if ($email_when_canceled) {
+ send_email("KTEST: Your [$test_type] test was cancelled",
+ "Your test started at $script_start_time was cancelled: sig int");
+ }
+ die "\nCaught Sig Int, test interrupted: $!\n"
+}
+
+$SIG{INT} = qw(cancel_test);
+
+# First we need to do is the builds
+for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) {
+
+ # Do not reboot on failing test options
+ $no_reboot = 1;
+ $reboot_success = 0;
+
+ $have_version = 0;
+
+ $iteration = $i;
+
+ $build_time = 0;
+ $install_time = 0;
+ $reboot_time = 0;
+ $test_time = 0;
+
+ undef %force_config;
+
+ my $makecmd = set_test_option("MAKE_CMD", $i);
+
+ $outputdir = set_test_option("OUTPUT_DIR", $i);
+ $builddir = set_test_option("BUILD_DIR", $i);
+
+ chdir $builddir || dodie "can't change directory to $builddir";
+
+ if (!-d $outputdir) {
+ mkpath($outputdir) or
+ dodie "can't create $outputdir";
+ }
+
+ $make = "$makecmd O=$outputdir";
+
+ # Load all the options into their mapped variable names
+ foreach my $opt (keys %option_map) {
+ ${$option_map{$opt}} = set_test_option($opt, $i);
+ }
+
+ $start_minconfig_defined = 1;
+
+ # The first test may override the PRE_KTEST option
+ if ($i == 1) {
+ if (defined($pre_ktest)) {
+ doprint "\n";
+ run_command $pre_ktest;
+ }
+ if ($email_when_started) {
+ send_email("KTEST: Your [$test_type] test was started",
+ "Your test was started on $script_start_time");
+ }
+ }
+
+ # Any test can override the POST_KTEST option
+ # The last test takes precedence.
+ if (defined($post_ktest)) {
+ $final_post_ktest = $post_ktest;
+ }
+
+ if (!defined($start_minconfig)) {
+ $start_minconfig_defined = 0;
+ $start_minconfig = $minconfig;
+ }
+
+ if (!-d $tmpdir) {
+ mkpath($tmpdir) or
+ dodie "can't create $tmpdir";
+ }
+
+ $ENV{"SSH_USER"} = $ssh_user;
+ $ENV{"MACHINE"} = $machine;
+
+ $buildlog = "$tmpdir/buildlog-$machine";
+ $testlog = "$tmpdir/testlog-$machine";
+ $dmesg = "$tmpdir/dmesg-$machine";
+ $output_config = "$outputdir/.config";
+
+ if (!$buildonly) {
+ $target = "$ssh_user\@$machine";
+ if ($reboot_type eq "grub") {
+ dodie "GRUB_MENU not defined" if (!defined($grub_menu));
+ } elsif ($reboot_type eq "grub2") {
+ dodie "GRUB_MENU not defined" if (!defined($grub_menu));
+ dodie "GRUB_FILE not defined" if (!defined($grub_file));
+ } elsif ($reboot_type eq "syslinux") {
+ dodie "SYSLINUX_LABEL not defined" if (!defined($syslinux_label));
+ }
+ }
+
+ my $run_type = $build_type;
+ if ($test_type eq "patchcheck") {
+ $run_type = $patchcheck_type;
+ } elsif ($test_type eq "bisect") {
+ $run_type = $bisect_type;
+ } elsif ($test_type eq "config_bisect") {
+ $run_type = $config_bisect_type;
+ } elsif ($test_type eq "make_min_config") {
+ $run_type = "";
+ } elsif ($test_type eq "make_warnings_file") {
+ $run_type = "";
+ }
+
+ # mistake in config file?
+ if (!defined($run_type)) {
+ $run_type = "ERROR";
+ }
+
+ my $installme = "";
+ $installme = " no_install" if ($no_install);
+
+ my $name = "";
+
+ if (defined($test_name)) {
+ $name = " ($test_name)";
+ }
+
+ doprint "\n\n";
+ doprint "RUNNING TEST $i of $opt{NUM_TESTS}$name with option $test_type $run_type$installme\n\n";
+
+ if (defined($pre_test)) {
+ run_command $pre_test;
+ }
+
+ unlink $dmesg;
+ unlink $buildlog;
+ unlink $testlog;
+
+ if (defined($addconfig)) {
+ my $min = $minconfig;
+ if (!defined($minconfig)) {
+ $min = "";
+ }
+ run_command "cat $addconfig $min > $tmpdir/add_config" or
+ dodie "Failed to create temp config";
+ $minconfig = "$tmpdir/add_config";
+ }
+
+ if (defined($checkout)) {
+ run_command "git checkout $checkout" or
+ dodie "failed to checkout $checkout";
+ }
+
+ $no_reboot = 0;
+
+ # A test may opt to not reboot the box
+ if ($reboot_on_success) {
+ $reboot_success = 1;
+ }
+
+ if ($test_type eq "bisect") {
+ bisect $i;
+ next;
+ } elsif ($test_type eq "config_bisect") {
+ config_bisect $i;
+ next;
+ } elsif ($test_type eq "patchcheck") {
+ patchcheck $i;
+ next;
+ } elsif ($test_type eq "make_min_config") {
+ make_min_config $i;
+ next;
+ } elsif ($test_type eq "make_warnings_file") {
+ $no_reboot = 1;
+ make_warnings_file $i;
+ next;
+ }
+
+ if ($build_type ne "nobuild") {
+ build $build_type or next;
+ check_buildlog or next;
+ }
+
+ if ($test_type eq "install") {
+ get_version;
+ install;
+ success $i;
+ next;
+ }
+
+ if ($test_type ne "build") {
+ my $failed = 0;
+ start_monitor_and_install or $failed = 1;
+
+ if (!$failed && $test_type ne "boot" && defined($run_test)) {
+ do_run_test or $failed = 1;
+ }
+ end_monitor;
+ if ($failed) {
+ print_times;
+ next;
+ }
+ }
+
+ print_times;
+
+ success $i;
+}
+
+if (defined($final_post_ktest)) {
+ run_command $final_post_ktest;
+}
+
+if ($opt{"POWEROFF_ON_SUCCESS"}) {
+ halt;
+} elsif ($opt{"REBOOT_ON_SUCCESS"} && !do_not_reboot && $reboot_success) {
+ reboot_to_good;
+} elsif (defined($switch_to_good)) {
+ # still need to get to the good kernel
+ run_command $switch_to_good;
+}
+
+
+doprint "\n $successes of $opt{NUM_TESTS} tests were successful\n\n";
+
+if ($email_when_finished) {
+ send_email("KTEST: Your [$test_type] test has finished!",
+ "$successes of $opt{NUM_TESTS} tests started at $script_start_time were successful!");
+}
+exit 0;
diff --git a/tools/testing/ktest/sample.conf b/tools/testing/ktest/sample.conf
new file mode 100644
index 000000000..6ca6ca0ce
--- /dev/null
+++ b/tools/testing/ktest/sample.conf
@@ -0,0 +1,1348 @@
+#
+# Config file for ktest.pl
+#
+# Place your customized version of this, in the working directory that
+# ktest.pl is run from. By default, ktest.pl will look for a file
+# called "ktest.conf", but you can name it anything you like and specify
+# the name of your config file as the first argument of ktest.pl.
+#
+# Note, all paths must be absolute
+#
+
+# Options set in the beginning of the file are considered to be
+# default options. These options can be overriden by test specific
+# options, with the following exceptions:
+#
+# LOG_FILE
+# CLEAR_LOG
+# POWEROFF_ON_SUCCESS
+# REBOOT_ON_SUCCESS
+#
+# Test specific options are set after the label:
+#
+# TEST_START
+#
+# The options after a TEST_START label are specific to that test.
+# Each TEST_START label will set up a new test. If you want to
+# perform a test more than once, you can add the ITERATE label
+# to it followed by the number of times you want that test
+# to iterate. If the ITERATE is left off, the test will only
+# be performed once.
+#
+# TEST_START ITERATE 10
+#
+# You can skip a test by adding SKIP (before or after the ITERATE
+# and number)
+#
+# TEST_START SKIP
+#
+# TEST_START SKIP ITERATE 10
+#
+# TEST_START ITERATE 10 SKIP
+#
+# The SKIP label causes the options and the test itself to be ignored.
+# This is useful to set up several different tests in one config file, and
+# only enabling the ones you want to use for a current test run.
+#
+# You can add default options anywhere in the file as well
+# with the DEFAULTS tag. This allows you to have default options
+# after the test options to keep the test options at the top
+# of the file. You can even place the DEFAULTS tag between
+# test cases (but not in the middle of a single test case)
+#
+# TEST_START
+# MIN_CONFIG = /home/test/config-test1
+#
+# DEFAULTS
+# MIN_CONFIG = /home/test/config-default
+#
+# TEST_START ITERATE 10
+#
+# The above will run the first test with MIN_CONFIG set to
+# /home/test/config-test-1. Then 10 tests will be executed
+# with MIN_CONFIG with /home/test/config-default.
+#
+# You can also disable defaults with the SKIP option
+#
+# DEFAULTS SKIP
+# MIN_CONFIG = /home/test/config-use-sometimes
+#
+# DEFAULTS
+# MIN_CONFIG = /home/test/config-most-times
+#
+# The above will ignore the first MIN_CONFIG. If you want to
+# use the first MIN_CONFIG, remove the SKIP from the first
+# DEFAULTS tag and add it to the second. Be careful, options
+# may only be declared once per test or default. If you have
+# the same option name under the same test or as default
+# ktest will fail to execute, and no tests will run.
+#
+# DEFAULTS OVERRIDE
+#
+# Options defined in the DEFAULTS section can not be duplicated
+# even if they are defined in two different DEFAULT sections.
+# This is done to catch mistakes where an option is added but
+# the previous option was forgotten about and not commented.
+#
+# The OVERRIDE keyword can be added to a section to allow this
+# section to override other DEFAULT sections values that have
+# been defined previously. It will only override options that
+# have been defined before its use. Options defined later
+# in a non override section will still error. The same option
+# can not be defined in the same section even if that section
+# is marked OVERRIDE.
+#
+#
+#
+# Both TEST_START and DEFAULTS sections can also have the IF keyword
+# The value after the IF must evaluate into a 0 or non 0 positive
+# integer, and can use the config variables (explained below).
+#
+# DEFAULTS IF ${IS_X86_32}
+#
+# The above will process the DEFAULTS section if the config
+# variable IS_X86_32 evaluates to a non zero positive integer
+# otherwise if it evaluates to zero, it will act the same
+# as if the SKIP keyword was used.
+#
+# The ELSE keyword can be used directly after a section with
+# a IF statement.
+#
+# TEST_START IF ${RUN_NET_TESTS}
+# BUILD_TYPE = useconfig:${CONFIG_DIR}/config-network
+#
+# ELSE
+#
+# BUILD_TYPE = useconfig:${CONFIG_DIR}/config-normal
+#
+#
+# The ELSE keyword can also contain an IF statement to allow multiple
+# if then else sections. But all the sections must be either
+# DEFAULT or TEST_START, they can not be a mixture.
+#
+# TEST_START IF ${RUN_NET_TESTS}
+# BUILD_TYPE = useconfig:${CONFIG_DIR}/config-network
+#
+# ELSE IF ${RUN_DISK_TESTS}
+# BUILD_TYPE = useconfig:${CONFIG_DIR}/config-tests
+#
+# ELSE IF ${RUN_CPU_TESTS}
+# BUILD_TYPE = useconfig:${CONFIG_DIR}/config-cpu
+#
+# ELSE
+# BUILD_TYPE = useconfig:${CONFIG_DIR}/config-network
+#
+# The if statement may also have comparisons that will and for
+# == and !=, strings may be used for both sides.
+#
+# BOX_TYPE := x86_32
+#
+# DEFAULTS IF ${BOX_TYPE} == x86_32
+# BUILD_TYPE = useconfig:${CONFIG_DIR}/config-32
+# ELSE
+# BUILD_TYPE = useconfig:${CONFIG_DIR}/config-64
+#
+# The DEFINED keyword can be used by the IF statements too.
+# It returns true if the given config variable or option has been defined
+# or false otherwise.
+#
+#
+# DEFAULTS IF DEFINED USE_CC
+# CC := ${USE_CC}
+# ELSE
+# CC := gcc
+#
+#
+# As well as NOT DEFINED.
+#
+# DEFAULTS IF NOT DEFINED MAKE_CMD
+# MAKE_CMD := make ARCH=x86
+#
+#
+# And/or ops (&&,||) may also be used to make complex conditionals.
+#
+# TEST_START IF (DEFINED ALL_TESTS || ${MYTEST} == boottest) && ${MACHINE} == gandalf
+#
+# Notice the use of parentheses. Without any parentheses the above would be
+# processed the same as:
+#
+# TEST_START IF DEFINED ALL_TESTS || (${MYTEST} == boottest && ${MACHINE} == gandalf)
+#
+#
+#
+# INCLUDE file
+#
+# The INCLUDE keyword may be used in DEFAULT sections. This will
+# read another config file and process that file as well. The included
+# file can include other files, add new test cases or default
+# statements. Config variables will be passed to these files and changes
+# to config variables will be seen by top level config files. Including
+# a file is processed just like the contents of the file was cut and pasted
+# into the top level file, except, that include files that end with
+# TEST_START sections will have that section ended at the end of
+# the include file. That is, an included file is included followed
+# by another DEFAULT keyword.
+#
+# Unlike other files referenced in this config, the file path does not need
+# to be absolute. If the file does not start with '/', then the directory
+# that the current config file was located in is used. If no config by the
+# given name is found there, then the current directory is searched.
+#
+# INCLUDE myfile
+# DEFAULT
+#
+# is the same as:
+#
+# INCLUDE myfile
+#
+# Note, if the include file does not contain a full path, the file is
+# searched first by the location of the original include file, and then
+# by the location that ktest.pl was executed in.
+#
+
+#### Config variables ####
+#
+# This config file can also contain "config variables".
+# These are assigned with ":=" instead of the ktest option
+# assigment "=".
+#
+# The difference between ktest options and config variables
+# is that config variables can be used multiple times,
+# where each instance will override the previous instance.
+# And that they only live at time of processing this config.
+#
+# The advantage to config variables are that they can be used
+# by any option or any other config variables to define thing
+# that you may use over and over again in the options.
+#
+# For example:
+#
+# USER := root
+# TARGET := mybox
+# TEST_CASE := ssh ${USER}@${TARGET} /path/to/my/test
+#
+# TEST_START
+# MIN_CONFIG = config1
+# TEST = ${TEST_CASE}
+#
+# TEST_START
+# MIN_CONFIG = config2
+# TEST = ${TEST_CASE}
+#
+# TEST_CASE := ssh ${USER}@${TARGET} /path/to/my/test2
+#
+# TEST_START
+# MIN_CONFIG = config1
+# TEST = ${TEST_CASE}
+#
+# TEST_START
+# MIN_CONFIG = config2
+# TEST = ${TEST_CASE}
+#
+# TEST_DIR := /home/me/test
+#
+# BUILD_DIR = ${TEST_DIR}/linux.git
+# OUTPUT_DIR = ${TEST_DIR}/test
+#
+# Note, the config variables are evaluated immediately, thus
+# updating TARGET after TEST_CASE has been assigned does nothing
+# to TEST_CASE.
+#
+# As shown in the example, to evaluate a config variable, you
+# use the ${X} convention. Simple $X will not work.
+#
+# If the config variable does not exist, the ${X} will not
+# be evaluated. Thus:
+#
+# MAKE_CMD = PATH=/mypath:${PATH} make
+#
+# If PATH is not a config variable, then the ${PATH} in
+# the MAKE_CMD option will be evaluated by the shell when
+# the MAKE_CMD option is passed into shell processing.
+
+#### Using options in other options ####
+#
+# Options that are defined in the config file may also be used
+# by other options. All options are evaulated at time of
+# use (except that config variables are evaluated at config
+# processing time).
+#
+# If an ktest option is used within another option, instead of
+# typing it again in that option you can simply use the option
+# just like you can config variables.
+#
+# MACHINE = mybox
+#
+# TEST = ssh root@${MACHINE} /path/to/test
+#
+# The option will be used per test case. Thus:
+#
+# TEST_TYPE = test
+# TEST = ssh root@{MACHINE}
+#
+# TEST_START
+# MACHINE = box1
+#
+# TEST_START
+# MACHINE = box2
+#
+# For both test cases, MACHINE will be evaluated at the time
+# of the test case. The first test will run ssh root@box1
+# and the second will run ssh root@box2.
+
+#### Mandatory Default Options ####
+
+# These options must be in the default section, although most
+# may be overridden by test options.
+
+# The machine hostname that you will test
+#MACHINE = target
+
+# The box is expected to have ssh on normal bootup, provide the user
+# (most likely root, since you need privileged operations)
+#SSH_USER = root
+
+# The directory that contains the Linux source code
+#BUILD_DIR = /home/test/linux.git
+
+# The directory that the objects will be built
+# (can not be same as BUILD_DIR)
+#OUTPUT_DIR = /home/test/build/target
+
+# The location of the compiled file to copy to the target
+# (relative to OUTPUT_DIR)
+#BUILD_TARGET = arch/x86/boot/bzImage
+
+# The place to put your image on the test machine
+#TARGET_IMAGE = /boot/vmlinuz-test
+
+# A script or command to reboot the box
+#
+# Here is a digital loggers power switch example
+#POWER_CYCLE = wget --no-proxy -O /dev/null -q --auth-no-challenge 'http://admin:admin@power/outlet?5=CCL'
+#
+# Here is an example to reboot a virtual box on the current host
+# with the name "Guest".
+#POWER_CYCLE = virsh destroy Guest; sleep 5; virsh start Guest
+
+# The script or command that reads the console
+#
+# If you use ttywatch server, something like the following would work.
+#CONSOLE = nc -d localhost 3001
+#
+# For a virtual machine with guest name "Guest".
+#CONSOLE = virsh console Guest
+
+# Signal to send to kill console.
+# ktest.pl will create a child process to monitor the console.
+# When the console is finished, ktest will kill the child process
+# with this signal.
+# (default INT)
+#CLOSE_CONSOLE_SIGNAL = HUP
+
+# Required version ending to differentiate the test
+# from other linux builds on the system.
+#LOCALVERSION = -test
+
+# For REBOOT_TYPE = grub2, you must specify where the grub.cfg
+# file is. This is the file that is searched to find the menu
+# option to boot to with GRUB_REBOOT
+#GRUB_FILE = /boot/grub2/grub.cfg
+
+# The tool for REBOOT_TYPE = grub2 to set the next reboot kernel
+# to boot into (one shot mode).
+# (default grub2_reboot)
+#GRUB_REBOOT = grub2_reboot
+
+# The grub title name for the test kernel to boot
+# (Only mandatory if REBOOT_TYPE = grub or grub2)
+#
+# Note, ktest.pl will not update the grub menu.lst, you need to
+# manually add an option for the test. ktest.pl will search
+# the grub menu.lst for this option to find what kernel to
+# reboot into.
+#
+# For example, if in the /boot/grub/menu.lst the test kernel title has:
+# title Test Kernel
+# kernel vmlinuz-test
+#
+# For grub2, a search of top level "menuentry"s are done. No
+# submenu is searched. The menu is found by searching for the
+# contents of GRUB_MENU in the line that starts with "menuentry".
+# You may want to include the quotes around the option. For example:
+# for: menuentry 'Test Kernel'
+# do a: GRUB_MENU = 'Test Kernel'
+# For customizing, add your entry in /etc/grub.d/40_custom.
+#
+#GRUB_MENU = Test Kernel
+
+# For REBOOT_TYPE = syslinux, the name of the syslinux executable
+# (on the target) to use to set up the next reboot to boot the
+# test kernel.
+# (default extlinux)
+#SYSLINUX = syslinux
+
+# For REBOOT_TYPE = syslinux, the path that is passed to to the
+# syslinux command where syslinux is installed.
+# (default /boot/extlinux)
+#SYSLINUX_PATH = /boot/syslinux
+
+# For REBOOT_TYPE = syslinux, the syslinux label that references the
+# test kernel in the syslinux config file.
+# (default undefined)
+#SYSLINUX_LABEL = "test-kernel"
+
+# A script to reboot the target into the test kernel
+# This and SWITCH_TO_TEST are about the same, except
+# SWITCH_TO_TEST is run even for REBOOT_TYPE = grub.
+# This may be left undefined.
+# (default undefined)
+#REBOOT_SCRIPT =
+
+#### Optional Config Options (all have defaults) ####
+
+# Email options for receiving notifications. Users must setup
+# the specified mailer prior to using this feature.
+#
+# (default undefined)
+#MAILTO =
+#
+# Supported mailers: sendmail, mail, mailx
+# (default sendmail)
+#MAILER = sendmail
+#
+# The executable to run
+# (default: for sendmail "/usr/sbin/sendmail", otherwise equals ${MAILER})
+#MAIL_EXEC = /usr/sbin/sendmail
+#
+# The command used to send mail, which uses the above options
+# can be modified. By default if the mailer is "sendmail" then
+# MAIL_COMMAND = echo \'Subject: $SUBJECT\n\n$MESSAGE\' | $MAIL_PATH/$MAILER -t $MAILTO
+# For mail or mailx:
+# MAIL_COMMAND = "$MAIL_PATH/$MAILER -s \'$SUBJECT\' $MAILTO <<< \'$MESSAGE\'
+# ktest.pl will do the substitution for MAIL_PATH, MAILER, MAILTO at the time
+# it sends the mail if "$FOO" format is used. If "${FOO}" format is used,
+# then the substitutions will occur at the time the config file is read.
+# But note, MAIL_PATH and MAILER require being set by the config file if
+# ${MAIL_PATH} or ${MAILER} are used, but not if $MAIL_PATH or $MAILER are.
+#MAIL_COMMAND = echo \'Subject: $SUBJECT\n\n$MESSAGE\' | $MAIL_PATH/$MAILER -t $MAILTO
+#
+# Errors are defined as those would terminate the script
+# (default 1)
+#EMAIL_ON_ERROR = 1
+# (default 1)
+#EMAIL_WHEN_FINISHED = 1
+# (default 0)
+#EMAIL_WHEN_STARTED = 1
+#
+# Users can cancel the test by Ctrl^C
+# (default 0)
+#EMAIL_WHEN_CANCELED = 1
+
+# Start a test setup. If you leave this off, all options
+# will be default and the test will run once.
+# This is a label and not really an option (it takes no value).
+# You can append ITERATE and a number after it to iterate the
+# test a number of times, or SKIP to ignore this test.
+#
+#TEST_START
+#TEST_START ITERATE 5
+#TEST_START SKIP
+
+# Have the following options as default again. Used after tests
+# have already been defined by TEST_START. Optionally, you can
+# just define all default options before the first TEST_START
+# and you do not need this option.
+#
+# This is a label and not really an option (it takes no value).
+# You can append SKIP to this label and the options within this
+# section will be ignored.
+#
+# DEFAULTS
+# DEFAULTS SKIP
+
+# If you want to execute some command before the first test runs
+# you can set this option. Note, it can be set as a default option
+# or an option in the first test case. All other test cases will
+# ignore it. If both the default and first test have this option
+# set, then the first test will take precedence.
+#
+# default (undefined)
+#PRE_KTEST = ${SSH} ~/set_up_test
+
+# If you want to execute some command after all the tests have
+# completed, you can set this option. Note, it can be set as a
+# default or any test case can override it. If multiple test cases
+# set this option, then the last test case that set it will take
+# precedence
+#
+# default (undefined)
+#POST_KTEST = ${SSH} ~/dismantle_test
+
+# The default test type (default test)
+# The test types may be:
+# build - only build the kernel, do nothing else
+# install - build and install, but do nothing else (does not reboot)
+# boot - build, install, and boot the kernel
+# test - build, boot and if TEST is set, run the test script
+# (If TEST is not set, it defaults back to boot)
+# bisect - Perform a bisect on the kernel (see BISECT_TYPE below)
+# patchcheck - Do a test on a series of commits in git (see PATCHCHECK below)
+#TEST_TYPE = test
+
+# Test to run if there is a successful boot and TEST_TYPE is test.
+# Must exit with 0 on success and non zero on error
+# default (undefined)
+#TEST = ssh user@machine /root/run_test
+
+# The build type is any make config type or special command
+# (default randconfig)
+# nobuild - skip the clean and build step
+# useconfig:/path/to/config - use the given config and run
+# oldconfig on it.
+# This option is ignored if TEST_TYPE is patchcheck or bisect
+#BUILD_TYPE = randconfig
+
+# The make command (default make)
+# If you are building a 32bit x86 on a 64 bit host
+#MAKE_CMD = CC=i386-gcc AS=i386-as make ARCH=i386
+
+# Any build options for the make of the kernel (not for other makes, like configs)
+# (default "")
+#BUILD_OPTIONS = -j20
+
+# If you need to do some special handling before installing
+# you can add a script with this option.
+# The environment variable KERNEL_VERSION will be set to the
+# kernel version that is used.
+#
+# default (undefined)
+#PRE_INSTALL = ssh user@target rm -rf '/lib/modules/*-test*'
+
+# If you need an initrd, you can add a script or code here to install
+# it. The environment variable KERNEL_VERSION will be set to the
+# kernel version that is used. Remember to add the initrd line
+# to your grub menu.lst file.
+#
+# Here's a couple of examples to use:
+#POST_INSTALL = ssh user@target /sbin/mkinitrd --allow-missing -f /boot/initramfs-test.img $KERNEL_VERSION
+#
+# or on some systems:
+#POST_INSTALL = ssh user@target /sbin/dracut -f /boot/initramfs-test.img $KERNEL_VERSION
+
+# If for some reason you just want to boot the kernel and you do not
+# want the test to install anything new. For example, you may just want
+# to boot test the same kernel over and over and do not want to go through
+# the hassle of installing anything, you can set this option to 1
+# (default 0)
+#NO_INSTALL = 1
+
+# If there is a command that you want to run before the individual test
+# case executes, then you can set this option
+#
+# default (undefined)
+#PRE_TEST = ${SSH} reboot_to_special_kernel
+
+# If there is a command you want to run after the individual test case
+# completes, then you can set this option.
+#
+# default (undefined)
+#POST_TEST = cd ${BUILD_DIR}; git reset --hard
+
+# If there is a script that you require to run before the build is done
+# you can specify it with PRE_BUILD.
+#
+# One example may be if you must add a temporary patch to the build to
+# fix a unrelated bug to perform a patchcheck test. This will apply the
+# patch before each build that is made. Use the POST_BUILD to do a git reset --hard
+# to remove the patch.
+#
+# (default undef)
+#PRE_BUILD = cd ${BUILD_DIR} && patch -p1 < /tmp/temp.patch
+
+# To specify if the test should fail if the PRE_BUILD fails,
+# PRE_BUILD_DIE needs to be set to 1. Otherwise the PRE_BUILD
+# result is ignored.
+# (default 0)
+# PRE_BUILD_DIE = 1
+
+# If there is a script that should run after the build is done
+# you can specify it with POST_BUILD.
+#
+# As the example in PRE_BUILD, POST_BUILD can be used to reset modifications
+# made by the PRE_BUILD.
+#
+# (default undef)
+#POST_BUILD = cd ${BUILD_DIR} && git reset --hard
+
+# To specify if the test should fail if the POST_BUILD fails,
+# POST_BUILD_DIE needs to be set to 1. Otherwise the POST_BUILD
+# result is ignored.
+# (default 0)
+#POST_BUILD_DIE = 1
+
+# Way to reboot the box to the test kernel.
+# Only valid options so far are "grub", "grub2", "syslinux" and "script"
+# (default grub)
+# If you specify grub, it will assume grub version 1
+# and will search in /boot/grub/menu.lst for the title $GRUB_MENU
+# and select that target to reboot to the kernel. If this is not
+# your setup, then specify "script" and have a command or script
+# specified in REBOOT_SCRIPT to boot to the target.
+#
+# For REBOOT_TYPE = grub2, you must define both GRUB_MENU and
+# GRUB_FILE.
+#
+# For REBOOT_TYPE = syslinux, you must define SYSLINUX_LABEL, and
+# perhaps modify SYSLINUX (default extlinux) and SYSLINUX_PATH
+# (default /boot/extlinux)
+#
+# The entry in /boot/grub/menu.lst must be entered in manually.
+# The test will not modify that file.
+#REBOOT_TYPE = grub
+
+# If you are using a machine that doesn't boot with grub, and
+# perhaps gets its kernel from a remote server (tftp), then
+# you can use this option to update the target image with the
+# test image.
+#
+# You could also do the same with POST_INSTALL, but the difference
+# between that option and this option is that POST_INSTALL runs
+# after the install, where this one runs just before a reboot.
+# (default undefined)
+#SWITCH_TO_TEST = cp ${OUTPUT_DIR}/${BUILD_TARGET} ${TARGET_IMAGE}
+
+# If you are using a machine that doesn't boot with grub, and
+# perhaps gets its kernel from a remote server (tftp), then
+# you can use this option to update the target image with the
+# the known good image to reboot safely back into.
+#
+# This option holds a command that will execute before needing
+# to reboot to a good known image.
+# (default undefined)
+#SWITCH_TO_GOOD = ssh ${SSH_USER}/${MACHINE} cp good_image ${TARGET_IMAGE}
+
+# The min config that is needed to build for the machine
+# A nice way to create this is with the following:
+#
+# $ ssh target
+# $ lsmod > mymods
+# $ scp mymods host:/tmp
+# $ exit
+# $ cd linux.git
+# $ rm .config
+# $ make LSMOD=mymods localyesconfig
+# $ grep '^CONFIG' .config > /home/test/config-min
+#
+# If you want even less configs:
+#
+# log in directly to target (do not ssh)
+#
+# $ su
+# # lsmod | cut -d' ' -f1 | xargs rmmod
+#
+# repeat the above several times
+#
+# # lsmod > mymods
+# # reboot
+#
+# May need to reboot to get your network back to copy the mymods
+# to the host, and then remove the previous .config and run the
+# localyesconfig again. The CONFIG_MIN generated like this will
+# not guarantee network activity to the box so the TEST_TYPE of
+# test may fail.
+#
+# You might also want to set:
+# CONFIG_CMDLINE="<your options here>"
+# randconfig may set the above and override your real command
+# line options.
+# (default undefined)
+#MIN_CONFIG = /home/test/config-min
+
+# Sometimes there's options that just break the boot and
+# you do not care about. Here are a few:
+# # CONFIG_STAGING is not set
+# Staging drivers are horrible, and can break the build.
+# # CONFIG_SCSI_DEBUG is not set
+# SCSI_DEBUG may change your root partition
+# # CONFIG_KGDB_SERIAL_CONSOLE is not set
+# KGDB may cause oops waiting for a connection that's not there.
+# This option points to the file containing config options that will be prepended
+# to the MIN_CONFIG (or be the MIN_CONFIG if it is not set)
+#
+# Note, config options in MIN_CONFIG will override these options.
+#
+# (default undefined)
+#ADD_CONFIG = /home/test/config-broken
+
+# The location on the host where to write temp files
+# (default /tmp/ktest/${MACHINE})
+#TMP_DIR = /tmp/ktest/${MACHINE}
+
+# Optional log file to write the status (recommended)
+# Note, this is a DEFAULT section only option.
+# (default undefined)
+#LOG_FILE = /home/test/logfiles/target.log
+
+# Remove old logfile if it exists before starting all tests.
+# Note, this is a DEFAULT section only option.
+# (default 0)
+#CLEAR_LOG = 0
+
+# Line to define a successful boot up in console output.
+# This is what the line contains, not the entire line. If you need
+# the entire line to match, then use regural expression syntax like:
+# (do not add any quotes around it)
+#
+# SUCCESS_LINE = ^MyBox Login:$
+#
+# (default "login:")
+#SUCCESS_LINE = login:
+
+# To speed up between reboots, defining a line that the
+# default kernel produces that represents that the default
+# kernel has successfully booted and can be used to pass
+# a new test kernel to it. Otherwise ktest.pl will wait till
+# SLEEP_TIME to continue.
+# (default undefined)
+#REBOOT_SUCCESS_LINE = login:
+
+# In case the console constantly fills the screen, having
+# a specified time to stop the test after success is recommended.
+# (in seconds)
+# (default 10)
+#STOP_AFTER_SUCCESS = 10
+
+# In case the console constantly fills the screen, having
+# a specified time to stop the test after failure is recommended.
+# (in seconds)
+# (default 60)
+#STOP_AFTER_FAILURE = 60
+
+# In case the console constantly fills the screen, having
+# a specified time to stop the test if it never succeeds nor fails
+# is recommended.
+# Note: this is ignored if a success or failure is detected.
+# (in seconds)
+# (default 600, -1 is to never stop)
+#STOP_TEST_AFTER = 600
+
+# Stop testing if a build fails. If set, the script will end if
+# a failure is detected, otherwise it will save off the .config,
+# dmesg and bootlog in a directory called
+# MACHINE-TEST_TYPE_BUILD_TYPE-fail-yyyymmddhhmmss
+# if the STORE_FAILURES directory is set.
+# (default 1)
+# Note, even if this is set to zero, there are some errors that still
+# stop the tests.
+#DIE_ON_FAILURE = 1
+
+# Directory to store failure directories on failure. If this is not
+# set, DIE_ON_FAILURE=0 will not save off the .config, dmesg and
+# bootlog. This option is ignored if DIE_ON_FAILURE is not set.
+# (default undefined)
+#STORE_FAILURES = /home/test/failures
+
+# Directory to store success directories on success. If this is not
+# set, the .config, dmesg and bootlog will not be saved if a
+# test succeeds.
+# (default undefined)
+#STORE_SUCCESSES = /home/test/successes
+
+# Build without doing a make mrproper, or removing .config
+# (default 0)
+#BUILD_NOCLEAN = 0
+
+# As the test reads the console, after it hits the SUCCESS_LINE
+# the time it waits for the monitor to settle down between reads
+# can usually be lowered.
+# (in seconds) (default 1)
+#BOOTED_TIMEOUT = 1
+
+# The timeout in seconds when we consider the box hung after
+# the console stop producing output. Be sure to leave enough
+# time here to get pass a reboot. Some machines may not produce
+# any console output for a long time during a reboot. You do
+# not want the test to fail just because the system was in
+# the process of rebooting to the test kernel.
+# (default 120)
+#TIMEOUT = 120
+
+# The timeout in seconds when to test if the box can be rebooted
+# or not. Before issuing the reboot command, a ssh connection
+# is attempted to see if the target machine is still active.
+# If the target does not connect within this timeout, a power cycle
+# is issued instead of a reboot.
+# CONNECT_TIMEOUT = 25
+
+# In between tests, a reboot of the box may occur, and this
+# is the time to wait for the console after it stops producing
+# output. Some machines may not produce a large lag on reboot
+# so this should accommodate it.
+# The difference between this and TIMEOUT, is that TIMEOUT happens
+# when rebooting to the test kernel. This sleep time happens
+# after a test has completed and we are about to start running
+# another test. If a reboot to the reliable kernel happens,
+# we wait SLEEP_TIME for the console to stop producing output
+# before starting the next test.
+#
+# You can speed up reboot times even more by setting REBOOT_SUCCESS_LINE.
+# (default 60)
+#SLEEP_TIME = 60
+
+# The time in between bisects to sleep (in seconds)
+# (default 60)
+#BISECT_SLEEP_TIME = 60
+
+# The max wait time (in seconds) for waiting for the console to finish.
+# If for some reason, the console is outputting content without
+# ever finishing, this will cause ktest to get stuck. This
+# option is the max time ktest will wait for the monitor (console)
+# to settle down before continuing.
+# (default 1800)
+#MAX_MONITOR_WAIT
+
+# The time in between patch checks to sleep (in seconds)
+# (default 60)
+#PATCHCHECK_SLEEP_TIME = 60
+
+# Reboot the target box on error (default 0)
+#REBOOT_ON_ERROR = 0
+
+# Power off the target on error (ignored if REBOOT_ON_ERROR is set)
+# Note, this is a DEFAULT section only option.
+# (default 0)
+#POWEROFF_ON_ERROR = 0
+
+# Power off the target after all tests have completed successfully
+# Note, this is a DEFAULT section only option.
+# (default 0)
+#POWEROFF_ON_SUCCESS = 0
+
+# Reboot the target after all test completed successfully (default 1)
+# (ignored if POWEROFF_ON_SUCCESS is set)
+#REBOOT_ON_SUCCESS = 1
+
+# In case there are isses with rebooting, you can specify this
+# to always powercycle after this amount of time after calling
+# reboot.
+# Note, POWERCYCLE_AFTER_REBOOT = 0 does NOT disable it. It just
+# makes it powercycle immediately after rebooting. Do not define
+# it if you do not want it.
+# (default undefined)
+#POWERCYCLE_AFTER_REBOOT = 5
+
+# In case there's isses with halting, you can specify this
+# to always poweroff after this amount of time after calling
+# halt.
+# Note, POWEROFF_AFTER_HALT = 0 does NOT disable it. It just
+# makes it poweroff immediately after halting. Do not define
+# it if you do not want it.
+# (default undefined)
+#POWEROFF_AFTER_HALT = 20
+
+# A script or command to power off the box (default undefined)
+# Needed for POWEROFF_ON_ERROR and SUCCESS
+#
+# Example for digital loggers power switch:
+#POWER_OFF = wget --no-proxy -O /dev/null -q --auth-no-challenge 'http://admin:admin@power/outlet?5=OFF'
+#
+# Example for a virtual guest call "Guest".
+#POWER_OFF = virsh destroy Guest
+
+# To have the build fail on "new" warnings, create a file that
+# contains a list of all known warnings (they must match exactly
+# to the line with 'warning:', 'error:' or 'Error:'. If the option
+# WARNINGS_FILE is set, then that file will be read, and if the
+# build detects a warning, it will examine this file and if the
+# warning does not exist in it, it will fail the build.
+#
+# Note, if this option is defined to a file that does not exist
+# then any warning will fail the build.
+# (see make_warnings_file below)
+#
+# (optional, default undefined)
+#WARNINGS_FILE = ${OUTPUT_DIR}/warnings_file
+
+# The way to execute a command on the target
+# (default ssh $SSH_USER@$MACHINE $SSH_COMMAND";)
+# The variables SSH_USER, MACHINE and SSH_COMMAND are defined
+#SSH_EXEC = ssh $SSH_USER@$MACHINE $SSH_COMMAND";
+
+# The way to copy a file to the target (install and modules)
+# (default scp $SRC_FILE $SSH_USER@$MACHINE:$DST_FILE)
+# The variables SSH_USER, MACHINE are defined by the config
+# SRC_FILE and DST_FILE are ktest internal variables and
+# should only have '$' and not the '${}' notation.
+# (default scp $SRC_FILE ${SSH_USER}@${MACHINE}:$DST_FILE)
+#SCP_TO_TARGET = echo skip scp for $SRC_FILE $DST_FILE
+
+# If install needs to be different than modules, then this
+# option will override the SCP_TO_TARGET for installation.
+# (default ${SCP_TO_TARGET} )
+#SCP_TO_TARGET_INSTALL = scp $SRC_FILE tftp@tftpserver:$DST_FILE
+
+# The nice way to reboot the target
+# (default ssh $SSH_USER@$MACHINE reboot)
+# The variables SSH_USER and MACHINE are defined.
+#REBOOT = ssh $SSH_USER@$MACHINE reboot
+
+# The way triple faults are detected is by testing the kernel
+# banner. If the kernel banner for the kernel we are testing is
+# found, and then later a kernel banner for another kernel version
+# is found, it is considered that we encountered a triple fault,
+# and there is no panic or callback, but simply a reboot.
+# To disable this (because it did a false positive) set the following
+# to 0.
+# (default 1)
+#DETECT_TRIPLE_FAULT = 0
+
+# All options in the config file should be either used by ktest
+# or could be used within a value of another option. If an option
+# in the config file is not used, ktest will warn about it and ask
+# if you want to continue.
+#
+# If you don't care if there are non-used options, enable this
+# option. Be careful though, a non-used option is usually a sign
+# of an option name being typed incorrectly.
+# (default 0)
+#IGNORE_UNUSED = 1
+
+# When testing a kernel that happens to have WARNINGs, and call
+# traces, ktest.pl will detect these and fail a boot or test run
+# due to warnings. By setting this option, ktest will ignore
+# call traces, and will not fail a test if the kernel produces
+# an oops. Use this option with care.
+# (default 0)
+#IGNORE_ERRORS = 1
+
+#### Per test run options ####
+# The following options are only allowed in TEST_START sections.
+# They are ignored in the DEFAULTS sections.
+#
+# All of these are optional and undefined by default, although
+# some of these options are required for TEST_TYPE of patchcheck
+# and bisect.
+#
+#
+# CHECKOUT = branch
+#
+# If the BUILD_DIR is a git repository, then you can set this option
+# to checkout the given branch before running the TEST. If you
+# specify this for the first run, that branch will be used for
+# all preceding tests until a new CHECKOUT is set.
+#
+#
+# TEST_NAME = name
+#
+# If you want the test to have a name that is displayed in
+# the test result banner at the end of the test, then use this
+# option. This is useful to search for the RESULT keyword and
+# not have to translate a test number to a test in the config.
+#
+# For TEST_TYPE = patchcheck
+#
+# This expects the BUILD_DIR to be a git repository, and
+# will checkout the PATCHCHECK_START commit.
+#
+# The option BUILD_TYPE will be ignored.
+#
+# The MIN_CONFIG will be used for all builds of the patchcheck. The build type
+# used for patchcheck is oldconfig.
+#
+# PATCHCHECK_START is required and is the first patch to
+# test (the SHA1 of the commit). You may also specify anything
+# that git checkout allows (branch name, tage, HEAD~3).
+#
+# PATCHCHECK_END is the last patch to check (default HEAD)
+#
+# PATCHCHECK_CHERRY if set to non zero, then git cherry will be
+# performed against PATCHCHECK_START and PATCHCHECK_END. That is
+#
+# git cherry ${PATCHCHECK_START} ${PATCHCHECK_END}
+#
+# Then the changes found will be tested.
+#
+# Note, PATCHCHECK_CHERRY requires PATCHCHECK_END to be defined.
+# (default 0)
+#
+# PATCHCHECK_TYPE is required and is the type of test to run:
+# build, boot, test.
+#
+# Note, the build test will look for warnings, if a warning occurred
+# in a file that a commit touches, the build will fail, unless
+# IGNORE_WARNINGS is set for the given commit's sha1
+#
+# IGNORE_WARNINGS can be used to disable the failure of patchcheck
+# on a particuler commit (SHA1). You can add more than one commit
+# by adding a list of SHA1s that are space delimited.
+#
+# If BUILD_NOCLEAN is set, then make mrproper will not be run on
+# any of the builds, just like all other TEST_TYPE tests. But
+# what makes patchcheck different from the other tests, is if
+# BUILD_NOCLEAN is not set, only the first and last patch run
+# make mrproper. This helps speed up the test.
+#
+# Example:
+# TEST_START
+# TEST_TYPE = patchcheck
+# CHECKOUT = mybranch
+# PATCHCHECK_TYPE = boot
+# PATCHCHECK_START = 747e94ae3d1b4c9bf5380e569f614eb9040b79e7
+# PATCHCHECK_END = HEAD~2
+# IGNORE_WARNINGS = 42f9c6b69b54946ffc0515f57d01dc7f5c0e4712 0c17ca2c7187f431d8ffc79e81addc730f33d128
+#
+#
+#
+# For TEST_TYPE = bisect
+#
+# You can specify a git bisect if the BUILD_DIR is a git repository.
+# The MIN_CONFIG will be used for all builds of the bisect. The build type
+# used for bisecting is oldconfig.
+#
+# The option BUILD_TYPE will be ignored.
+#
+# BISECT_TYPE is the type of test to perform:
+# build - bad fails to build
+# boot - bad builds but fails to boot
+# test - bad boots but fails a test
+#
+# BISECT_GOOD is the commit (SHA1) to label as good (accepts all git good commit types)
+# BISECT_BAD is the commit to label as bad (accepts all git bad commit types)
+#
+# The above three options are required for a bisect operation.
+#
+# BISECT_REPLAY = /path/to/replay/file (optional, default undefined)
+#
+# If an operation failed in the bisect that was not expected to
+# fail. Then the test ends. The state of the BUILD_DIR will be
+# left off at where the failure occurred. You can examine the
+# reason for the failure, and perhaps even find a git commit
+# that would work to continue with. You can run:
+#
+# git bisect log > /path/to/replay/file
+#
+# The adding:
+#
+# BISECT_REPLAY= /path/to/replay/file
+#
+# And running the test again. The test will perform the initial
+# git bisect start, git bisect good, and git bisect bad, and
+# then it will run git bisect replay on this file, before
+# continuing with the bisect.
+#
+# BISECT_START = commit (optional, default undefined)
+#
+# As with BISECT_REPLAY, if the test failed on a commit that
+# just happen to have a bad commit in the middle of the bisect,
+# and you need to skip it. If BISECT_START is defined, it
+# will checkout that commit after doing the initial git bisect start,
+# git bisect good, git bisect bad, and running the git bisect replay
+# if the BISECT_REPLAY is set.
+#
+# BISECT_SKIP = 1 (optional, default 0)
+#
+# If BISECT_TYPE is set to test but the build fails, ktest will
+# simply fail the test and end their. You could use BISECT_REPLAY
+# and BISECT_START to resume after you found a new starting point,
+# or you could set BISECT_SKIP to 1. If BISECT_SKIP is set to 1,
+# when something other than the BISECT_TYPE fails, ktest.pl will
+# run "git bisect skip" and try again.
+#
+# BISECT_FILES = <path> (optional, default undefined)
+#
+# To just run the git bisect on a specific path, set BISECT_FILES.
+# For example:
+#
+# BISECT_FILES = arch/x86 kernel/time
+#
+# Will run the bisect with "git bisect start -- arch/x86 kernel/time"
+#
+# BISECT_REVERSE = 1 (optional, default 0)
+#
+# In those strange instances where it was broken forever
+# and you are trying to find where it started to work!
+# Set BISECT_GOOD to the commit that was last known to fail
+# Set BISECT_BAD to the commit that is known to start working.
+# With BISECT_REVERSE = 1, The test will consider failures as
+# good, and success as bad.
+#
+# BISECT_MANUAL = 1 (optional, default 0)
+#
+# In case there's a problem with automating the bisect for
+# whatever reason. (Can't reboot, want to inspect each iteration)
+# Doing a BISECT_MANUAL will have the test wait for you to
+# tell it if the test passed or failed after each iteration.
+# This is basicall the same as running git bisect yourself
+# but ktest will rebuild and install the kernel for you.
+#
+# BISECT_CHECK = 1 (optional, default 0)
+#
+# Just to be sure the good is good and bad is bad, setting
+# BISECT_CHECK to 1 will start the bisect by first checking
+# out BISECT_BAD and makes sure it fails, then it will check
+# out BISECT_GOOD and makes sure it succeeds before starting
+# the bisect (it works for BISECT_REVERSE too).
+#
+# You can limit the test to just check BISECT_GOOD or
+# BISECT_BAD with BISECT_CHECK = good or
+# BISECT_CHECK = bad, respectively.
+#
+# BISECT_TRIES = 5 (optional, default 1)
+#
+# For those cases that it takes several tries to hit a bug,
+# the BISECT_TRIES is useful. It is the number of times the
+# test is ran before it says the kernel is good. The first failure
+# will stop trying and mark the current SHA1 as bad.
+#
+# Note, as with all race bugs, there's no guarantee that if
+# it succeeds, it is really a good bisect. But it helps in case
+# the bug is some what reliable.
+#
+# You can set BISECT_TRIES to zero, and all tests will be considered
+# good, unless you also set BISECT_MANUAL.
+#
+# BISECT_RET_GOOD = 0 (optional, default undefined)
+#
+# In case the specificed test returns something other than just
+# 0 for good, and non-zero for bad, you can override 0 being
+# good by defining BISECT_RET_GOOD.
+#
+# BISECT_RET_BAD = 1 (optional, default undefined)
+#
+# In case the specificed test returns something other than just
+# 0 for good, and non-zero for bad, you can override non-zero being
+# bad by defining BISECT_RET_BAD.
+#
+# BISECT_RET_ABORT = 255 (optional, default undefined)
+#
+# If you need to abort the bisect if the test discovers something
+# that was wrong, you can define BISECT_RET_ABORT to be the error
+# code returned by the test in order to abort the bisect.
+#
+# BISECT_RET_SKIP = 2 (optional, default undefined)
+#
+# If the test detects that the current commit is neither good
+# nor bad, but something else happened (another bug detected)
+# you can specify BISECT_RET_SKIP to an error code that the
+# test returns when it should skip the current commit.
+#
+# BISECT_RET_DEFAULT = good (optional, default undefined)
+#
+# You can override the default of what to do when the above
+# options are not hit. This may be one of, "good", "bad",
+# "abort" or "skip" (without the quotes).
+#
+# Note, if you do not define any of the previous BISECT_RET_*
+# and define BISECT_RET_DEFAULT, all bisects results will do
+# what the BISECT_RET_DEFAULT has.
+#
+#
+# Example:
+# TEST_START
+# TEST_TYPE = bisect
+# BISECT_GOOD = v2.6.36
+# BISECT_BAD = b5153163ed580e00c67bdfecb02b2e3843817b3e
+# BISECT_TYPE = build
+# MIN_CONFIG = /home/test/config-bisect
+#
+#
+#
+# For TEST_TYPE = config_bisect
+#
+# In those cases that you have two different configs. One of them
+# work, the other does not, and you do not know what config causes
+# the problem.
+# The TEST_TYPE config_bisect will bisect the bad config looking for
+# what config causes the failure.
+#
+# The way it works is this:
+#
+# You can specify a good config with CONFIG_BISECT_GOOD, otherwise it
+# will use the MIN_CONFIG, and if that's not specified, it will use
+# the config that comes with "make defconfig".
+#
+# It runs both the good and bad configs through a make oldconfig to
+# make sure that they are set up for the kernel that is checked out.
+#
+# It then reads the configs that are set, as well as the ones that are
+# not set for both the good and bad configs, and then compares them.
+# It will set half of the good configs within the bad config (note,
+# "set" means to make the bad config match the good config, a config
+# in the good config that is off, will be turned off in the bad
+# config. That is considered a "set").
+#
+# It tests this new config and if it works, it becomes the new good
+# config, otherwise it becomes the new bad config. It continues this
+# process until there's only one config left and it will report that
+# config.
+#
+# The "bad config" can also be a config that is needed to boot but was
+# disabled because it depended on something that wasn't set.
+#
+# During this process, it saves the current good and bad configs in
+# ${TMP_DIR}/good_config and ${TMP_DIR}/bad_config respectively.
+# If you stop the test, you can copy them to a new location to
+# reuse them again.
+#
+# Although the MIN_CONFIG may be the config it starts with, the
+# MIN_CONFIG is ignored.
+#
+# The option BUILD_TYPE will be ignored.
+#
+# CONFIG_BISECT_TYPE is the type of test to perform:
+# build - bad fails to build
+# boot - bad builds but fails to boot
+# test - bad boots but fails a test
+#
+# CONFIG_BISECT is the config that failed to boot
+#
+# If BISECT_MANUAL is set, it will pause between iterations.
+# This is useful to use just ktest.pl just for the config bisect.
+# If you set it to build, it will run the bisect and you can
+# control what happens in between iterations. It will ask you if
+# the test succeeded or not and continue the config bisect.
+#
+# CONFIG_BISECT_GOOD (optional)
+# If you have a good config to start with, then you
+# can specify it with CONFIG_BISECT_GOOD. Otherwise
+# the MIN_CONFIG is the base, if MIN_CONFIG is not set
+# It will build a config with "make defconfig"
+#
+# CONFIG_BISECT_CHECK (optional)
+# Set this to 1 if you want to confirm that the config ktest
+# generates (the bad config with the min config) is still bad.
+# It may be that the min config fixes what broke the bad config
+# and the test will not return a result.
+# Set it to "good" to test only the good config and set it
+# to "bad" to only test the bad config.
+#
+# CONFIG_BISECT_EXEC (optional)
+# The config bisect is a separate program that comes with ktest.pl.
+# By befault, it will look for:
+# `pwd`/config-bisect.pl # the location ktest.pl was executed from.
+# If it does not find it there, it will look for:
+# `dirname <ktest.pl>`/config-bisect.pl # The directory that holds ktest.pl
+# If it does not find it there, it will look for:
+# ${BUILD_DIR}/tools/testing/ktest/config-bisect.pl
+# Setting CONFIG_BISECT_EXEC will override where it looks.
+#
+# Example:
+# TEST_START
+# TEST_TYPE = config_bisect
+# CONFIG_BISECT_TYPE = build
+# CONFIG_BISECT = /home/test/config-bad
+# MIN_CONFIG = /home/test/config-min
+# BISECT_MANUAL = 1
+#
+#
+#
+# For TEST_TYPE = make_min_config
+#
+# After doing a make localyesconfig, your kernel configuration may
+# not be the most useful minimum configuration. Having a true minimum
+# config that you can use against other configs is very useful if
+# someone else has a config that breaks on your code. By only forcing
+# those configurations that are truly required to boot your machine
+# will give you less of a chance that one of your set configurations
+# will make the bug go away. This will give you a better chance to
+# be able to reproduce the reported bug matching the broken config.
+#
+# Note, this does take some time, and may require you to run the
+# test over night, or perhaps over the weekend. But it also allows
+# you to interrupt it, and gives you the current minimum config
+# that was found till that time.
+#
+# Note, this test automatically assumes a BUILD_TYPE of oldconfig
+# and its test type acts like boot.
+# TODO: add a test version that makes the config do more than just
+# boot, like having network access.
+#
+# To save time, the test does not just grab any option and test
+# it. The Kconfig files are examined to determine the dependencies
+# of the configs. If a config is chosen that depends on another
+# config, that config will be checked first. By checking the
+# parents first, we can eliminate whole groups of configs that
+# may have been enabled.
+#
+# For example, if a USB device config is chosen and depends on CONFIG_USB,
+# the CONFIG_USB will be tested before the device. If CONFIG_USB is
+# found not to be needed, it, as well as all configs that depend on
+# it, will be disabled and removed from the current min_config.
+#
+# OUTPUT_MIN_CONFIG is the path and filename of the file that will
+# be created from the MIN_CONFIG. If you interrupt the test, set
+# this file as your new min config, and use it to continue the test.
+# This file does not need to exist on start of test.
+# This file is not created until a config is found that can be removed.
+# If this file exists, you will be prompted if you want to use it
+# as the min_config (overriding MIN_CONFIG) if START_MIN_CONFIG
+# is not defined.
+# (required field)
+#
+# START_MIN_CONFIG is the config to use to start the test with.
+# you can set this as the same OUTPUT_MIN_CONFIG, but if you do
+# the OUTPUT_MIN_CONFIG file must exist.
+# (default MIN_CONFIG)
+#
+# IGNORE_CONFIG is used to specify a config file that has configs that
+# you already know must be set. Configs are written here that have
+# been tested and proved to be required. It is best to define this
+# file if you intend on interrupting the test and running it where
+# it left off. New configs that it finds will be written to this file
+# and will not be tested again in later runs.
+# (optional)
+#
+# MIN_CONFIG_TYPE can be either 'boot' or 'test'. With 'boot' it will
+# test if the created config can just boot the machine. If this is
+# set to 'test', then the TEST option must be defined and the created
+# config will not only boot the target, but also make sure that the
+# config lets the test succeed. This is useful to make sure the final
+# config that is generated allows network activity (ssh).
+# (optional)
+#
+# USE_OUTPUT_MIN_CONFIG set this to 1 if you do not want to be prompted
+# about using the OUTPUT_MIN_CONFIG as the MIN_CONFIG as the starting
+# point. Set it to 0 if you want to always just use the given MIN_CONFIG.
+# If it is not defined, it will prompt you to pick which config
+# to start with (MIN_CONFIG or OUTPUT_MIN_CONFIG).
+#
+# Example:
+#
+# TEST_TYPE = make_min_config
+# OUTPUT_MIN_CONFIG = /path/to/config-new-min
+# START_MIN_CONFIG = /path/to/config-min
+# IGNORE_CONFIG = /path/to/config-tested
+# MIN_CONFIG_TYPE = test
+# TEST = ssh ${USER}@${MACHINE} echo hi
+#
+#
+#
+#
+# For TEST_TYPE = make_warnings_file
+#
+# If you want the build to fail when a new warning is discovered
+# you set the WARNINGS_FILE to point to a file of known warnings.
+#
+# The test "make_warnings_file" will let you create a new warnings
+# file before you run other tests, like patchcheck.
+#
+# What this test does is to run just a build, you still need to
+# specify BUILD_TYPE to tell the test what type of config to use.
+# A BUILD_TYPE of nobuild will fail this test.
+#
+# The test will do the build and scan for all warnings. Any warning
+# it discovers will be saved in the WARNINGS_FILE (required) option.
+#
+# It is recommended (but not necessary) to make sure BUILD_NOCLEAN is
+# off, so that a full build is done (make mrproper is performed).
+# That way, all warnings will be captured.
+#
+# Example:
+#
+# TEST_TYPE = make_warnings_file
+# WARNINGS_FILE = ${OUTPUT_DIR}
+# BUILD_TYPE = useconfig:oldconfig
+# CHECKOUT = v3.8
+# BUILD_NOCLEAN = 0
+#
diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild
new file mode 100644
index 000000000..0392153a0
--- /dev/null
+++ b/tools/testing/nvdimm/Kbuild
@@ -0,0 +1,84 @@
+# SPDX-License-Identifier: GPL-2.0
+ldflags-y += --wrap=ioremap_wc
+ldflags-y += --wrap=memremap
+ldflags-y += --wrap=devm_ioremap_nocache
+ldflags-y += --wrap=devm_memremap
+ldflags-y += --wrap=devm_memunmap
+ldflags-y += --wrap=ioremap_nocache
+ldflags-y += --wrap=iounmap
+ldflags-y += --wrap=memunmap
+ldflags-y += --wrap=__devm_request_region
+ldflags-y += --wrap=__devm_release_region
+ldflags-y += --wrap=__request_region
+ldflags-y += --wrap=__release_region
+ldflags-y += --wrap=devm_memremap_pages
+ldflags-y += --wrap=insert_resource
+ldflags-y += --wrap=remove_resource
+ldflags-y += --wrap=acpi_evaluate_object
+ldflags-y += --wrap=acpi_evaluate_dsm
+
+DRIVERS := ../../../drivers
+NVDIMM_SRC := $(DRIVERS)/nvdimm
+ACPI_SRC := $(DRIVERS)/acpi/nfit
+DAX_SRC := $(DRIVERS)/dax
+ccflags-y := -I$(src)/$(NVDIMM_SRC)/
+
+obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o
+obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o
+obj-$(CONFIG_ND_BTT) += nd_btt.o
+obj-$(CONFIG_ND_BLK) += nd_blk.o
+obj-$(CONFIG_X86_PMEM_LEGACY) += nd_e820.o
+obj-$(CONFIG_ACPI_NFIT) += nfit.o
+ifeq ($(CONFIG_DAX),m)
+obj-$(CONFIG_DAX) += dax.o
+endif
+obj-$(CONFIG_DEV_DAX) += device_dax.o
+obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o
+
+nfit-y := $(ACPI_SRC)/core.o
+nfit-$(CONFIG_X86_MCE) += $(ACPI_SRC)/mce.o
+nfit-y += acpi_nfit_test.o
+nfit-y += config_check.o
+
+nd_pmem-y := $(NVDIMM_SRC)/pmem.o
+nd_pmem-y += pmem-dax.o
+nd_pmem-y += pmem_test.o
+nd_pmem-y += config_check.o
+
+nd_btt-y := $(NVDIMM_SRC)/btt.o
+nd_btt-y += config_check.o
+
+nd_blk-y := $(NVDIMM_SRC)/blk.o
+nd_blk-y += config_check.o
+
+nd_e820-y := $(NVDIMM_SRC)/e820.o
+nd_e820-y += config_check.o
+
+dax-y := $(DAX_SRC)/super.o
+dax-y += config_check.o
+
+device_dax-y := $(DAX_SRC)/device.o
+device_dax-y += dax-dev.o
+device_dax-y += device_dax_test.o
+device_dax-y += config_check.o
+
+dax_pmem-y := $(DAX_SRC)/pmem.o
+dax_pmem-y += config_check.o
+
+libnvdimm-y := $(NVDIMM_SRC)/core.o
+libnvdimm-y += $(NVDIMM_SRC)/bus.o
+libnvdimm-y += $(NVDIMM_SRC)/dimm_devs.o
+libnvdimm-y += $(NVDIMM_SRC)/dimm.o
+libnvdimm-y += $(NVDIMM_SRC)/region_devs.o
+libnvdimm-y += $(NVDIMM_SRC)/region.o
+libnvdimm-y += $(NVDIMM_SRC)/namespace_devs.o
+libnvdimm-y += $(NVDIMM_SRC)/label.o
+libnvdimm-y += $(NVDIMM_SRC)/badrange.o
+libnvdimm-$(CONFIG_ND_CLAIM) += $(NVDIMM_SRC)/claim.o
+libnvdimm-$(CONFIG_BTT) += $(NVDIMM_SRC)/btt_devs.o
+libnvdimm-$(CONFIG_NVDIMM_PFN) += $(NVDIMM_SRC)/pfn_devs.o
+libnvdimm-$(CONFIG_NVDIMM_DAX) += $(NVDIMM_SRC)/dax_devs.o
+libnvdimm-y += libnvdimm_test.o
+libnvdimm-y += config_check.o
+
+obj-m += test/
diff --git a/tools/testing/nvdimm/Makefile b/tools/testing/nvdimm/Makefile
new file mode 100644
index 000000000..c37a6a0bd
--- /dev/null
+++ b/tools/testing/nvdimm/Makefile
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0
+KDIR ?= ../../../
+
+default:
+ $(MAKE) -C $(KDIR) M=$$PWD
+
+install: default
+ $(MAKE) -C $(KDIR) M=$$PWD modules_install
diff --git a/tools/testing/nvdimm/acpi_nfit_test.c b/tools/testing/nvdimm/acpi_nfit_test.c
new file mode 100644
index 000000000..43521512e
--- /dev/null
+++ b/tools/testing/nvdimm/acpi_nfit_test.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright(c) 2018 Intel Corporation. All rights reserved.
+
+#include <linux/module.h>
+#include <linux/printk.h>
+#include "watermark.h"
+
+nfit_test_watermark(acpi_nfit);
diff --git a/tools/testing/nvdimm/config_check.c b/tools/testing/nvdimm/config_check.c
new file mode 100644
index 000000000..cac891028
--- /dev/null
+++ b/tools/testing/nvdimm/config_check.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bug.h>
+
+void check(void)
+{
+ /*
+ * These kconfig symbols must be set to "m" for nfit_test to
+ * load and operate.
+ */
+ BUILD_BUG_ON(!IS_MODULE(CONFIG_LIBNVDIMM));
+ BUILD_BUG_ON(!IS_MODULE(CONFIG_BLK_DEV_PMEM));
+ BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_BTT));
+ BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_PFN));
+ BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_BLK));
+ BUILD_BUG_ON(!IS_MODULE(CONFIG_ACPI_NFIT));
+ BUILD_BUG_ON(!IS_MODULE(CONFIG_DEV_DAX));
+ BUILD_BUG_ON(!IS_MODULE(CONFIG_DEV_DAX_PMEM));
+}
diff --git a/tools/testing/nvdimm/dax-dev.c b/tools/testing/nvdimm/dax-dev.c
new file mode 100644
index 000000000..36ee3d879
--- /dev/null
+++ b/tools/testing/nvdimm/dax-dev.c
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2016, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+#include "test/nfit_test.h"
+#include <linux/mm.h>
+#include "../../../drivers/dax/dax-private.h"
+
+phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff,
+ unsigned long size)
+{
+ struct resource *res;
+ phys_addr_t addr;
+ int i;
+
+ for (i = 0; i < dev_dax->num_resources; i++) {
+ res = &dev_dax->res[i];
+ addr = pgoff * PAGE_SIZE + res->start;
+ if (addr >= res->start && addr <= res->end)
+ break;
+ pgoff -= PHYS_PFN(resource_size(res));
+ }
+
+ if (i < dev_dax->num_resources) {
+ res = &dev_dax->res[i];
+ if (addr + size - 1 <= res->end) {
+ if (get_nfit_res(addr)) {
+ struct page *page;
+
+ if (dev_dax->region->align > PAGE_SIZE)
+ return -1;
+
+ page = vmalloc_to_page((void *)addr);
+ return PFN_PHYS(page_to_pfn(page));
+ } else
+ return addr;
+ }
+ }
+
+ return -1;
+}
diff --git a/tools/testing/nvdimm/device_dax_test.c b/tools/testing/nvdimm/device_dax_test.c
new file mode 100644
index 000000000..24b17bf42
--- /dev/null
+++ b/tools/testing/nvdimm/device_dax_test.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright(c) 2018 Intel Corporation. All rights reserved.
+
+#include <linux/module.h>
+#include <linux/printk.h>
+#include "watermark.h"
+
+nfit_test_watermark(device_dax);
diff --git a/tools/testing/nvdimm/libnvdimm_test.c b/tools/testing/nvdimm/libnvdimm_test.c
new file mode 100644
index 000000000..00ca30b23
--- /dev/null
+++ b/tools/testing/nvdimm/libnvdimm_test.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright(c) 2018 Intel Corporation. All rights reserved.
+
+#include <linux/module.h>
+#include <linux/printk.h>
+#include "watermark.h"
+
+nfit_test_watermark(libnvdimm);
diff --git a/tools/testing/nvdimm/pmem-dax.c b/tools/testing/nvdimm/pmem-dax.c
new file mode 100644
index 000000000..2e7fd8227
--- /dev/null
+++ b/tools/testing/nvdimm/pmem-dax.c
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2014-2016, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+#include "test/nfit_test.h"
+#include <linux/blkdev.h>
+#include <pmem.h>
+#include <nd.h>
+
+long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff,
+ long nr_pages, void **kaddr, pfn_t *pfn)
+{
+ resource_size_t offset = PFN_PHYS(pgoff) + pmem->data_offset;
+
+ if (unlikely(is_bad_pmem(&pmem->bb, PFN_PHYS(pgoff) / 512,
+ PFN_PHYS(nr_pages))))
+ return -EIO;
+
+ /*
+ * Limit dax to a single page at a time given vmalloc()-backed
+ * in the nfit_test case.
+ */
+ if (get_nfit_res(pmem->phys_addr + offset)) {
+ struct page *page;
+
+ if (kaddr)
+ *kaddr = pmem->virt_addr + offset;
+ page = vmalloc_to_page(pmem->virt_addr + offset);
+ if (pfn)
+ *pfn = page_to_pfn_t(page);
+ pr_debug_ratelimited("%s: pmem: %p pgoff: %#lx pfn: %#lx\n",
+ __func__, pmem, pgoff, page_to_pfn(page));
+
+ return 1;
+ }
+
+ if (kaddr)
+ *kaddr = pmem->virt_addr + offset;
+ if (pfn)
+ *pfn = phys_to_pfn_t(pmem->phys_addr + offset, pmem->pfn_flags);
+
+ /*
+ * If badblocks are present, limit known good range to the
+ * requested range.
+ */
+ if (unlikely(pmem->bb.count))
+ return nr_pages;
+ return PHYS_PFN(pmem->size - pmem->pfn_pad - offset);
+}
diff --git a/tools/testing/nvdimm/pmem_test.c b/tools/testing/nvdimm/pmem_test.c
new file mode 100644
index 000000000..fd38f9227
--- /dev/null
+++ b/tools/testing/nvdimm/pmem_test.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright(c) 2018 Intel Corporation. All rights reserved.
+
+#include <linux/module.h>
+#include <linux/printk.h>
+#include "watermark.h"
+
+nfit_test_watermark(pmem);
diff --git a/tools/testing/nvdimm/test/Kbuild b/tools/testing/nvdimm/test/Kbuild
new file mode 100644
index 000000000..fb3c3d7cd
--- /dev/null
+++ b/tools/testing/nvdimm/test/Kbuild
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+ccflags-y := -I$(src)/../../../../drivers/nvdimm/
+ccflags-y += -I$(src)/../../../../drivers/acpi/nfit/
+
+obj-m += nfit_test.o
+obj-m += nfit_test_iomap.o
+
+nfit_test-y := nfit.o
+nfit_test_iomap-y := iomap.o
diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c
new file mode 100644
index 000000000..c6635fee2
--- /dev/null
+++ b/tools/testing/nvdimm/test/iomap.c
@@ -0,0 +1,403 @@
+/*
+ * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#include <linux/memremap.h>
+#include <linux/rculist.h>
+#include <linux/export.h>
+#include <linux/ioport.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/pfn_t.h>
+#include <linux/acpi.h>
+#include <linux/io.h>
+#include <linux/mm.h>
+#include "nfit_test.h"
+
+static LIST_HEAD(iomap_head);
+
+static struct iomap_ops {
+ nfit_test_lookup_fn nfit_test_lookup;
+ nfit_test_evaluate_dsm_fn evaluate_dsm;
+ struct list_head list;
+} iomap_ops = {
+ .list = LIST_HEAD_INIT(iomap_ops.list),
+};
+
+void nfit_test_setup(nfit_test_lookup_fn lookup,
+ nfit_test_evaluate_dsm_fn evaluate)
+{
+ iomap_ops.nfit_test_lookup = lookup;
+ iomap_ops.evaluate_dsm = evaluate;
+ list_add_rcu(&iomap_ops.list, &iomap_head);
+}
+EXPORT_SYMBOL(nfit_test_setup);
+
+void nfit_test_teardown(void)
+{
+ list_del_rcu(&iomap_ops.list);
+ synchronize_rcu();
+}
+EXPORT_SYMBOL(nfit_test_teardown);
+
+static struct nfit_test_resource *__get_nfit_res(resource_size_t resource)
+{
+ struct iomap_ops *ops;
+
+ ops = list_first_or_null_rcu(&iomap_head, typeof(*ops), list);
+ if (ops)
+ return ops->nfit_test_lookup(resource);
+ return NULL;
+}
+
+struct nfit_test_resource *get_nfit_res(resource_size_t resource)
+{
+ struct nfit_test_resource *res;
+
+ rcu_read_lock();
+ res = __get_nfit_res(resource);
+ rcu_read_unlock();
+
+ return res;
+}
+EXPORT_SYMBOL(get_nfit_res);
+
+void __iomem *__nfit_test_ioremap(resource_size_t offset, unsigned long size,
+ void __iomem *(*fallback_fn)(resource_size_t, unsigned long))
+{
+ struct nfit_test_resource *nfit_res = get_nfit_res(offset);
+
+ if (nfit_res)
+ return (void __iomem *) nfit_res->buf + offset
+ - nfit_res->res.start;
+ return fallback_fn(offset, size);
+}
+
+void __iomem *__wrap_devm_ioremap_nocache(struct device *dev,
+ resource_size_t offset, unsigned long size)
+{
+ struct nfit_test_resource *nfit_res = get_nfit_res(offset);
+
+ if (nfit_res)
+ return (void __iomem *) nfit_res->buf + offset
+ - nfit_res->res.start;
+ return devm_ioremap_nocache(dev, offset, size);
+}
+EXPORT_SYMBOL(__wrap_devm_ioremap_nocache);
+
+void *__wrap_devm_memremap(struct device *dev, resource_size_t offset,
+ size_t size, unsigned long flags)
+{
+ struct nfit_test_resource *nfit_res = get_nfit_res(offset);
+
+ if (nfit_res)
+ return nfit_res->buf + offset - nfit_res->res.start;
+ return devm_memremap(dev, offset, size, flags);
+}
+EXPORT_SYMBOL(__wrap_devm_memremap);
+
+static void nfit_test_kill(void *_pgmap)
+{
+ struct dev_pagemap *pgmap = _pgmap;
+
+ pgmap->kill(pgmap->ref);
+}
+
+void *__wrap_devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
+{
+ resource_size_t offset = pgmap->res.start;
+ struct nfit_test_resource *nfit_res = get_nfit_res(offset);
+
+ if (nfit_res) {
+ int rc;
+
+ rc = devm_add_action_or_reset(dev, nfit_test_kill, pgmap);
+ if (rc)
+ return ERR_PTR(rc);
+ return nfit_res->buf + offset - nfit_res->res.start;
+ }
+ return devm_memremap_pages(dev, pgmap);
+}
+EXPORT_SYMBOL_GPL(__wrap_devm_memremap_pages);
+
+pfn_t __wrap_phys_to_pfn_t(phys_addr_t addr, unsigned long flags)
+{
+ struct nfit_test_resource *nfit_res = get_nfit_res(addr);
+
+ if (nfit_res)
+ flags &= ~PFN_MAP;
+ return phys_to_pfn_t(addr, flags);
+}
+EXPORT_SYMBOL(__wrap_phys_to_pfn_t);
+
+void *__wrap_memremap(resource_size_t offset, size_t size,
+ unsigned long flags)
+{
+ struct nfit_test_resource *nfit_res = get_nfit_res(offset);
+
+ if (nfit_res)
+ return nfit_res->buf + offset - nfit_res->res.start;
+ return memremap(offset, size, flags);
+}
+EXPORT_SYMBOL(__wrap_memremap);
+
+void __wrap_devm_memunmap(struct device *dev, void *addr)
+{
+ struct nfit_test_resource *nfit_res = get_nfit_res((long) addr);
+
+ if (nfit_res)
+ return;
+ return devm_memunmap(dev, addr);
+}
+EXPORT_SYMBOL(__wrap_devm_memunmap);
+
+void __iomem *__wrap_ioremap_nocache(resource_size_t offset, unsigned long size)
+{
+ return __nfit_test_ioremap(offset, size, ioremap_nocache);
+}
+EXPORT_SYMBOL(__wrap_ioremap_nocache);
+
+void __iomem *__wrap_ioremap_wc(resource_size_t offset, unsigned long size)
+{
+ return __nfit_test_ioremap(offset, size, ioremap_wc);
+}
+EXPORT_SYMBOL(__wrap_ioremap_wc);
+
+void __wrap_iounmap(volatile void __iomem *addr)
+{
+ struct nfit_test_resource *nfit_res = get_nfit_res((long) addr);
+ if (nfit_res)
+ return;
+ return iounmap(addr);
+}
+EXPORT_SYMBOL(__wrap_iounmap);
+
+void __wrap_memunmap(void *addr)
+{
+ struct nfit_test_resource *nfit_res = get_nfit_res((long) addr);
+
+ if (nfit_res)
+ return;
+ return memunmap(addr);
+}
+EXPORT_SYMBOL(__wrap_memunmap);
+
+static bool nfit_test_release_region(struct device *dev,
+ struct resource *parent, resource_size_t start,
+ resource_size_t n);
+
+static void nfit_devres_release(struct device *dev, void *data)
+{
+ struct resource *res = *((struct resource **) data);
+
+ WARN_ON(!nfit_test_release_region(NULL, &iomem_resource, res->start,
+ resource_size(res)));
+}
+
+static int match(struct device *dev, void *__res, void *match_data)
+{
+ struct resource *res = *((struct resource **) __res);
+ resource_size_t start = *((resource_size_t *) match_data);
+
+ return res->start == start;
+}
+
+static bool nfit_test_release_region(struct device *dev,
+ struct resource *parent, resource_size_t start,
+ resource_size_t n)
+{
+ if (parent == &iomem_resource) {
+ struct nfit_test_resource *nfit_res = get_nfit_res(start);
+
+ if (nfit_res) {
+ struct nfit_test_request *req;
+ struct resource *res = NULL;
+
+ if (dev) {
+ devres_release(dev, nfit_devres_release, match,
+ &start);
+ return true;
+ }
+
+ spin_lock(&nfit_res->lock);
+ list_for_each_entry(req, &nfit_res->requests, list)
+ if (req->res.start == start) {
+ res = &req->res;
+ list_del(&req->list);
+ break;
+ }
+ spin_unlock(&nfit_res->lock);
+
+ WARN(!res || resource_size(res) != n,
+ "%s: start: %llx n: %llx mismatch: %pr\n",
+ __func__, start, n, res);
+ if (res)
+ kfree(req);
+ return true;
+ }
+ }
+ return false;
+}
+
+static struct resource *nfit_test_request_region(struct device *dev,
+ struct resource *parent, resource_size_t start,
+ resource_size_t n, const char *name, int flags)
+{
+ struct nfit_test_resource *nfit_res;
+
+ if (parent == &iomem_resource) {
+ nfit_res = get_nfit_res(start);
+ if (nfit_res) {
+ struct nfit_test_request *req;
+ struct resource *res = NULL;
+
+ if (start + n > nfit_res->res.start
+ + resource_size(&nfit_res->res)) {
+ pr_debug("%s: start: %llx n: %llx overflow: %pr\n",
+ __func__, start, n,
+ &nfit_res->res);
+ return NULL;
+ }
+
+ spin_lock(&nfit_res->lock);
+ list_for_each_entry(req, &nfit_res->requests, list)
+ if (start == req->res.start) {
+ res = &req->res;
+ break;
+ }
+ spin_unlock(&nfit_res->lock);
+
+ if (res) {
+ WARN(1, "%pr already busy\n", res);
+ return NULL;
+ }
+
+ req = kzalloc(sizeof(*req), GFP_KERNEL);
+ if (!req)
+ return NULL;
+ INIT_LIST_HEAD(&req->list);
+ res = &req->res;
+
+ res->start = start;
+ res->end = start + n - 1;
+ res->name = name;
+ res->flags = resource_type(parent);
+ res->flags |= IORESOURCE_BUSY | flags;
+ spin_lock(&nfit_res->lock);
+ list_add(&req->list, &nfit_res->requests);
+ spin_unlock(&nfit_res->lock);
+
+ if (dev) {
+ struct resource **d;
+
+ d = devres_alloc(nfit_devres_release,
+ sizeof(struct resource *),
+ GFP_KERNEL);
+ if (!d)
+ return NULL;
+ *d = res;
+ devres_add(dev, d);
+ }
+
+ pr_debug("%s: %pr\n", __func__, res);
+ return res;
+ }
+ }
+ if (dev)
+ return __devm_request_region(dev, parent, start, n, name);
+ return __request_region(parent, start, n, name, flags);
+}
+
+struct resource *__wrap___request_region(struct resource *parent,
+ resource_size_t start, resource_size_t n, const char *name,
+ int flags)
+{
+ return nfit_test_request_region(NULL, parent, start, n, name, flags);
+}
+EXPORT_SYMBOL(__wrap___request_region);
+
+int __wrap_insert_resource(struct resource *parent, struct resource *res)
+{
+ if (get_nfit_res(res->start))
+ return 0;
+ return insert_resource(parent, res);
+}
+EXPORT_SYMBOL(__wrap_insert_resource);
+
+int __wrap_remove_resource(struct resource *res)
+{
+ if (get_nfit_res(res->start))
+ return 0;
+ return remove_resource(res);
+}
+EXPORT_SYMBOL(__wrap_remove_resource);
+
+struct resource *__wrap___devm_request_region(struct device *dev,
+ struct resource *parent, resource_size_t start,
+ resource_size_t n, const char *name)
+{
+ if (!dev)
+ return NULL;
+ return nfit_test_request_region(dev, parent, start, n, name, 0);
+}
+EXPORT_SYMBOL(__wrap___devm_request_region);
+
+void __wrap___release_region(struct resource *parent, resource_size_t start,
+ resource_size_t n)
+{
+ if (!nfit_test_release_region(NULL, parent, start, n))
+ __release_region(parent, start, n);
+}
+EXPORT_SYMBOL(__wrap___release_region);
+
+void __wrap___devm_release_region(struct device *dev, struct resource *parent,
+ resource_size_t start, resource_size_t n)
+{
+ if (!nfit_test_release_region(dev, parent, start, n))
+ __devm_release_region(dev, parent, start, n);
+}
+EXPORT_SYMBOL(__wrap___devm_release_region);
+
+acpi_status __wrap_acpi_evaluate_object(acpi_handle handle, acpi_string path,
+ struct acpi_object_list *p, struct acpi_buffer *buf)
+{
+ struct nfit_test_resource *nfit_res = get_nfit_res((long) handle);
+ union acpi_object **obj;
+
+ if (!nfit_res || strcmp(path, "_FIT") || !buf)
+ return acpi_evaluate_object(handle, path, p, buf);
+
+ obj = nfit_res->buf;
+ buf->length = sizeof(union acpi_object);
+ buf->pointer = *obj;
+ return AE_OK;
+}
+EXPORT_SYMBOL(__wrap_acpi_evaluate_object);
+
+union acpi_object * __wrap_acpi_evaluate_dsm(acpi_handle handle, const guid_t *guid,
+ u64 rev, u64 func, union acpi_object *argv4)
+{
+ union acpi_object *obj = ERR_PTR(-ENXIO);
+ struct iomap_ops *ops;
+
+ rcu_read_lock();
+ ops = list_first_or_null_rcu(&iomap_head, typeof(*ops), list);
+ if (ops)
+ obj = ops->evaluate_dsm(handle, guid, rev, func, argv4);
+ rcu_read_unlock();
+
+ if (IS_ERR(obj))
+ return acpi_evaluate_dsm(handle, guid, rev, func, argv4);
+ return obj;
+}
+EXPORT_SYMBOL(__wrap_acpi_evaluate_dsm);
+
+MODULE_LICENSE("GPL v2");
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
new file mode 100644
index 000000000..fa763dbfd
--- /dev/null
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -0,0 +1,2957 @@
+/*
+ * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/platform_device.h>
+#include <linux/dma-mapping.h>
+#include <linux/workqueue.h>
+#include <linux/libnvdimm.h>
+#include <linux/genalloc.h>
+#include <linux/vmalloc.h>
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/ndctl.h>
+#include <linux/sizes.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <nd-core.h>
+#include <nfit.h>
+#include <nd.h>
+#include "nfit_test.h"
+#include "../watermark.h"
+
+#include <asm/mcsafe_test.h>
+
+/*
+ * Generate an NFIT table to describe the following topology:
+ *
+ * BUS0: Interleaved PMEM regions, and aliasing with BLK regions
+ *
+ * (a) (b) DIMM BLK-REGION
+ * +----------+--------------+----------+---------+
+ * +------+ | blk2.0 | pm0.0 | blk2.1 | pm1.0 | 0 region2
+ * | imc0 +--+- - - - - region0 - - - -+----------+ +
+ * +--+---+ | blk3.0 | pm0.0 | blk3.1 | pm1.0 | 1 region3
+ * | +----------+--------------v----------v v
+ * +--+---+ | |
+ * | cpu0 | region1
+ * +--+---+ | |
+ * | +-------------------------^----------^ ^
+ * +--+---+ | blk4.0 | pm1.0 | 2 region4
+ * | imc1 +--+-------------------------+----------+ +
+ * +------+ | blk5.0 | pm1.0 | 3 region5
+ * +-------------------------+----------+-+-------+
+ *
+ * +--+---+
+ * | cpu1 |
+ * +--+---+ (Hotplug DIMM)
+ * | +----------------------------------------------+
+ * +--+---+ | blk6.0/pm7.0 | 4 region6/7
+ * | imc0 +--+----------------------------------------------+
+ * +------+
+ *
+ *
+ * *) In this layout we have four dimms and two memory controllers in one
+ * socket. Each unique interface (BLK or PMEM) to DPA space
+ * is identified by a region device with a dynamically assigned id.
+ *
+ * *) The first portion of dimm0 and dimm1 are interleaved as REGION0.
+ * A single PMEM namespace "pm0.0" is created using half of the
+ * REGION0 SPA-range. REGION0 spans dimm0 and dimm1. PMEM namespace
+ * allocate from from the bottom of a region. The unallocated
+ * portion of REGION0 aliases with REGION2 and REGION3. That
+ * unallacted capacity is reclaimed as BLK namespaces ("blk2.0" and
+ * "blk3.0") starting at the base of each DIMM to offset (a) in those
+ * DIMMs. "pm0.0", "blk2.0" and "blk3.0" are free-form readable
+ * names that can be assigned to a namespace.
+ *
+ * *) In the last portion of dimm0 and dimm1 we have an interleaved
+ * SPA range, REGION1, that spans those two dimms as well as dimm2
+ * and dimm3. Some of REGION1 allocated to a PMEM namespace named
+ * "pm1.0" the rest is reclaimed in 4 BLK namespaces (for each
+ * dimm in the interleave set), "blk2.1", "blk3.1", "blk4.0", and
+ * "blk5.0".
+ *
+ * *) The portion of dimm2 and dimm3 that do not participate in the
+ * REGION1 interleaved SPA range (i.e. the DPA address below offset
+ * (b) are also included in the "blk4.0" and "blk5.0" namespaces.
+ * Note, that BLK namespaces need not be contiguous in DPA-space, and
+ * can consume aliased capacity from multiple interleave sets.
+ *
+ * BUS1: Legacy NVDIMM (single contiguous range)
+ *
+ * region2
+ * +---------------------+
+ * |---------------------|
+ * || pm2.0 ||
+ * |---------------------|
+ * +---------------------+
+ *
+ * *) A NFIT-table may describe a simple system-physical-address range
+ * with no BLK aliasing. This type of region may optionally
+ * reference an NVDIMM.
+ */
+enum {
+ NUM_PM = 3,
+ NUM_DCR = 5,
+ NUM_HINTS = 8,
+ NUM_BDW = NUM_DCR,
+ NUM_SPA = NUM_PM + NUM_DCR + NUM_BDW,
+ NUM_MEM = NUM_DCR + NUM_BDW + 2 /* spa0 iset */
+ + 4 /* spa1 iset */ + 1 /* spa11 iset */,
+ DIMM_SIZE = SZ_32M,
+ LABEL_SIZE = SZ_128K,
+ SPA_VCD_SIZE = SZ_4M,
+ SPA0_SIZE = DIMM_SIZE,
+ SPA1_SIZE = DIMM_SIZE*2,
+ SPA2_SIZE = DIMM_SIZE,
+ BDW_SIZE = 64 << 8,
+ DCR_SIZE = 12,
+ NUM_NFITS = 2, /* permit testing multiple NFITs per system */
+};
+
+struct nfit_test_dcr {
+ __le64 bdw_addr;
+ __le32 bdw_status;
+ __u8 aperature[BDW_SIZE];
+};
+
+#define NFIT_DIMM_HANDLE(node, socket, imc, chan, dimm) \
+ (((node & 0xfff) << 16) | ((socket & 0xf) << 12) \
+ | ((imc & 0xf) << 8) | ((chan & 0xf) << 4) | (dimm & 0xf))
+
+static u32 handle[] = {
+ [0] = NFIT_DIMM_HANDLE(0, 0, 0, 0, 0),
+ [1] = NFIT_DIMM_HANDLE(0, 0, 0, 0, 1),
+ [2] = NFIT_DIMM_HANDLE(0, 0, 1, 0, 0),
+ [3] = NFIT_DIMM_HANDLE(0, 0, 1, 0, 1),
+ [4] = NFIT_DIMM_HANDLE(0, 1, 0, 0, 0),
+ [5] = NFIT_DIMM_HANDLE(1, 0, 0, 0, 0),
+ [6] = NFIT_DIMM_HANDLE(1, 0, 0, 0, 1),
+};
+
+static unsigned long dimm_fail_cmd_flags[ARRAY_SIZE(handle)];
+static int dimm_fail_cmd_code[ARRAY_SIZE(handle)];
+
+static const struct nd_intel_smart smart_def = {
+ .flags = ND_INTEL_SMART_HEALTH_VALID
+ | ND_INTEL_SMART_SPARES_VALID
+ | ND_INTEL_SMART_ALARM_VALID
+ | ND_INTEL_SMART_USED_VALID
+ | ND_INTEL_SMART_SHUTDOWN_VALID
+ | ND_INTEL_SMART_MTEMP_VALID
+ | ND_INTEL_SMART_CTEMP_VALID,
+ .health = ND_INTEL_SMART_NON_CRITICAL_HEALTH,
+ .media_temperature = 23 * 16,
+ .ctrl_temperature = 25 * 16,
+ .pmic_temperature = 40 * 16,
+ .spares = 75,
+ .alarm_flags = ND_INTEL_SMART_SPARE_TRIP
+ | ND_INTEL_SMART_TEMP_TRIP,
+ .ait_status = 1,
+ .life_used = 5,
+ .shutdown_state = 0,
+ .vendor_size = 0,
+ .shutdown_count = 100,
+};
+
+struct nfit_test_fw {
+ enum intel_fw_update_state state;
+ u32 context;
+ u64 version;
+ u32 size_received;
+ u64 end_time;
+};
+
+struct nfit_test {
+ struct acpi_nfit_desc acpi_desc;
+ struct platform_device pdev;
+ struct list_head resources;
+ void *nfit_buf;
+ dma_addr_t nfit_dma;
+ size_t nfit_size;
+ size_t nfit_filled;
+ int dcr_idx;
+ int num_dcr;
+ int num_pm;
+ void **dimm;
+ dma_addr_t *dimm_dma;
+ void **flush;
+ dma_addr_t *flush_dma;
+ void **label;
+ dma_addr_t *label_dma;
+ void **spa_set;
+ dma_addr_t *spa_set_dma;
+ struct nfit_test_dcr **dcr;
+ dma_addr_t *dcr_dma;
+ int (*alloc)(struct nfit_test *t);
+ void (*setup)(struct nfit_test *t);
+ int setup_hotplug;
+ union acpi_object **_fit;
+ dma_addr_t _fit_dma;
+ struct ars_state {
+ struct nd_cmd_ars_status *ars_status;
+ unsigned long deadline;
+ spinlock_t lock;
+ } ars_state;
+ struct device *dimm_dev[ARRAY_SIZE(handle)];
+ struct nd_intel_smart *smart;
+ struct nd_intel_smart_threshold *smart_threshold;
+ struct badrange badrange;
+ struct work_struct work;
+ struct nfit_test_fw *fw;
+};
+
+static struct workqueue_struct *nfit_wq;
+
+static struct gen_pool *nfit_pool;
+
+static struct nfit_test *to_nfit_test(struct device *dev)
+{
+ struct platform_device *pdev = to_platform_device(dev);
+
+ return container_of(pdev, struct nfit_test, pdev);
+}
+
+static int nd_intel_test_get_fw_info(struct nfit_test *t,
+ struct nd_intel_fw_info *nd_cmd, unsigned int buf_len,
+ int idx)
+{
+ struct device *dev = &t->pdev.dev;
+ struct nfit_test_fw *fw = &t->fw[idx];
+
+ dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p, buf_len: %u, idx: %d\n",
+ __func__, t, nd_cmd, buf_len, idx);
+
+ if (buf_len < sizeof(*nd_cmd))
+ return -EINVAL;
+
+ nd_cmd->status = 0;
+ nd_cmd->storage_size = INTEL_FW_STORAGE_SIZE;
+ nd_cmd->max_send_len = INTEL_FW_MAX_SEND_LEN;
+ nd_cmd->query_interval = INTEL_FW_QUERY_INTERVAL;
+ nd_cmd->max_query_time = INTEL_FW_QUERY_MAX_TIME;
+ nd_cmd->update_cap = 0;
+ nd_cmd->fis_version = INTEL_FW_FIS_VERSION;
+ nd_cmd->run_version = 0;
+ nd_cmd->updated_version = fw->version;
+
+ return 0;
+}
+
+static int nd_intel_test_start_update(struct nfit_test *t,
+ struct nd_intel_fw_start *nd_cmd, unsigned int buf_len,
+ int idx)
+{
+ struct device *dev = &t->pdev.dev;
+ struct nfit_test_fw *fw = &t->fw[idx];
+
+ dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p buf_len: %u idx: %d)\n",
+ __func__, t, nd_cmd, buf_len, idx);
+
+ if (buf_len < sizeof(*nd_cmd))
+ return -EINVAL;
+
+ if (fw->state != FW_STATE_NEW) {
+ /* extended status, FW update in progress */
+ nd_cmd->status = 0x10007;
+ return 0;
+ }
+
+ fw->state = FW_STATE_IN_PROGRESS;
+ fw->context++;
+ fw->size_received = 0;
+ nd_cmd->status = 0;
+ nd_cmd->context = fw->context;
+
+ dev_dbg(dev, "%s: context issued: %#x\n", __func__, nd_cmd->context);
+
+ return 0;
+}
+
+static int nd_intel_test_send_data(struct nfit_test *t,
+ struct nd_intel_fw_send_data *nd_cmd, unsigned int buf_len,
+ int idx)
+{
+ struct device *dev = &t->pdev.dev;
+ struct nfit_test_fw *fw = &t->fw[idx];
+ u32 *status = (u32 *)&nd_cmd->data[nd_cmd->length];
+
+ dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p buf_len: %u idx: %d)\n",
+ __func__, t, nd_cmd, buf_len, idx);
+
+ if (buf_len < sizeof(*nd_cmd))
+ return -EINVAL;
+
+
+ dev_dbg(dev, "%s: cmd->status: %#x\n", __func__, *status);
+ dev_dbg(dev, "%s: cmd->data[0]: %#x\n", __func__, nd_cmd->data[0]);
+ dev_dbg(dev, "%s: cmd->data[%u]: %#x\n", __func__, nd_cmd->length-1,
+ nd_cmd->data[nd_cmd->length-1]);
+
+ if (fw->state != FW_STATE_IN_PROGRESS) {
+ dev_dbg(dev, "%s: not in IN_PROGRESS state\n", __func__);
+ *status = 0x5;
+ return 0;
+ }
+
+ if (nd_cmd->context != fw->context) {
+ dev_dbg(dev, "%s: incorrect context: in: %#x correct: %#x\n",
+ __func__, nd_cmd->context, fw->context);
+ *status = 0x10007;
+ return 0;
+ }
+
+ /*
+ * check offset + len > size of fw storage
+ * check length is > max send length
+ */
+ if (nd_cmd->offset + nd_cmd->length > INTEL_FW_STORAGE_SIZE ||
+ nd_cmd->length > INTEL_FW_MAX_SEND_LEN) {
+ *status = 0x3;
+ dev_dbg(dev, "%s: buffer boundary violation\n", __func__);
+ return 0;
+ }
+
+ fw->size_received += nd_cmd->length;
+ dev_dbg(dev, "%s: copying %u bytes, %u bytes so far\n",
+ __func__, nd_cmd->length, fw->size_received);
+ *status = 0;
+ return 0;
+}
+
+static int nd_intel_test_finish_fw(struct nfit_test *t,
+ struct nd_intel_fw_finish_update *nd_cmd,
+ unsigned int buf_len, int idx)
+{
+ struct device *dev = &t->pdev.dev;
+ struct nfit_test_fw *fw = &t->fw[idx];
+
+ dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p buf_len: %u idx: %d)\n",
+ __func__, t, nd_cmd, buf_len, idx);
+
+ if (fw->state == FW_STATE_UPDATED) {
+ /* update already done, need cold boot */
+ nd_cmd->status = 0x20007;
+ return 0;
+ }
+
+ dev_dbg(dev, "%s: context: %#x ctrl_flags: %#x\n",
+ __func__, nd_cmd->context, nd_cmd->ctrl_flags);
+
+ switch (nd_cmd->ctrl_flags) {
+ case 0: /* finish */
+ if (nd_cmd->context != fw->context) {
+ dev_dbg(dev, "%s: incorrect context: in: %#x correct: %#x\n",
+ __func__, nd_cmd->context,
+ fw->context);
+ nd_cmd->status = 0x10007;
+ return 0;
+ }
+ nd_cmd->status = 0;
+ fw->state = FW_STATE_VERIFY;
+ /* set 1 second of time for firmware "update" */
+ fw->end_time = jiffies + HZ;
+ break;
+
+ case 1: /* abort */
+ fw->size_received = 0;
+ /* successfully aborted status */
+ nd_cmd->status = 0x40007;
+ fw->state = FW_STATE_NEW;
+ dev_dbg(dev, "%s: abort successful\n", __func__);
+ break;
+
+ default: /* bad control flag */
+ dev_warn(dev, "%s: unknown control flag: %#x\n",
+ __func__, nd_cmd->ctrl_flags);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int nd_intel_test_finish_query(struct nfit_test *t,
+ struct nd_intel_fw_finish_query *nd_cmd,
+ unsigned int buf_len, int idx)
+{
+ struct device *dev = &t->pdev.dev;
+ struct nfit_test_fw *fw = &t->fw[idx];
+
+ dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p buf_len: %u idx: %d)\n",
+ __func__, t, nd_cmd, buf_len, idx);
+
+ if (buf_len < sizeof(*nd_cmd))
+ return -EINVAL;
+
+ if (nd_cmd->context != fw->context) {
+ dev_dbg(dev, "%s: incorrect context: in: %#x correct: %#x\n",
+ __func__, nd_cmd->context, fw->context);
+ nd_cmd->status = 0x10007;
+ return 0;
+ }
+
+ dev_dbg(dev, "%s context: %#x\n", __func__, nd_cmd->context);
+
+ switch (fw->state) {
+ case FW_STATE_NEW:
+ nd_cmd->updated_fw_rev = 0;
+ nd_cmd->status = 0;
+ dev_dbg(dev, "%s: new state\n", __func__);
+ break;
+
+ case FW_STATE_IN_PROGRESS:
+ /* sequencing error */
+ nd_cmd->status = 0x40007;
+ nd_cmd->updated_fw_rev = 0;
+ dev_dbg(dev, "%s: sequence error\n", __func__);
+ break;
+
+ case FW_STATE_VERIFY:
+ if (time_is_after_jiffies64(fw->end_time)) {
+ nd_cmd->updated_fw_rev = 0;
+ nd_cmd->status = 0x20007;
+ dev_dbg(dev, "%s: still verifying\n", __func__);
+ break;
+ }
+
+ dev_dbg(dev, "%s: transition out verify\n", __func__);
+ fw->state = FW_STATE_UPDATED;
+ /* we are going to fall through if it's "done" */
+ case FW_STATE_UPDATED:
+ nd_cmd->status = 0;
+ /* bogus test version */
+ fw->version = nd_cmd->updated_fw_rev =
+ INTEL_FW_FAKE_VERSION;
+ dev_dbg(dev, "%s: updated\n", __func__);
+ break;
+
+ default: /* we should never get here */
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int nfit_test_cmd_get_config_size(struct nd_cmd_get_config_size *nd_cmd,
+ unsigned int buf_len)
+{
+ if (buf_len < sizeof(*nd_cmd))
+ return -EINVAL;
+
+ nd_cmd->status = 0;
+ nd_cmd->config_size = LABEL_SIZE;
+ nd_cmd->max_xfer = SZ_4K;
+
+ return 0;
+}
+
+static int nfit_test_cmd_get_config_data(struct nd_cmd_get_config_data_hdr
+ *nd_cmd, unsigned int buf_len, void *label)
+{
+ unsigned int len, offset = nd_cmd->in_offset;
+ int rc;
+
+ if (buf_len < sizeof(*nd_cmd))
+ return -EINVAL;
+ if (offset >= LABEL_SIZE)
+ return -EINVAL;
+ if (nd_cmd->in_length + sizeof(*nd_cmd) > buf_len)
+ return -EINVAL;
+
+ nd_cmd->status = 0;
+ len = min(nd_cmd->in_length, LABEL_SIZE - offset);
+ memcpy(nd_cmd->out_buf, label + offset, len);
+ rc = buf_len - sizeof(*nd_cmd) - len;
+
+ return rc;
+}
+
+static int nfit_test_cmd_set_config_data(struct nd_cmd_set_config_hdr *nd_cmd,
+ unsigned int buf_len, void *label)
+{
+ unsigned int len, offset = nd_cmd->in_offset;
+ u32 *status;
+ int rc;
+
+ if (buf_len < sizeof(*nd_cmd))
+ return -EINVAL;
+ if (offset >= LABEL_SIZE)
+ return -EINVAL;
+ if (nd_cmd->in_length + sizeof(*nd_cmd) + 4 > buf_len)
+ return -EINVAL;
+
+ status = (void *)nd_cmd + nd_cmd->in_length + sizeof(*nd_cmd);
+ *status = 0;
+ len = min(nd_cmd->in_length, LABEL_SIZE - offset);
+ memcpy(label + offset, nd_cmd->in_buf, len);
+ rc = buf_len - sizeof(*nd_cmd) - (len + 4);
+
+ return rc;
+}
+
+#define NFIT_TEST_CLEAR_ERR_UNIT 256
+
+static int nfit_test_cmd_ars_cap(struct nd_cmd_ars_cap *nd_cmd,
+ unsigned int buf_len)
+{
+ int ars_recs;
+
+ if (buf_len < sizeof(*nd_cmd))
+ return -EINVAL;
+
+ /* for testing, only store up to n records that fit within 4k */
+ ars_recs = SZ_4K / sizeof(struct nd_ars_record);
+
+ nd_cmd->max_ars_out = sizeof(struct nd_cmd_ars_status)
+ + ars_recs * sizeof(struct nd_ars_record);
+ nd_cmd->status = (ND_ARS_PERSISTENT | ND_ARS_VOLATILE) << 16;
+ nd_cmd->clear_err_unit = NFIT_TEST_CLEAR_ERR_UNIT;
+
+ return 0;
+}
+
+static void post_ars_status(struct ars_state *ars_state,
+ struct badrange *badrange, u64 addr, u64 len)
+{
+ struct nd_cmd_ars_status *ars_status;
+ struct nd_ars_record *ars_record;
+ struct badrange_entry *be;
+ u64 end = addr + len - 1;
+ int i = 0;
+
+ ars_state->deadline = jiffies + 1*HZ;
+ ars_status = ars_state->ars_status;
+ ars_status->status = 0;
+ ars_status->address = addr;
+ ars_status->length = len;
+ ars_status->type = ND_ARS_PERSISTENT;
+
+ spin_lock(&badrange->lock);
+ list_for_each_entry(be, &badrange->list, list) {
+ u64 be_end = be->start + be->length - 1;
+ u64 rstart, rend;
+
+ /* skip entries outside the range */
+ if (be_end < addr || be->start > end)
+ continue;
+
+ rstart = (be->start < addr) ? addr : be->start;
+ rend = (be_end < end) ? be_end : end;
+ ars_record = &ars_status->records[i];
+ ars_record->handle = 0;
+ ars_record->err_address = rstart;
+ ars_record->length = rend - rstart + 1;
+ i++;
+ }
+ spin_unlock(&badrange->lock);
+ ars_status->num_records = i;
+ ars_status->out_length = sizeof(struct nd_cmd_ars_status)
+ + i * sizeof(struct nd_ars_record);
+}
+
+static int nfit_test_cmd_ars_start(struct nfit_test *t,
+ struct ars_state *ars_state,
+ struct nd_cmd_ars_start *ars_start, unsigned int buf_len,
+ int *cmd_rc)
+{
+ if (buf_len < sizeof(*ars_start))
+ return -EINVAL;
+
+ spin_lock(&ars_state->lock);
+ if (time_before(jiffies, ars_state->deadline)) {
+ ars_start->status = NFIT_ARS_START_BUSY;
+ *cmd_rc = -EBUSY;
+ } else {
+ ars_start->status = 0;
+ ars_start->scrub_time = 1;
+ post_ars_status(ars_state, &t->badrange, ars_start->address,
+ ars_start->length);
+ *cmd_rc = 0;
+ }
+ spin_unlock(&ars_state->lock);
+
+ return 0;
+}
+
+static int nfit_test_cmd_ars_status(struct ars_state *ars_state,
+ struct nd_cmd_ars_status *ars_status, unsigned int buf_len,
+ int *cmd_rc)
+{
+ if (buf_len < ars_state->ars_status->out_length)
+ return -EINVAL;
+
+ spin_lock(&ars_state->lock);
+ if (time_before(jiffies, ars_state->deadline)) {
+ memset(ars_status, 0, buf_len);
+ ars_status->status = NFIT_ARS_STATUS_BUSY;
+ ars_status->out_length = sizeof(*ars_status);
+ *cmd_rc = -EBUSY;
+ } else {
+ memcpy(ars_status, ars_state->ars_status,
+ ars_state->ars_status->out_length);
+ *cmd_rc = 0;
+ }
+ spin_unlock(&ars_state->lock);
+ return 0;
+}
+
+static int nfit_test_cmd_clear_error(struct nfit_test *t,
+ struct nd_cmd_clear_error *clear_err,
+ unsigned int buf_len, int *cmd_rc)
+{
+ const u64 mask = NFIT_TEST_CLEAR_ERR_UNIT - 1;
+ if (buf_len < sizeof(*clear_err))
+ return -EINVAL;
+
+ if ((clear_err->address & mask) || (clear_err->length & mask))
+ return -EINVAL;
+
+ badrange_forget(&t->badrange, clear_err->address, clear_err->length);
+ clear_err->status = 0;
+ clear_err->cleared = clear_err->length;
+ *cmd_rc = 0;
+ return 0;
+}
+
+struct region_search_spa {
+ u64 addr;
+ struct nd_region *region;
+};
+
+static int is_region_device(struct device *dev)
+{
+ return !strncmp(dev->kobj.name, "region", 6);
+}
+
+static int nfit_test_search_region_spa(struct device *dev, void *data)
+{
+ struct region_search_spa *ctx = data;
+ struct nd_region *nd_region;
+ resource_size_t ndr_end;
+
+ if (!is_region_device(dev))
+ return 0;
+
+ nd_region = to_nd_region(dev);
+ ndr_end = nd_region->ndr_start + nd_region->ndr_size;
+
+ if (ctx->addr >= nd_region->ndr_start && ctx->addr < ndr_end) {
+ ctx->region = nd_region;
+ return 1;
+ }
+
+ return 0;
+}
+
+static int nfit_test_search_spa(struct nvdimm_bus *bus,
+ struct nd_cmd_translate_spa *spa)
+{
+ int ret;
+ struct nd_region *nd_region = NULL;
+ struct nvdimm *nvdimm = NULL;
+ struct nd_mapping *nd_mapping = NULL;
+ struct region_search_spa ctx = {
+ .addr = spa->spa,
+ .region = NULL,
+ };
+ u64 dpa;
+
+ ret = device_for_each_child(&bus->dev, &ctx,
+ nfit_test_search_region_spa);
+
+ if (!ret)
+ return -ENODEV;
+
+ nd_region = ctx.region;
+
+ dpa = ctx.addr - nd_region->ndr_start;
+
+ /*
+ * last dimm is selected for test
+ */
+ nd_mapping = &nd_region->mapping[nd_region->ndr_mappings - 1];
+ nvdimm = nd_mapping->nvdimm;
+
+ spa->devices[0].nfit_device_handle = handle[nvdimm->id];
+ spa->num_nvdimms = 1;
+ spa->devices[0].dpa = dpa;
+
+ return 0;
+}
+
+static int nfit_test_cmd_translate_spa(struct nvdimm_bus *bus,
+ struct nd_cmd_translate_spa *spa, unsigned int buf_len)
+{
+ if (buf_len < spa->translate_length)
+ return -EINVAL;
+
+ if (nfit_test_search_spa(bus, spa) < 0 || !spa->num_nvdimms)
+ spa->status = 2;
+
+ return 0;
+}
+
+static int nfit_test_cmd_smart(struct nd_intel_smart *smart, unsigned int buf_len,
+ struct nd_intel_smart *smart_data)
+{
+ if (buf_len < sizeof(*smart))
+ return -EINVAL;
+ memcpy(smart, smart_data, sizeof(*smart));
+ return 0;
+}
+
+static int nfit_test_cmd_smart_threshold(
+ struct nd_intel_smart_threshold *out,
+ unsigned int buf_len,
+ struct nd_intel_smart_threshold *smart_t)
+{
+ if (buf_len < sizeof(*smart_t))
+ return -EINVAL;
+ memcpy(out, smart_t, sizeof(*smart_t));
+ return 0;
+}
+
+static void smart_notify(struct device *bus_dev,
+ struct device *dimm_dev, struct nd_intel_smart *smart,
+ struct nd_intel_smart_threshold *thresh)
+{
+ dev_dbg(dimm_dev, "%s: alarm: %#x spares: %d (%d) mtemp: %d (%d) ctemp: %d (%d)\n",
+ __func__, thresh->alarm_control, thresh->spares,
+ smart->spares, thresh->media_temperature,
+ smart->media_temperature, thresh->ctrl_temperature,
+ smart->ctrl_temperature);
+ if (((thresh->alarm_control & ND_INTEL_SMART_SPARE_TRIP)
+ && smart->spares
+ <= thresh->spares)
+ || ((thresh->alarm_control & ND_INTEL_SMART_TEMP_TRIP)
+ && smart->media_temperature
+ >= thresh->media_temperature)
+ || ((thresh->alarm_control & ND_INTEL_SMART_CTEMP_TRIP)
+ && smart->ctrl_temperature
+ >= thresh->ctrl_temperature)
+ || (smart->health != ND_INTEL_SMART_NON_CRITICAL_HEALTH)
+ || (smart->shutdown_state != 0)) {
+ device_lock(bus_dev);
+ __acpi_nvdimm_notify(dimm_dev, 0x81);
+ device_unlock(bus_dev);
+ }
+}
+
+static int nfit_test_cmd_smart_set_threshold(
+ struct nd_intel_smart_set_threshold *in,
+ unsigned int buf_len,
+ struct nd_intel_smart_threshold *thresh,
+ struct nd_intel_smart *smart,
+ struct device *bus_dev, struct device *dimm_dev)
+{
+ unsigned int size;
+
+ size = sizeof(*in) - 4;
+ if (buf_len < size)
+ return -EINVAL;
+ memcpy(thresh->data, in, size);
+ in->status = 0;
+ smart_notify(bus_dev, dimm_dev, smart, thresh);
+
+ return 0;
+}
+
+static int nfit_test_cmd_smart_inject(
+ struct nd_intel_smart_inject *inj,
+ unsigned int buf_len,
+ struct nd_intel_smart_threshold *thresh,
+ struct nd_intel_smart *smart,
+ struct device *bus_dev, struct device *dimm_dev)
+{
+ if (buf_len != sizeof(*inj))
+ return -EINVAL;
+
+ if (inj->flags & ND_INTEL_SMART_INJECT_MTEMP) {
+ if (inj->mtemp_enable)
+ smart->media_temperature = inj->media_temperature;
+ else
+ smart->media_temperature = smart_def.media_temperature;
+ }
+ if (inj->flags & ND_INTEL_SMART_INJECT_SPARE) {
+ if (inj->spare_enable)
+ smart->spares = inj->spares;
+ else
+ smart->spares = smart_def.spares;
+ }
+ if (inj->flags & ND_INTEL_SMART_INJECT_FATAL) {
+ if (inj->fatal_enable)
+ smart->health = ND_INTEL_SMART_FATAL_HEALTH;
+ else
+ smart->health = ND_INTEL_SMART_NON_CRITICAL_HEALTH;
+ }
+ if (inj->flags & ND_INTEL_SMART_INJECT_SHUTDOWN) {
+ if (inj->unsafe_shutdown_enable) {
+ smart->shutdown_state = 1;
+ smart->shutdown_count++;
+ } else
+ smart->shutdown_state = 0;
+ }
+ inj->status = 0;
+ smart_notify(bus_dev, dimm_dev, smart, thresh);
+
+ return 0;
+}
+
+static void uc_error_notify(struct work_struct *work)
+{
+ struct nfit_test *t = container_of(work, typeof(*t), work);
+
+ __acpi_nfit_notify(&t->pdev.dev, t, NFIT_NOTIFY_UC_MEMORY_ERROR);
+}
+
+static int nfit_test_cmd_ars_error_inject(struct nfit_test *t,
+ struct nd_cmd_ars_err_inj *err_inj, unsigned int buf_len)
+{
+ int rc;
+
+ if (buf_len != sizeof(*err_inj)) {
+ rc = -EINVAL;
+ goto err;
+ }
+
+ if (err_inj->err_inj_spa_range_length <= 0) {
+ rc = -EINVAL;
+ goto err;
+ }
+
+ rc = badrange_add(&t->badrange, err_inj->err_inj_spa_range_base,
+ err_inj->err_inj_spa_range_length);
+ if (rc < 0)
+ goto err;
+
+ if (err_inj->err_inj_options & (1 << ND_ARS_ERR_INJ_OPT_NOTIFY))
+ queue_work(nfit_wq, &t->work);
+
+ err_inj->status = 0;
+ return 0;
+
+err:
+ err_inj->status = NFIT_ARS_INJECT_INVALID;
+ return rc;
+}
+
+static int nfit_test_cmd_ars_inject_clear(struct nfit_test *t,
+ struct nd_cmd_ars_err_inj_clr *err_clr, unsigned int buf_len)
+{
+ int rc;
+
+ if (buf_len != sizeof(*err_clr)) {
+ rc = -EINVAL;
+ goto err;
+ }
+
+ if (err_clr->err_inj_clr_spa_range_length <= 0) {
+ rc = -EINVAL;
+ goto err;
+ }
+
+ badrange_forget(&t->badrange, err_clr->err_inj_clr_spa_range_base,
+ err_clr->err_inj_clr_spa_range_length);
+
+ err_clr->status = 0;
+ return 0;
+
+err:
+ err_clr->status = NFIT_ARS_INJECT_INVALID;
+ return rc;
+}
+
+static int nfit_test_cmd_ars_inject_status(struct nfit_test *t,
+ struct nd_cmd_ars_err_inj_stat *err_stat,
+ unsigned int buf_len)
+{
+ struct badrange_entry *be;
+ int max = SZ_4K / sizeof(struct nd_error_stat_query_record);
+ int i = 0;
+
+ err_stat->status = 0;
+ spin_lock(&t->badrange.lock);
+ list_for_each_entry(be, &t->badrange.list, list) {
+ err_stat->record[i].err_inj_stat_spa_range_base = be->start;
+ err_stat->record[i].err_inj_stat_spa_range_length = be->length;
+ i++;
+ if (i > max)
+ break;
+ }
+ spin_unlock(&t->badrange.lock);
+ err_stat->inj_err_rec_count = i;
+
+ return 0;
+}
+
+static int nd_intel_test_cmd_set_lss_status(struct nfit_test *t,
+ struct nd_intel_lss *nd_cmd, unsigned int buf_len)
+{
+ struct device *dev = &t->pdev.dev;
+
+ if (buf_len < sizeof(*nd_cmd))
+ return -EINVAL;
+
+ switch (nd_cmd->enable) {
+ case 0:
+ nd_cmd->status = 0;
+ dev_dbg(dev, "%s: Latch System Shutdown Status disabled\n",
+ __func__);
+ break;
+ case 1:
+ nd_cmd->status = 0;
+ dev_dbg(dev, "%s: Latch System Shutdown Status enabled\n",
+ __func__);
+ break;
+ default:
+ dev_warn(dev, "Unknown enable value: %#x\n", nd_cmd->enable);
+ nd_cmd->status = 0x3;
+ break;
+ }
+
+
+ return 0;
+}
+
+static int override_return_code(int dimm, unsigned int func, int rc)
+{
+ if ((1 << func) & dimm_fail_cmd_flags[dimm]) {
+ if (dimm_fail_cmd_code[dimm])
+ return dimm_fail_cmd_code[dimm];
+ return -EIO;
+ }
+ return rc;
+}
+
+static int get_dimm(struct nfit_mem *nfit_mem, unsigned int func)
+{
+ int i;
+
+ /* lookup per-dimm data */
+ for (i = 0; i < ARRAY_SIZE(handle); i++)
+ if (__to_nfit_memdev(nfit_mem)->device_handle == handle[i])
+ break;
+ if (i >= ARRAY_SIZE(handle))
+ return -ENXIO;
+ return i;
+}
+
+static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
+ struct nvdimm *nvdimm, unsigned int cmd, void *buf,
+ unsigned int buf_len, int *cmd_rc)
+{
+ struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
+ struct nfit_test *t = container_of(acpi_desc, typeof(*t), acpi_desc);
+ unsigned int func = cmd;
+ int i, rc = 0, __cmd_rc;
+
+ if (!cmd_rc)
+ cmd_rc = &__cmd_rc;
+ *cmd_rc = 0;
+
+ if (nvdimm) {
+ struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
+ unsigned long cmd_mask = nvdimm_cmd_mask(nvdimm);
+
+ if (!nfit_mem)
+ return -ENOTTY;
+
+ if (cmd == ND_CMD_CALL) {
+ struct nd_cmd_pkg *call_pkg = buf;
+
+ buf_len = call_pkg->nd_size_in + call_pkg->nd_size_out;
+ buf = (void *) call_pkg->nd_payload;
+ func = call_pkg->nd_command;
+ if (call_pkg->nd_family != nfit_mem->family)
+ return -ENOTTY;
+
+ i = get_dimm(nfit_mem, func);
+ if (i < 0)
+ return i;
+
+ switch (func) {
+ case ND_INTEL_ENABLE_LSS_STATUS:
+ rc = nd_intel_test_cmd_set_lss_status(t,
+ buf, buf_len);
+ break;
+ case ND_INTEL_FW_GET_INFO:
+ rc = nd_intel_test_get_fw_info(t, buf,
+ buf_len, i - t->dcr_idx);
+ break;
+ case ND_INTEL_FW_START_UPDATE:
+ rc = nd_intel_test_start_update(t, buf,
+ buf_len, i - t->dcr_idx);
+ break;
+ case ND_INTEL_FW_SEND_DATA:
+ rc = nd_intel_test_send_data(t, buf,
+ buf_len, i - t->dcr_idx);
+ break;
+ case ND_INTEL_FW_FINISH_UPDATE:
+ rc = nd_intel_test_finish_fw(t, buf,
+ buf_len, i - t->dcr_idx);
+ break;
+ case ND_INTEL_FW_FINISH_QUERY:
+ rc = nd_intel_test_finish_query(t, buf,
+ buf_len, i - t->dcr_idx);
+ break;
+ case ND_INTEL_SMART:
+ rc = nfit_test_cmd_smart(buf, buf_len,
+ &t->smart[i - t->dcr_idx]);
+ break;
+ case ND_INTEL_SMART_THRESHOLD:
+ rc = nfit_test_cmd_smart_threshold(buf,
+ buf_len,
+ &t->smart_threshold[i -
+ t->dcr_idx]);
+ break;
+ case ND_INTEL_SMART_SET_THRESHOLD:
+ rc = nfit_test_cmd_smart_set_threshold(buf,
+ buf_len,
+ &t->smart_threshold[i -
+ t->dcr_idx],
+ &t->smart[i - t->dcr_idx],
+ &t->pdev.dev, t->dimm_dev[i]);
+ break;
+ case ND_INTEL_SMART_INJECT:
+ rc = nfit_test_cmd_smart_inject(buf,
+ buf_len,
+ &t->smart_threshold[i -
+ t->dcr_idx],
+ &t->smart[i - t->dcr_idx],
+ &t->pdev.dev, t->dimm_dev[i]);
+ break;
+ default:
+ return -ENOTTY;
+ }
+ return override_return_code(i, func, rc);
+ }
+
+ if (!test_bit(cmd, &cmd_mask)
+ || !test_bit(func, &nfit_mem->dsm_mask))
+ return -ENOTTY;
+
+ i = get_dimm(nfit_mem, func);
+ if (i < 0)
+ return i;
+
+ switch (func) {
+ case ND_CMD_GET_CONFIG_SIZE:
+ rc = nfit_test_cmd_get_config_size(buf, buf_len);
+ break;
+ case ND_CMD_GET_CONFIG_DATA:
+ rc = nfit_test_cmd_get_config_data(buf, buf_len,
+ t->label[i - t->dcr_idx]);
+ break;
+ case ND_CMD_SET_CONFIG_DATA:
+ rc = nfit_test_cmd_set_config_data(buf, buf_len,
+ t->label[i - t->dcr_idx]);
+ break;
+ default:
+ return -ENOTTY;
+ }
+ return override_return_code(i, func, rc);
+ } else {
+ struct ars_state *ars_state = &t->ars_state;
+ struct nd_cmd_pkg *call_pkg = buf;
+
+ if (!nd_desc)
+ return -ENOTTY;
+
+ if (cmd == ND_CMD_CALL) {
+ func = call_pkg->nd_command;
+
+ buf_len = call_pkg->nd_size_in + call_pkg->nd_size_out;
+ buf = (void *) call_pkg->nd_payload;
+
+ switch (func) {
+ case NFIT_CMD_TRANSLATE_SPA:
+ rc = nfit_test_cmd_translate_spa(
+ acpi_desc->nvdimm_bus, buf, buf_len);
+ return rc;
+ case NFIT_CMD_ARS_INJECT_SET:
+ rc = nfit_test_cmd_ars_error_inject(t, buf,
+ buf_len);
+ return rc;
+ case NFIT_CMD_ARS_INJECT_CLEAR:
+ rc = nfit_test_cmd_ars_inject_clear(t, buf,
+ buf_len);
+ return rc;
+ case NFIT_CMD_ARS_INJECT_GET:
+ rc = nfit_test_cmd_ars_inject_status(t, buf,
+ buf_len);
+ return rc;
+ default:
+ return -ENOTTY;
+ }
+ }
+
+ if (!nd_desc || !test_bit(cmd, &nd_desc->cmd_mask))
+ return -ENOTTY;
+
+ switch (func) {
+ case ND_CMD_ARS_CAP:
+ rc = nfit_test_cmd_ars_cap(buf, buf_len);
+ break;
+ case ND_CMD_ARS_START:
+ rc = nfit_test_cmd_ars_start(t, ars_state, buf,
+ buf_len, cmd_rc);
+ break;
+ case ND_CMD_ARS_STATUS:
+ rc = nfit_test_cmd_ars_status(ars_state, buf, buf_len,
+ cmd_rc);
+ break;
+ case ND_CMD_CLEAR_ERROR:
+ rc = nfit_test_cmd_clear_error(t, buf, buf_len, cmd_rc);
+ break;
+ default:
+ return -ENOTTY;
+ }
+ }
+
+ return rc;
+}
+
+static DEFINE_SPINLOCK(nfit_test_lock);
+static struct nfit_test *instances[NUM_NFITS];
+
+static void release_nfit_res(void *data)
+{
+ struct nfit_test_resource *nfit_res = data;
+
+ spin_lock(&nfit_test_lock);
+ list_del(&nfit_res->list);
+ spin_unlock(&nfit_test_lock);
+
+ if (resource_size(&nfit_res->res) >= DIMM_SIZE)
+ gen_pool_free(nfit_pool, nfit_res->res.start,
+ resource_size(&nfit_res->res));
+ vfree(nfit_res->buf);
+ kfree(nfit_res);
+}
+
+static void *__test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma,
+ void *buf)
+{
+ struct device *dev = &t->pdev.dev;
+ struct nfit_test_resource *nfit_res = kzalloc(sizeof(*nfit_res),
+ GFP_KERNEL);
+ int rc;
+
+ if (!buf || !nfit_res || !*dma)
+ goto err;
+ rc = devm_add_action(dev, release_nfit_res, nfit_res);
+ if (rc)
+ goto err;
+ INIT_LIST_HEAD(&nfit_res->list);
+ memset(buf, 0, size);
+ nfit_res->dev = dev;
+ nfit_res->buf = buf;
+ nfit_res->res.start = *dma;
+ nfit_res->res.end = *dma + size - 1;
+ nfit_res->res.name = "NFIT";
+ spin_lock_init(&nfit_res->lock);
+ INIT_LIST_HEAD(&nfit_res->requests);
+ spin_lock(&nfit_test_lock);
+ list_add(&nfit_res->list, &t->resources);
+ spin_unlock(&nfit_test_lock);
+
+ return nfit_res->buf;
+ err:
+ if (*dma && size >= DIMM_SIZE)
+ gen_pool_free(nfit_pool, *dma, size);
+ if (buf)
+ vfree(buf);
+ kfree(nfit_res);
+ return NULL;
+}
+
+static void *test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma)
+{
+ struct genpool_data_align data = {
+ .align = SZ_128M,
+ };
+ void *buf = vmalloc(size);
+
+ if (size >= DIMM_SIZE)
+ *dma = gen_pool_alloc_algo(nfit_pool, size,
+ gen_pool_first_fit_align, &data);
+ else
+ *dma = (unsigned long) buf;
+ return __test_alloc(t, size, dma, buf);
+}
+
+static struct nfit_test_resource *nfit_test_lookup(resource_size_t addr)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(instances); i++) {
+ struct nfit_test_resource *n, *nfit_res = NULL;
+ struct nfit_test *t = instances[i];
+
+ if (!t)
+ continue;
+ spin_lock(&nfit_test_lock);
+ list_for_each_entry(n, &t->resources, list) {
+ if (addr >= n->res.start && (addr < n->res.start
+ + resource_size(&n->res))) {
+ nfit_res = n;
+ break;
+ } else if (addr >= (unsigned long) n->buf
+ && (addr < (unsigned long) n->buf
+ + resource_size(&n->res))) {
+ nfit_res = n;
+ break;
+ }
+ }
+ spin_unlock(&nfit_test_lock);
+ if (nfit_res)
+ return nfit_res;
+ }
+
+ return NULL;
+}
+
+static int ars_state_init(struct device *dev, struct ars_state *ars_state)
+{
+ /* for testing, only store up to n records that fit within 4k */
+ ars_state->ars_status = devm_kzalloc(dev,
+ sizeof(struct nd_cmd_ars_status) + SZ_4K, GFP_KERNEL);
+ if (!ars_state->ars_status)
+ return -ENOMEM;
+ spin_lock_init(&ars_state->lock);
+ return 0;
+}
+
+static void put_dimms(void *data)
+{
+ struct nfit_test *t = data;
+ int i;
+
+ for (i = 0; i < t->num_dcr; i++)
+ if (t->dimm_dev[i])
+ device_unregister(t->dimm_dev[i]);
+}
+
+static struct class *nfit_test_dimm;
+
+static int dimm_name_to_id(struct device *dev)
+{
+ int dimm;
+
+ if (sscanf(dev_name(dev), "test_dimm%d", &dimm) != 1)
+ return -ENXIO;
+ return dimm;
+}
+
+static ssize_t handle_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ int dimm = dimm_name_to_id(dev);
+
+ if (dimm < 0)
+ return dimm;
+
+ return sprintf(buf, "%#x\n", handle[dimm]);
+}
+DEVICE_ATTR_RO(handle);
+
+static ssize_t fail_cmd_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ int dimm = dimm_name_to_id(dev);
+
+ if (dimm < 0)
+ return dimm;
+
+ return sprintf(buf, "%#lx\n", dimm_fail_cmd_flags[dimm]);
+}
+
+static ssize_t fail_cmd_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t size)
+{
+ int dimm = dimm_name_to_id(dev);
+ unsigned long val;
+ ssize_t rc;
+
+ if (dimm < 0)
+ return dimm;
+
+ rc = kstrtol(buf, 0, &val);
+ if (rc)
+ return rc;
+
+ dimm_fail_cmd_flags[dimm] = val;
+ return size;
+}
+static DEVICE_ATTR_RW(fail_cmd);
+
+static ssize_t fail_cmd_code_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ int dimm = dimm_name_to_id(dev);
+
+ if (dimm < 0)
+ return dimm;
+
+ return sprintf(buf, "%d\n", dimm_fail_cmd_code[dimm]);
+}
+
+static ssize_t fail_cmd_code_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t size)
+{
+ int dimm = dimm_name_to_id(dev);
+ unsigned long val;
+ ssize_t rc;
+
+ if (dimm < 0)
+ return dimm;
+
+ rc = kstrtol(buf, 0, &val);
+ if (rc)
+ return rc;
+
+ dimm_fail_cmd_code[dimm] = val;
+ return size;
+}
+static DEVICE_ATTR_RW(fail_cmd_code);
+
+static struct attribute *nfit_test_dimm_attributes[] = {
+ &dev_attr_fail_cmd.attr,
+ &dev_attr_fail_cmd_code.attr,
+ &dev_attr_handle.attr,
+ NULL,
+};
+
+static struct attribute_group nfit_test_dimm_attribute_group = {
+ .attrs = nfit_test_dimm_attributes,
+};
+
+static const struct attribute_group *nfit_test_dimm_attribute_groups[] = {
+ &nfit_test_dimm_attribute_group,
+ NULL,
+};
+
+static int nfit_test_dimm_init(struct nfit_test *t)
+{
+ int i;
+
+ if (devm_add_action_or_reset(&t->pdev.dev, put_dimms, t))
+ return -ENOMEM;
+ for (i = 0; i < t->num_dcr; i++) {
+ t->dimm_dev[i] = device_create_with_groups(nfit_test_dimm,
+ &t->pdev.dev, 0, NULL,
+ nfit_test_dimm_attribute_groups,
+ "test_dimm%d", i + t->dcr_idx);
+ if (!t->dimm_dev[i])
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+static void smart_init(struct nfit_test *t)
+{
+ int i;
+ const struct nd_intel_smart_threshold smart_t_data = {
+ .alarm_control = ND_INTEL_SMART_SPARE_TRIP
+ | ND_INTEL_SMART_TEMP_TRIP,
+ .media_temperature = 40 * 16,
+ .ctrl_temperature = 30 * 16,
+ .spares = 5,
+ };
+
+ for (i = 0; i < t->num_dcr; i++) {
+ memcpy(&t->smart[i], &smart_def, sizeof(smart_def));
+ memcpy(&t->smart_threshold[i], &smart_t_data,
+ sizeof(smart_t_data));
+ }
+}
+
+static int nfit_test0_alloc(struct nfit_test *t)
+{
+ size_t nfit_size = sizeof(struct acpi_nfit_system_address) * NUM_SPA
+ + sizeof(struct acpi_nfit_memory_map) * NUM_MEM
+ + sizeof(struct acpi_nfit_control_region) * NUM_DCR
+ + offsetof(struct acpi_nfit_control_region,
+ window_size) * NUM_DCR
+ + sizeof(struct acpi_nfit_data_region) * NUM_BDW
+ + (sizeof(struct acpi_nfit_flush_address)
+ + sizeof(u64) * NUM_HINTS) * NUM_DCR
+ + sizeof(struct acpi_nfit_capabilities);
+ int i;
+
+ t->nfit_buf = test_alloc(t, nfit_size, &t->nfit_dma);
+ if (!t->nfit_buf)
+ return -ENOMEM;
+ t->nfit_size = nfit_size;
+
+ t->spa_set[0] = test_alloc(t, SPA0_SIZE, &t->spa_set_dma[0]);
+ if (!t->spa_set[0])
+ return -ENOMEM;
+
+ t->spa_set[1] = test_alloc(t, SPA1_SIZE, &t->spa_set_dma[1]);
+ if (!t->spa_set[1])
+ return -ENOMEM;
+
+ t->spa_set[2] = test_alloc(t, SPA0_SIZE, &t->spa_set_dma[2]);
+ if (!t->spa_set[2])
+ return -ENOMEM;
+
+ for (i = 0; i < t->num_dcr; i++) {
+ t->dimm[i] = test_alloc(t, DIMM_SIZE, &t->dimm_dma[i]);
+ if (!t->dimm[i])
+ return -ENOMEM;
+
+ t->label[i] = test_alloc(t, LABEL_SIZE, &t->label_dma[i]);
+ if (!t->label[i])
+ return -ENOMEM;
+ sprintf(t->label[i], "label%d", i);
+
+ t->flush[i] = test_alloc(t, max(PAGE_SIZE,
+ sizeof(u64) * NUM_HINTS),
+ &t->flush_dma[i]);
+ if (!t->flush[i])
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < t->num_dcr; i++) {
+ t->dcr[i] = test_alloc(t, LABEL_SIZE, &t->dcr_dma[i]);
+ if (!t->dcr[i])
+ return -ENOMEM;
+ }
+
+ t->_fit = test_alloc(t, sizeof(union acpi_object **), &t->_fit_dma);
+ if (!t->_fit)
+ return -ENOMEM;
+
+ if (nfit_test_dimm_init(t))
+ return -ENOMEM;
+ smart_init(t);
+ return ars_state_init(&t->pdev.dev, &t->ars_state);
+}
+
+static int nfit_test1_alloc(struct nfit_test *t)
+{
+ size_t nfit_size = sizeof(struct acpi_nfit_system_address) * 2
+ + sizeof(struct acpi_nfit_memory_map) * 2
+ + offsetof(struct acpi_nfit_control_region, window_size) * 2;
+ int i;
+
+ t->nfit_buf = test_alloc(t, nfit_size, &t->nfit_dma);
+ if (!t->nfit_buf)
+ return -ENOMEM;
+ t->nfit_size = nfit_size;
+
+ t->spa_set[0] = test_alloc(t, SPA2_SIZE, &t->spa_set_dma[0]);
+ if (!t->spa_set[0])
+ return -ENOMEM;
+
+ for (i = 0; i < t->num_dcr; i++) {
+ t->label[i] = test_alloc(t, LABEL_SIZE, &t->label_dma[i]);
+ if (!t->label[i])
+ return -ENOMEM;
+ sprintf(t->label[i], "label%d", i);
+ }
+
+ t->spa_set[1] = test_alloc(t, SPA_VCD_SIZE, &t->spa_set_dma[1]);
+ if (!t->spa_set[1])
+ return -ENOMEM;
+
+ if (nfit_test_dimm_init(t))
+ return -ENOMEM;
+ smart_init(t);
+ return ars_state_init(&t->pdev.dev, &t->ars_state);
+}
+
+static void dcr_common_init(struct acpi_nfit_control_region *dcr)
+{
+ dcr->vendor_id = 0xabcd;
+ dcr->device_id = 0;
+ dcr->revision_id = 1;
+ dcr->valid_fields = 1;
+ dcr->manufacturing_location = 0xa;
+ dcr->manufacturing_date = cpu_to_be16(2016);
+}
+
+static void nfit_test0_setup(struct nfit_test *t)
+{
+ const int flush_hint_size = sizeof(struct acpi_nfit_flush_address)
+ + (sizeof(u64) * NUM_HINTS);
+ struct acpi_nfit_desc *acpi_desc;
+ struct acpi_nfit_memory_map *memdev;
+ void *nfit_buf = t->nfit_buf;
+ struct acpi_nfit_system_address *spa;
+ struct acpi_nfit_control_region *dcr;
+ struct acpi_nfit_data_region *bdw;
+ struct acpi_nfit_flush_address *flush;
+ struct acpi_nfit_capabilities *pcap;
+ unsigned int offset = 0, i;
+
+ /*
+ * spa0 (interleave first half of dimm0 and dimm1, note storage
+ * does not actually alias the related block-data-window
+ * regions)
+ */
+ spa = nfit_buf;
+ spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+ spa->header.length = sizeof(*spa);
+ memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16);
+ spa->range_index = 0+1;
+ spa->address = t->spa_set_dma[0];
+ spa->length = SPA0_SIZE;
+ offset += spa->header.length;
+
+ /*
+ * spa1 (interleave last half of the 4 DIMMS, note storage
+ * does not actually alias the related block-data-window
+ * regions)
+ */
+ spa = nfit_buf + offset;
+ spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+ spa->header.length = sizeof(*spa);
+ memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16);
+ spa->range_index = 1+1;
+ spa->address = t->spa_set_dma[1];
+ spa->length = SPA1_SIZE;
+ offset += spa->header.length;
+
+ /* spa2 (dcr0) dimm0 */
+ spa = nfit_buf + offset;
+ spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+ spa->header.length = sizeof(*spa);
+ memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
+ spa->range_index = 2+1;
+ spa->address = t->dcr_dma[0];
+ spa->length = DCR_SIZE;
+ offset += spa->header.length;
+
+ /* spa3 (dcr1) dimm1 */
+ spa = nfit_buf + offset;
+ spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+ spa->header.length = sizeof(*spa);
+ memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
+ spa->range_index = 3+1;
+ spa->address = t->dcr_dma[1];
+ spa->length = DCR_SIZE;
+ offset += spa->header.length;
+
+ /* spa4 (dcr2) dimm2 */
+ spa = nfit_buf + offset;
+ spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+ spa->header.length = sizeof(*spa);
+ memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
+ spa->range_index = 4+1;
+ spa->address = t->dcr_dma[2];
+ spa->length = DCR_SIZE;
+ offset += spa->header.length;
+
+ /* spa5 (dcr3) dimm3 */
+ spa = nfit_buf + offset;
+ spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+ spa->header.length = sizeof(*spa);
+ memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
+ spa->range_index = 5+1;
+ spa->address = t->dcr_dma[3];
+ spa->length = DCR_SIZE;
+ offset += spa->header.length;
+
+ /* spa6 (bdw for dcr0) dimm0 */
+ spa = nfit_buf + offset;
+ spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+ spa->header.length = sizeof(*spa);
+ memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
+ spa->range_index = 6+1;
+ spa->address = t->dimm_dma[0];
+ spa->length = DIMM_SIZE;
+ offset += spa->header.length;
+
+ /* spa7 (bdw for dcr1) dimm1 */
+ spa = nfit_buf + offset;
+ spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+ spa->header.length = sizeof(*spa);
+ memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
+ spa->range_index = 7+1;
+ spa->address = t->dimm_dma[1];
+ spa->length = DIMM_SIZE;
+ offset += spa->header.length;
+
+ /* spa8 (bdw for dcr2) dimm2 */
+ spa = nfit_buf + offset;
+ spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+ spa->header.length = sizeof(*spa);
+ memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
+ spa->range_index = 8+1;
+ spa->address = t->dimm_dma[2];
+ spa->length = DIMM_SIZE;
+ offset += spa->header.length;
+
+ /* spa9 (bdw for dcr3) dimm3 */
+ spa = nfit_buf + offset;
+ spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+ spa->header.length = sizeof(*spa);
+ memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
+ spa->range_index = 9+1;
+ spa->address = t->dimm_dma[3];
+ spa->length = DIMM_SIZE;
+ offset += spa->header.length;
+
+ /* mem-region0 (spa0, dimm0) */
+ memdev = nfit_buf + offset;
+ memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+ memdev->header.length = sizeof(*memdev);
+ memdev->device_handle = handle[0];
+ memdev->physical_id = 0;
+ memdev->region_id = 0;
+ memdev->range_index = 0+1;
+ memdev->region_index = 4+1;
+ memdev->region_size = SPA0_SIZE/2;
+ memdev->region_offset = 1;
+ memdev->address = 0;
+ memdev->interleave_index = 0;
+ memdev->interleave_ways = 2;
+ offset += memdev->header.length;
+
+ /* mem-region1 (spa0, dimm1) */
+ memdev = nfit_buf + offset;
+ memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+ memdev->header.length = sizeof(*memdev);
+ memdev->device_handle = handle[1];
+ memdev->physical_id = 1;
+ memdev->region_id = 0;
+ memdev->range_index = 0+1;
+ memdev->region_index = 5+1;
+ memdev->region_size = SPA0_SIZE/2;
+ memdev->region_offset = (1 << 8);
+ memdev->address = 0;
+ memdev->interleave_index = 0;
+ memdev->interleave_ways = 2;
+ memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED;
+ offset += memdev->header.length;
+
+ /* mem-region2 (spa1, dimm0) */
+ memdev = nfit_buf + offset;
+ memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+ memdev->header.length = sizeof(*memdev);
+ memdev->device_handle = handle[0];
+ memdev->physical_id = 0;
+ memdev->region_id = 1;
+ memdev->range_index = 1+1;
+ memdev->region_index = 4+1;
+ memdev->region_size = SPA1_SIZE/4;
+ memdev->region_offset = (1 << 16);
+ memdev->address = SPA0_SIZE/2;
+ memdev->interleave_index = 0;
+ memdev->interleave_ways = 4;
+ memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED;
+ offset += memdev->header.length;
+
+ /* mem-region3 (spa1, dimm1) */
+ memdev = nfit_buf + offset;
+ memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+ memdev->header.length = sizeof(*memdev);
+ memdev->device_handle = handle[1];
+ memdev->physical_id = 1;
+ memdev->region_id = 1;
+ memdev->range_index = 1+1;
+ memdev->region_index = 5+1;
+ memdev->region_size = SPA1_SIZE/4;
+ memdev->region_offset = (1 << 24);
+ memdev->address = SPA0_SIZE/2;
+ memdev->interleave_index = 0;
+ memdev->interleave_ways = 4;
+ offset += memdev->header.length;
+
+ /* mem-region4 (spa1, dimm2) */
+ memdev = nfit_buf + offset;
+ memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+ memdev->header.length = sizeof(*memdev);
+ memdev->device_handle = handle[2];
+ memdev->physical_id = 2;
+ memdev->region_id = 0;
+ memdev->range_index = 1+1;
+ memdev->region_index = 6+1;
+ memdev->region_size = SPA1_SIZE/4;
+ memdev->region_offset = (1ULL << 32);
+ memdev->address = SPA0_SIZE/2;
+ memdev->interleave_index = 0;
+ memdev->interleave_ways = 4;
+ memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED;
+ offset += memdev->header.length;
+
+ /* mem-region5 (spa1, dimm3) */
+ memdev = nfit_buf + offset;
+ memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+ memdev->header.length = sizeof(*memdev);
+ memdev->device_handle = handle[3];
+ memdev->physical_id = 3;
+ memdev->region_id = 0;
+ memdev->range_index = 1+1;
+ memdev->region_index = 7+1;
+ memdev->region_size = SPA1_SIZE/4;
+ memdev->region_offset = (1ULL << 40);
+ memdev->address = SPA0_SIZE/2;
+ memdev->interleave_index = 0;
+ memdev->interleave_ways = 4;
+ offset += memdev->header.length;
+
+ /* mem-region6 (spa/dcr0, dimm0) */
+ memdev = nfit_buf + offset;
+ memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+ memdev->header.length = sizeof(*memdev);
+ memdev->device_handle = handle[0];
+ memdev->physical_id = 0;
+ memdev->region_id = 0;
+ memdev->range_index = 2+1;
+ memdev->region_index = 0+1;
+ memdev->region_size = 0;
+ memdev->region_offset = 0;
+ memdev->address = 0;
+ memdev->interleave_index = 0;
+ memdev->interleave_ways = 1;
+ offset += memdev->header.length;
+
+ /* mem-region7 (spa/dcr1, dimm1) */
+ memdev = nfit_buf + offset;
+ memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+ memdev->header.length = sizeof(*memdev);
+ memdev->device_handle = handle[1];
+ memdev->physical_id = 1;
+ memdev->region_id = 0;
+ memdev->range_index = 3+1;
+ memdev->region_index = 1+1;
+ memdev->region_size = 0;
+ memdev->region_offset = 0;
+ memdev->address = 0;
+ memdev->interleave_index = 0;
+ memdev->interleave_ways = 1;
+ offset += memdev->header.length;
+
+ /* mem-region8 (spa/dcr2, dimm2) */
+ memdev = nfit_buf + offset;
+ memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+ memdev->header.length = sizeof(*memdev);
+ memdev->device_handle = handle[2];
+ memdev->physical_id = 2;
+ memdev->region_id = 0;
+ memdev->range_index = 4+1;
+ memdev->region_index = 2+1;
+ memdev->region_size = 0;
+ memdev->region_offset = 0;
+ memdev->address = 0;
+ memdev->interleave_index = 0;
+ memdev->interleave_ways = 1;
+ offset += memdev->header.length;
+
+ /* mem-region9 (spa/dcr3, dimm3) */
+ memdev = nfit_buf + offset;
+ memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+ memdev->header.length = sizeof(*memdev);
+ memdev->device_handle = handle[3];
+ memdev->physical_id = 3;
+ memdev->region_id = 0;
+ memdev->range_index = 5+1;
+ memdev->region_index = 3+1;
+ memdev->region_size = 0;
+ memdev->region_offset = 0;
+ memdev->address = 0;
+ memdev->interleave_index = 0;
+ memdev->interleave_ways = 1;
+ offset += memdev->header.length;
+
+ /* mem-region10 (spa/bdw0, dimm0) */
+ memdev = nfit_buf + offset;
+ memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+ memdev->header.length = sizeof(*memdev);
+ memdev->device_handle = handle[0];
+ memdev->physical_id = 0;
+ memdev->region_id = 0;
+ memdev->range_index = 6+1;
+ memdev->region_index = 0+1;
+ memdev->region_size = 0;
+ memdev->region_offset = 0;
+ memdev->address = 0;
+ memdev->interleave_index = 0;
+ memdev->interleave_ways = 1;
+ offset += memdev->header.length;
+
+ /* mem-region11 (spa/bdw1, dimm1) */
+ memdev = nfit_buf + offset;
+ memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+ memdev->header.length = sizeof(*memdev);
+ memdev->device_handle = handle[1];
+ memdev->physical_id = 1;
+ memdev->region_id = 0;
+ memdev->range_index = 7+1;
+ memdev->region_index = 1+1;
+ memdev->region_size = 0;
+ memdev->region_offset = 0;
+ memdev->address = 0;
+ memdev->interleave_index = 0;
+ memdev->interleave_ways = 1;
+ offset += memdev->header.length;
+
+ /* mem-region12 (spa/bdw2, dimm2) */
+ memdev = nfit_buf + offset;
+ memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+ memdev->header.length = sizeof(*memdev);
+ memdev->device_handle = handle[2];
+ memdev->physical_id = 2;
+ memdev->region_id = 0;
+ memdev->range_index = 8+1;
+ memdev->region_index = 2+1;
+ memdev->region_size = 0;
+ memdev->region_offset = 0;
+ memdev->address = 0;
+ memdev->interleave_index = 0;
+ memdev->interleave_ways = 1;
+ offset += memdev->header.length;
+
+ /* mem-region13 (spa/dcr3, dimm3) */
+ memdev = nfit_buf + offset;
+ memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+ memdev->header.length = sizeof(*memdev);
+ memdev->device_handle = handle[3];
+ memdev->physical_id = 3;
+ memdev->region_id = 0;
+ memdev->range_index = 9+1;
+ memdev->region_index = 3+1;
+ memdev->region_size = 0;
+ memdev->region_offset = 0;
+ memdev->address = 0;
+ memdev->interleave_index = 0;
+ memdev->interleave_ways = 1;
+ memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED;
+ offset += memdev->header.length;
+
+ /* dcr-descriptor0: blk */
+ dcr = nfit_buf + offset;
+ dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
+ dcr->header.length = sizeof(*dcr);
+ dcr->region_index = 0+1;
+ dcr_common_init(dcr);
+ dcr->serial_number = ~handle[0];
+ dcr->code = NFIT_FIC_BLK;
+ dcr->windows = 1;
+ dcr->window_size = DCR_SIZE;
+ dcr->command_offset = 0;
+ dcr->command_size = 8;
+ dcr->status_offset = 8;
+ dcr->status_size = 4;
+ offset += dcr->header.length;
+
+ /* dcr-descriptor1: blk */
+ dcr = nfit_buf + offset;
+ dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
+ dcr->header.length = sizeof(*dcr);
+ dcr->region_index = 1+1;
+ dcr_common_init(dcr);
+ dcr->serial_number = ~handle[1];
+ dcr->code = NFIT_FIC_BLK;
+ dcr->windows = 1;
+ dcr->window_size = DCR_SIZE;
+ dcr->command_offset = 0;
+ dcr->command_size = 8;
+ dcr->status_offset = 8;
+ dcr->status_size = 4;
+ offset += dcr->header.length;
+
+ /* dcr-descriptor2: blk */
+ dcr = nfit_buf + offset;
+ dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
+ dcr->header.length = sizeof(*dcr);
+ dcr->region_index = 2+1;
+ dcr_common_init(dcr);
+ dcr->serial_number = ~handle[2];
+ dcr->code = NFIT_FIC_BLK;
+ dcr->windows = 1;
+ dcr->window_size = DCR_SIZE;
+ dcr->command_offset = 0;
+ dcr->command_size = 8;
+ dcr->status_offset = 8;
+ dcr->status_size = 4;
+ offset += dcr->header.length;
+
+ /* dcr-descriptor3: blk */
+ dcr = nfit_buf + offset;
+ dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
+ dcr->header.length = sizeof(*dcr);
+ dcr->region_index = 3+1;
+ dcr_common_init(dcr);
+ dcr->serial_number = ~handle[3];
+ dcr->code = NFIT_FIC_BLK;
+ dcr->windows = 1;
+ dcr->window_size = DCR_SIZE;
+ dcr->command_offset = 0;
+ dcr->command_size = 8;
+ dcr->status_offset = 8;
+ dcr->status_size = 4;
+ offset += dcr->header.length;
+
+ /* dcr-descriptor0: pmem */
+ dcr = nfit_buf + offset;
+ dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
+ dcr->header.length = offsetof(struct acpi_nfit_control_region,
+ window_size);
+ dcr->region_index = 4+1;
+ dcr_common_init(dcr);
+ dcr->serial_number = ~handle[0];
+ dcr->code = NFIT_FIC_BYTEN;
+ dcr->windows = 0;
+ offset += dcr->header.length;
+
+ /* dcr-descriptor1: pmem */
+ dcr = nfit_buf + offset;
+ dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
+ dcr->header.length = offsetof(struct acpi_nfit_control_region,
+ window_size);
+ dcr->region_index = 5+1;
+ dcr_common_init(dcr);
+ dcr->serial_number = ~handle[1];
+ dcr->code = NFIT_FIC_BYTEN;
+ dcr->windows = 0;
+ offset += dcr->header.length;
+
+ /* dcr-descriptor2: pmem */
+ dcr = nfit_buf + offset;
+ dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
+ dcr->header.length = offsetof(struct acpi_nfit_control_region,
+ window_size);
+ dcr->region_index = 6+1;
+ dcr_common_init(dcr);
+ dcr->serial_number = ~handle[2];
+ dcr->code = NFIT_FIC_BYTEN;
+ dcr->windows = 0;
+ offset += dcr->header.length;
+
+ /* dcr-descriptor3: pmem */
+ dcr = nfit_buf + offset;
+ dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
+ dcr->header.length = offsetof(struct acpi_nfit_control_region,
+ window_size);
+ dcr->region_index = 7+1;
+ dcr_common_init(dcr);
+ dcr->serial_number = ~handle[3];
+ dcr->code = NFIT_FIC_BYTEN;
+ dcr->windows = 0;
+ offset += dcr->header.length;
+
+ /* bdw0 (spa/dcr0, dimm0) */
+ bdw = nfit_buf + offset;
+ bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION;
+ bdw->header.length = sizeof(*bdw);
+ bdw->region_index = 0+1;
+ bdw->windows = 1;
+ bdw->offset = 0;
+ bdw->size = BDW_SIZE;
+ bdw->capacity = DIMM_SIZE;
+ bdw->start_address = 0;
+ offset += bdw->header.length;
+
+ /* bdw1 (spa/dcr1, dimm1) */
+ bdw = nfit_buf + offset;
+ bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION;
+ bdw->header.length = sizeof(*bdw);
+ bdw->region_index = 1+1;
+ bdw->windows = 1;
+ bdw->offset = 0;
+ bdw->size = BDW_SIZE;
+ bdw->capacity = DIMM_SIZE;
+ bdw->start_address = 0;
+ offset += bdw->header.length;
+
+ /* bdw2 (spa/dcr2, dimm2) */
+ bdw = nfit_buf + offset;
+ bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION;
+ bdw->header.length = sizeof(*bdw);
+ bdw->region_index = 2+1;
+ bdw->windows = 1;
+ bdw->offset = 0;
+ bdw->size = BDW_SIZE;
+ bdw->capacity = DIMM_SIZE;
+ bdw->start_address = 0;
+ offset += bdw->header.length;
+
+ /* bdw3 (spa/dcr3, dimm3) */
+ bdw = nfit_buf + offset;
+ bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION;
+ bdw->header.length = sizeof(*bdw);
+ bdw->region_index = 3+1;
+ bdw->windows = 1;
+ bdw->offset = 0;
+ bdw->size = BDW_SIZE;
+ bdw->capacity = DIMM_SIZE;
+ bdw->start_address = 0;
+ offset += bdw->header.length;
+
+ /* flush0 (dimm0) */
+ flush = nfit_buf + offset;
+ flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
+ flush->header.length = flush_hint_size;
+ flush->device_handle = handle[0];
+ flush->hint_count = NUM_HINTS;
+ for (i = 0; i < NUM_HINTS; i++)
+ flush->hint_address[i] = t->flush_dma[0] + i * sizeof(u64);
+ offset += flush->header.length;
+
+ /* flush1 (dimm1) */
+ flush = nfit_buf + offset;
+ flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
+ flush->header.length = flush_hint_size;
+ flush->device_handle = handle[1];
+ flush->hint_count = NUM_HINTS;
+ for (i = 0; i < NUM_HINTS; i++)
+ flush->hint_address[i] = t->flush_dma[1] + i * sizeof(u64);
+ offset += flush->header.length;
+
+ /* flush2 (dimm2) */
+ flush = nfit_buf + offset;
+ flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
+ flush->header.length = flush_hint_size;
+ flush->device_handle = handle[2];
+ flush->hint_count = NUM_HINTS;
+ for (i = 0; i < NUM_HINTS; i++)
+ flush->hint_address[i] = t->flush_dma[2] + i * sizeof(u64);
+ offset += flush->header.length;
+
+ /* flush3 (dimm3) */
+ flush = nfit_buf + offset;
+ flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
+ flush->header.length = flush_hint_size;
+ flush->device_handle = handle[3];
+ flush->hint_count = NUM_HINTS;
+ for (i = 0; i < NUM_HINTS; i++)
+ flush->hint_address[i] = t->flush_dma[3] + i * sizeof(u64);
+ offset += flush->header.length;
+
+ /* platform capabilities */
+ pcap = nfit_buf + offset;
+ pcap->header.type = ACPI_NFIT_TYPE_CAPABILITIES;
+ pcap->header.length = sizeof(*pcap);
+ pcap->highest_capability = 1;
+ pcap->capabilities = ACPI_NFIT_CAPABILITY_MEM_FLUSH;
+ offset += pcap->header.length;
+
+ if (t->setup_hotplug) {
+ /* dcr-descriptor4: blk */
+ dcr = nfit_buf + offset;
+ dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
+ dcr->header.length = sizeof(*dcr);
+ dcr->region_index = 8+1;
+ dcr_common_init(dcr);
+ dcr->serial_number = ~handle[4];
+ dcr->code = NFIT_FIC_BLK;
+ dcr->windows = 1;
+ dcr->window_size = DCR_SIZE;
+ dcr->command_offset = 0;
+ dcr->command_size = 8;
+ dcr->status_offset = 8;
+ dcr->status_size = 4;
+ offset += dcr->header.length;
+
+ /* dcr-descriptor4: pmem */
+ dcr = nfit_buf + offset;
+ dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
+ dcr->header.length = offsetof(struct acpi_nfit_control_region,
+ window_size);
+ dcr->region_index = 9+1;
+ dcr_common_init(dcr);
+ dcr->serial_number = ~handle[4];
+ dcr->code = NFIT_FIC_BYTEN;
+ dcr->windows = 0;
+ offset += dcr->header.length;
+
+ /* bdw4 (spa/dcr4, dimm4) */
+ bdw = nfit_buf + offset;
+ bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION;
+ bdw->header.length = sizeof(*bdw);
+ bdw->region_index = 8+1;
+ bdw->windows = 1;
+ bdw->offset = 0;
+ bdw->size = BDW_SIZE;
+ bdw->capacity = DIMM_SIZE;
+ bdw->start_address = 0;
+ offset += bdw->header.length;
+
+ /* spa10 (dcr4) dimm4 */
+ spa = nfit_buf + offset;
+ spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+ spa->header.length = sizeof(*spa);
+ memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
+ spa->range_index = 10+1;
+ spa->address = t->dcr_dma[4];
+ spa->length = DCR_SIZE;
+ offset += spa->header.length;
+
+ /*
+ * spa11 (single-dimm interleave for hotplug, note storage
+ * does not actually alias the related block-data-window
+ * regions)
+ */
+ spa = nfit_buf + offset;
+ spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+ spa->header.length = sizeof(*spa);
+ memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16);
+ spa->range_index = 11+1;
+ spa->address = t->spa_set_dma[2];
+ spa->length = SPA0_SIZE;
+ offset += spa->header.length;
+
+ /* spa12 (bdw for dcr4) dimm4 */
+ spa = nfit_buf + offset;
+ spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+ spa->header.length = sizeof(*spa);
+ memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
+ spa->range_index = 12+1;
+ spa->address = t->dimm_dma[4];
+ spa->length = DIMM_SIZE;
+ offset += spa->header.length;
+
+ /* mem-region14 (spa/dcr4, dimm4) */
+ memdev = nfit_buf + offset;
+ memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+ memdev->header.length = sizeof(*memdev);
+ memdev->device_handle = handle[4];
+ memdev->physical_id = 4;
+ memdev->region_id = 0;
+ memdev->range_index = 10+1;
+ memdev->region_index = 8+1;
+ memdev->region_size = 0;
+ memdev->region_offset = 0;
+ memdev->address = 0;
+ memdev->interleave_index = 0;
+ memdev->interleave_ways = 1;
+ offset += memdev->header.length;
+
+ /* mem-region15 (spa11, dimm4) */
+ memdev = nfit_buf + offset;
+ memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+ memdev->header.length = sizeof(*memdev);
+ memdev->device_handle = handle[4];
+ memdev->physical_id = 4;
+ memdev->region_id = 0;
+ memdev->range_index = 11+1;
+ memdev->region_index = 9+1;
+ memdev->region_size = SPA0_SIZE;
+ memdev->region_offset = (1ULL << 48);
+ memdev->address = 0;
+ memdev->interleave_index = 0;
+ memdev->interleave_ways = 1;
+ memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED;
+ offset += memdev->header.length;
+
+ /* mem-region16 (spa/bdw4, dimm4) */
+ memdev = nfit_buf + offset;
+ memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+ memdev->header.length = sizeof(*memdev);
+ memdev->device_handle = handle[4];
+ memdev->physical_id = 4;
+ memdev->region_id = 0;
+ memdev->range_index = 12+1;
+ memdev->region_index = 8+1;
+ memdev->region_size = 0;
+ memdev->region_offset = 0;
+ memdev->address = 0;
+ memdev->interleave_index = 0;
+ memdev->interleave_ways = 1;
+ offset += memdev->header.length;
+
+ /* flush3 (dimm4) */
+ flush = nfit_buf + offset;
+ flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
+ flush->header.length = flush_hint_size;
+ flush->device_handle = handle[4];
+ flush->hint_count = NUM_HINTS;
+ for (i = 0; i < NUM_HINTS; i++)
+ flush->hint_address[i] = t->flush_dma[4]
+ + i * sizeof(u64);
+ offset += flush->header.length;
+
+ /* sanity check to make sure we've filled the buffer */
+ WARN_ON(offset != t->nfit_size);
+ }
+
+ t->nfit_filled = offset;
+
+ post_ars_status(&t->ars_state, &t->badrange, t->spa_set_dma[0],
+ SPA0_SIZE);
+
+ acpi_desc = &t->acpi_desc;
+ set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_cmd_force_en);
+ set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en);
+ set_bit(ND_CMD_SET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en);
+ set_bit(ND_INTEL_SMART, &acpi_desc->dimm_cmd_force_en);
+ set_bit(ND_INTEL_SMART_THRESHOLD, &acpi_desc->dimm_cmd_force_en);
+ set_bit(ND_INTEL_SMART_SET_THRESHOLD, &acpi_desc->dimm_cmd_force_en);
+ set_bit(ND_INTEL_SMART_INJECT, &acpi_desc->dimm_cmd_force_en);
+ set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_cmd_force_en);
+ set_bit(ND_CMD_ARS_START, &acpi_desc->bus_cmd_force_en);
+ set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en);
+ set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en);
+ set_bit(ND_CMD_CALL, &acpi_desc->bus_cmd_force_en);
+ set_bit(NFIT_CMD_TRANSLATE_SPA, &acpi_desc->bus_nfit_cmd_force_en);
+ set_bit(NFIT_CMD_ARS_INJECT_SET, &acpi_desc->bus_nfit_cmd_force_en);
+ set_bit(NFIT_CMD_ARS_INJECT_CLEAR, &acpi_desc->bus_nfit_cmd_force_en);
+ set_bit(NFIT_CMD_ARS_INJECT_GET, &acpi_desc->bus_nfit_cmd_force_en);
+ set_bit(ND_INTEL_FW_GET_INFO, &acpi_desc->dimm_cmd_force_en);
+ set_bit(ND_INTEL_FW_START_UPDATE, &acpi_desc->dimm_cmd_force_en);
+ set_bit(ND_INTEL_FW_SEND_DATA, &acpi_desc->dimm_cmd_force_en);
+ set_bit(ND_INTEL_FW_FINISH_UPDATE, &acpi_desc->dimm_cmd_force_en);
+ set_bit(ND_INTEL_FW_FINISH_QUERY, &acpi_desc->dimm_cmd_force_en);
+ set_bit(ND_INTEL_ENABLE_LSS_STATUS, &acpi_desc->dimm_cmd_force_en);
+}
+
+static void nfit_test1_setup(struct nfit_test *t)
+{
+ size_t offset;
+ void *nfit_buf = t->nfit_buf;
+ struct acpi_nfit_memory_map *memdev;
+ struct acpi_nfit_control_region *dcr;
+ struct acpi_nfit_system_address *spa;
+ struct acpi_nfit_desc *acpi_desc;
+
+ offset = 0;
+ /* spa0 (flat range with no bdw aliasing) */
+ spa = nfit_buf + offset;
+ spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+ spa->header.length = sizeof(*spa);
+ memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16);
+ spa->range_index = 0+1;
+ spa->address = t->spa_set_dma[0];
+ spa->length = SPA2_SIZE;
+ offset += spa->header.length;
+
+ /* virtual cd region */
+ spa = nfit_buf + offset;
+ spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+ spa->header.length = sizeof(*spa);
+ memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_VCD), 16);
+ spa->range_index = 0;
+ spa->address = t->spa_set_dma[1];
+ spa->length = SPA_VCD_SIZE;
+ offset += spa->header.length;
+
+ /* mem-region0 (spa0, dimm0) */
+ memdev = nfit_buf + offset;
+ memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+ memdev->header.length = sizeof(*memdev);
+ memdev->device_handle = handle[5];
+ memdev->physical_id = 0;
+ memdev->region_id = 0;
+ memdev->range_index = 0+1;
+ memdev->region_index = 0+1;
+ memdev->region_size = SPA2_SIZE;
+ memdev->region_offset = 0;
+ memdev->address = 0;
+ memdev->interleave_index = 0;
+ memdev->interleave_ways = 1;
+ memdev->flags = ACPI_NFIT_MEM_SAVE_FAILED | ACPI_NFIT_MEM_RESTORE_FAILED
+ | ACPI_NFIT_MEM_FLUSH_FAILED | ACPI_NFIT_MEM_HEALTH_OBSERVED
+ | ACPI_NFIT_MEM_NOT_ARMED;
+ offset += memdev->header.length;
+
+ /* dcr-descriptor0 */
+ dcr = nfit_buf + offset;
+ dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
+ dcr->header.length = offsetof(struct acpi_nfit_control_region,
+ window_size);
+ dcr->region_index = 0+1;
+ dcr_common_init(dcr);
+ dcr->serial_number = ~handle[5];
+ dcr->code = NFIT_FIC_BYTE;
+ dcr->windows = 0;
+ offset += dcr->header.length;
+
+ memdev = nfit_buf + offset;
+ memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+ memdev->header.length = sizeof(*memdev);
+ memdev->device_handle = handle[6];
+ memdev->physical_id = 0;
+ memdev->region_id = 0;
+ memdev->range_index = 0;
+ memdev->region_index = 0+2;
+ memdev->region_size = SPA2_SIZE;
+ memdev->region_offset = 0;
+ memdev->address = 0;
+ memdev->interleave_index = 0;
+ memdev->interleave_ways = 1;
+ memdev->flags = ACPI_NFIT_MEM_MAP_FAILED;
+ offset += memdev->header.length;
+
+ /* dcr-descriptor1 */
+ dcr = nfit_buf + offset;
+ dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
+ dcr->header.length = offsetof(struct acpi_nfit_control_region,
+ window_size);
+ dcr->region_index = 0+2;
+ dcr_common_init(dcr);
+ dcr->serial_number = ~handle[6];
+ dcr->code = NFIT_FIC_BYTE;
+ dcr->windows = 0;
+ offset += dcr->header.length;
+
+ /* sanity check to make sure we've filled the buffer */
+ WARN_ON(offset != t->nfit_size);
+
+ t->nfit_filled = offset;
+
+ post_ars_status(&t->ars_state, &t->badrange, t->spa_set_dma[0],
+ SPA2_SIZE);
+
+ acpi_desc = &t->acpi_desc;
+ set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_cmd_force_en);
+ set_bit(ND_CMD_ARS_START, &acpi_desc->bus_cmd_force_en);
+ set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en);
+ set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en);
+ set_bit(ND_INTEL_ENABLE_LSS_STATUS, &acpi_desc->dimm_cmd_force_en);
+ set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_cmd_force_en);
+ set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en);
+ set_bit(ND_CMD_SET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en);
+}
+
+static int nfit_test_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa,
+ void *iobuf, u64 len, int rw)
+{
+ struct nfit_blk *nfit_blk = ndbr->blk_provider_data;
+ struct nfit_blk_mmio *mmio = &nfit_blk->mmio[BDW];
+ struct nd_region *nd_region = &ndbr->nd_region;
+ unsigned int lane;
+
+ lane = nd_region_acquire_lane(nd_region);
+ if (rw)
+ memcpy(mmio->addr.base + dpa, iobuf, len);
+ else {
+ memcpy(iobuf, mmio->addr.base + dpa, len);
+
+ /* give us some some coverage of the arch_invalidate_pmem() API */
+ arch_invalidate_pmem(mmio->addr.base + dpa, len);
+ }
+ nd_region_release_lane(nd_region, lane);
+
+ return 0;
+}
+
+static unsigned long nfit_ctl_handle;
+
+union acpi_object *result;
+
+static union acpi_object *nfit_test_evaluate_dsm(acpi_handle handle,
+ const guid_t *guid, u64 rev, u64 func, union acpi_object *argv4)
+{
+ if (handle != &nfit_ctl_handle)
+ return ERR_PTR(-ENXIO);
+
+ return result;
+}
+
+static int setup_result(void *buf, size_t size)
+{
+ result = kmalloc(sizeof(union acpi_object) + size, GFP_KERNEL);
+ if (!result)
+ return -ENOMEM;
+ result->package.type = ACPI_TYPE_BUFFER,
+ result->buffer.pointer = (void *) (result + 1);
+ result->buffer.length = size;
+ memcpy(result->buffer.pointer, buf, size);
+ memset(buf, 0, size);
+ return 0;
+}
+
+static int nfit_ctl_test(struct device *dev)
+{
+ int rc, cmd_rc;
+ struct nvdimm *nvdimm;
+ struct acpi_device *adev;
+ struct nfit_mem *nfit_mem;
+ struct nd_ars_record *record;
+ struct acpi_nfit_desc *acpi_desc;
+ const u64 test_val = 0x0123456789abcdefULL;
+ unsigned long mask, cmd_size, offset;
+ union {
+ struct nd_cmd_get_config_size cfg_size;
+ struct nd_cmd_clear_error clear_err;
+ struct nd_cmd_ars_status ars_stat;
+ struct nd_cmd_ars_cap ars_cap;
+ char buf[sizeof(struct nd_cmd_ars_status)
+ + sizeof(struct nd_ars_record)];
+ } cmds;
+
+ adev = devm_kzalloc(dev, sizeof(*adev), GFP_KERNEL);
+ if (!adev)
+ return -ENOMEM;
+ *adev = (struct acpi_device) {
+ .handle = &nfit_ctl_handle,
+ .dev = {
+ .init_name = "test-adev",
+ },
+ };
+
+ acpi_desc = devm_kzalloc(dev, sizeof(*acpi_desc), GFP_KERNEL);
+ if (!acpi_desc)
+ return -ENOMEM;
+ *acpi_desc = (struct acpi_nfit_desc) {
+ .nd_desc = {
+ .cmd_mask = 1UL << ND_CMD_ARS_CAP
+ | 1UL << ND_CMD_ARS_START
+ | 1UL << ND_CMD_ARS_STATUS
+ | 1UL << ND_CMD_CLEAR_ERROR
+ | 1UL << ND_CMD_CALL,
+ .module = THIS_MODULE,
+ .provider_name = "ACPI.NFIT",
+ .ndctl = acpi_nfit_ctl,
+ .bus_dsm_mask = 1UL << NFIT_CMD_TRANSLATE_SPA
+ | 1UL << NFIT_CMD_ARS_INJECT_SET
+ | 1UL << NFIT_CMD_ARS_INJECT_CLEAR
+ | 1UL << NFIT_CMD_ARS_INJECT_GET,
+ },
+ .dev = &adev->dev,
+ };
+
+ nfit_mem = devm_kzalloc(dev, sizeof(*nfit_mem), GFP_KERNEL);
+ if (!nfit_mem)
+ return -ENOMEM;
+
+ mask = 1UL << ND_CMD_SMART | 1UL << ND_CMD_SMART_THRESHOLD
+ | 1UL << ND_CMD_DIMM_FLAGS | 1UL << ND_CMD_GET_CONFIG_SIZE
+ | 1UL << ND_CMD_GET_CONFIG_DATA | 1UL << ND_CMD_SET_CONFIG_DATA
+ | 1UL << ND_CMD_VENDOR;
+ *nfit_mem = (struct nfit_mem) {
+ .adev = adev,
+ .family = NVDIMM_FAMILY_INTEL,
+ .dsm_mask = mask,
+ };
+
+ nvdimm = devm_kzalloc(dev, sizeof(*nvdimm), GFP_KERNEL);
+ if (!nvdimm)
+ return -ENOMEM;
+ *nvdimm = (struct nvdimm) {
+ .provider_data = nfit_mem,
+ .cmd_mask = mask,
+ .dev = {
+ .init_name = "test-dimm",
+ },
+ };
+
+
+ /* basic checkout of a typical 'get config size' command */
+ cmd_size = sizeof(cmds.cfg_size);
+ cmds.cfg_size = (struct nd_cmd_get_config_size) {
+ .status = 0,
+ .config_size = SZ_128K,
+ .max_xfer = SZ_4K,
+ };
+ rc = setup_result(cmds.buf, cmd_size);
+ if (rc)
+ return rc;
+ rc = acpi_nfit_ctl(&acpi_desc->nd_desc, nvdimm, ND_CMD_GET_CONFIG_SIZE,
+ cmds.buf, cmd_size, &cmd_rc);
+
+ if (rc < 0 || cmd_rc || cmds.cfg_size.status != 0
+ || cmds.cfg_size.config_size != SZ_128K
+ || cmds.cfg_size.max_xfer != SZ_4K) {
+ dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n",
+ __func__, __LINE__, rc, cmd_rc);
+ return -EIO;
+ }
+
+
+ /* test ars_status with zero output */
+ cmd_size = offsetof(struct nd_cmd_ars_status, address);
+ cmds.ars_stat = (struct nd_cmd_ars_status) {
+ .out_length = 0,
+ };
+ rc = setup_result(cmds.buf, cmd_size);
+ if (rc)
+ return rc;
+ rc = acpi_nfit_ctl(&acpi_desc->nd_desc, NULL, ND_CMD_ARS_STATUS,
+ cmds.buf, cmd_size, &cmd_rc);
+
+ if (rc < 0 || cmd_rc) {
+ dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n",
+ __func__, __LINE__, rc, cmd_rc);
+ return -EIO;
+ }
+
+
+ /* test ars_cap with benign extended status */
+ cmd_size = sizeof(cmds.ars_cap);
+ cmds.ars_cap = (struct nd_cmd_ars_cap) {
+ .status = ND_ARS_PERSISTENT << 16,
+ };
+ offset = offsetof(struct nd_cmd_ars_cap, status);
+ rc = setup_result(cmds.buf + offset, cmd_size - offset);
+ if (rc)
+ return rc;
+ rc = acpi_nfit_ctl(&acpi_desc->nd_desc, NULL, ND_CMD_ARS_CAP,
+ cmds.buf, cmd_size, &cmd_rc);
+
+ if (rc < 0 || cmd_rc) {
+ dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n",
+ __func__, __LINE__, rc, cmd_rc);
+ return -EIO;
+ }
+
+
+ /* test ars_status with 'status' trimmed from 'out_length' */
+ cmd_size = sizeof(cmds.ars_stat) + sizeof(struct nd_ars_record);
+ cmds.ars_stat = (struct nd_cmd_ars_status) {
+ .out_length = cmd_size - 4,
+ };
+ record = &cmds.ars_stat.records[0];
+ *record = (struct nd_ars_record) {
+ .length = test_val,
+ };
+ rc = setup_result(cmds.buf, cmd_size);
+ if (rc)
+ return rc;
+ rc = acpi_nfit_ctl(&acpi_desc->nd_desc, NULL, ND_CMD_ARS_STATUS,
+ cmds.buf, cmd_size, &cmd_rc);
+
+ if (rc < 0 || cmd_rc || record->length != test_val) {
+ dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n",
+ __func__, __LINE__, rc, cmd_rc);
+ return -EIO;
+ }
+
+
+ /* test ars_status with 'Output (Size)' including 'status' */
+ cmd_size = sizeof(cmds.ars_stat) + sizeof(struct nd_ars_record);
+ cmds.ars_stat = (struct nd_cmd_ars_status) {
+ .out_length = cmd_size,
+ };
+ record = &cmds.ars_stat.records[0];
+ *record = (struct nd_ars_record) {
+ .length = test_val,
+ };
+ rc = setup_result(cmds.buf, cmd_size);
+ if (rc)
+ return rc;
+ rc = acpi_nfit_ctl(&acpi_desc->nd_desc, NULL, ND_CMD_ARS_STATUS,
+ cmds.buf, cmd_size, &cmd_rc);
+
+ if (rc < 0 || cmd_rc || record->length != test_val) {
+ dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n",
+ __func__, __LINE__, rc, cmd_rc);
+ return -EIO;
+ }
+
+
+ /* test extended status for get_config_size results in failure */
+ cmd_size = sizeof(cmds.cfg_size);
+ cmds.cfg_size = (struct nd_cmd_get_config_size) {
+ .status = 1 << 16,
+ };
+ rc = setup_result(cmds.buf, cmd_size);
+ if (rc)
+ return rc;
+ rc = acpi_nfit_ctl(&acpi_desc->nd_desc, nvdimm, ND_CMD_GET_CONFIG_SIZE,
+ cmds.buf, cmd_size, &cmd_rc);
+
+ if (rc < 0 || cmd_rc >= 0) {
+ dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n",
+ __func__, __LINE__, rc, cmd_rc);
+ return -EIO;
+ }
+
+ /* test clear error */
+ cmd_size = sizeof(cmds.clear_err);
+ cmds.clear_err = (struct nd_cmd_clear_error) {
+ .length = 512,
+ .cleared = 512,
+ };
+ rc = setup_result(cmds.buf, cmd_size);
+ if (rc)
+ return rc;
+ rc = acpi_nfit_ctl(&acpi_desc->nd_desc, NULL, ND_CMD_CLEAR_ERROR,
+ cmds.buf, cmd_size, &cmd_rc);
+ if (rc < 0 || cmd_rc) {
+ dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n",
+ __func__, __LINE__, rc, cmd_rc);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+static int nfit_test_probe(struct platform_device *pdev)
+{
+ struct nvdimm_bus_descriptor *nd_desc;
+ struct acpi_nfit_desc *acpi_desc;
+ struct device *dev = &pdev->dev;
+ struct nfit_test *nfit_test;
+ struct nfit_mem *nfit_mem;
+ union acpi_object *obj;
+ int rc;
+
+ if (strcmp(dev_name(&pdev->dev), "nfit_test.0") == 0) {
+ rc = nfit_ctl_test(&pdev->dev);
+ if (rc)
+ return rc;
+ }
+
+ nfit_test = to_nfit_test(&pdev->dev);
+
+ /* common alloc */
+ if (nfit_test->num_dcr) {
+ int num = nfit_test->num_dcr;
+
+ nfit_test->dimm = devm_kcalloc(dev, num, sizeof(void *),
+ GFP_KERNEL);
+ nfit_test->dimm_dma = devm_kcalloc(dev, num, sizeof(dma_addr_t),
+ GFP_KERNEL);
+ nfit_test->flush = devm_kcalloc(dev, num, sizeof(void *),
+ GFP_KERNEL);
+ nfit_test->flush_dma = devm_kcalloc(dev, num, sizeof(dma_addr_t),
+ GFP_KERNEL);
+ nfit_test->label = devm_kcalloc(dev, num, sizeof(void *),
+ GFP_KERNEL);
+ nfit_test->label_dma = devm_kcalloc(dev, num,
+ sizeof(dma_addr_t), GFP_KERNEL);
+ nfit_test->dcr = devm_kcalloc(dev, num,
+ sizeof(struct nfit_test_dcr *), GFP_KERNEL);
+ nfit_test->dcr_dma = devm_kcalloc(dev, num,
+ sizeof(dma_addr_t), GFP_KERNEL);
+ nfit_test->smart = devm_kcalloc(dev, num,
+ sizeof(struct nd_intel_smart), GFP_KERNEL);
+ nfit_test->smart_threshold = devm_kcalloc(dev, num,
+ sizeof(struct nd_intel_smart_threshold),
+ GFP_KERNEL);
+ nfit_test->fw = devm_kcalloc(dev, num,
+ sizeof(struct nfit_test_fw), GFP_KERNEL);
+ if (nfit_test->dimm && nfit_test->dimm_dma && nfit_test->label
+ && nfit_test->label_dma && nfit_test->dcr
+ && nfit_test->dcr_dma && nfit_test->flush
+ && nfit_test->flush_dma
+ && nfit_test->fw)
+ /* pass */;
+ else
+ return -ENOMEM;
+ }
+
+ if (nfit_test->num_pm) {
+ int num = nfit_test->num_pm;
+
+ nfit_test->spa_set = devm_kcalloc(dev, num, sizeof(void *),
+ GFP_KERNEL);
+ nfit_test->spa_set_dma = devm_kcalloc(dev, num,
+ sizeof(dma_addr_t), GFP_KERNEL);
+ if (nfit_test->spa_set && nfit_test->spa_set_dma)
+ /* pass */;
+ else
+ return -ENOMEM;
+ }
+
+ /* per-nfit specific alloc */
+ if (nfit_test->alloc(nfit_test))
+ return -ENOMEM;
+
+ nfit_test->setup(nfit_test);
+ acpi_desc = &nfit_test->acpi_desc;
+ acpi_nfit_desc_init(acpi_desc, &pdev->dev);
+ acpi_desc->blk_do_io = nfit_test_blk_do_io;
+ nd_desc = &acpi_desc->nd_desc;
+ nd_desc->provider_name = NULL;
+ nd_desc->module = THIS_MODULE;
+ nd_desc->ndctl = nfit_test_ctl;
+
+ rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_buf,
+ nfit_test->nfit_filled);
+ if (rc)
+ return rc;
+
+ rc = devm_add_action_or_reset(&pdev->dev, acpi_nfit_shutdown, acpi_desc);
+ if (rc)
+ return rc;
+
+ if (nfit_test->setup != nfit_test0_setup)
+ return 0;
+
+ nfit_test->setup_hotplug = 1;
+ nfit_test->setup(nfit_test);
+
+ obj = kzalloc(sizeof(*obj), GFP_KERNEL);
+ if (!obj)
+ return -ENOMEM;
+ obj->type = ACPI_TYPE_BUFFER;
+ obj->buffer.length = nfit_test->nfit_size;
+ obj->buffer.pointer = nfit_test->nfit_buf;
+ *(nfit_test->_fit) = obj;
+ __acpi_nfit_notify(&pdev->dev, nfit_test, 0x80);
+
+ /* associate dimm devices with nfit_mem data for notification testing */
+ mutex_lock(&acpi_desc->init_mutex);
+ list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) {
+ u32 nfit_handle = __to_nfit_memdev(nfit_mem)->device_handle;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(handle); i++)
+ if (nfit_handle == handle[i])
+ dev_set_drvdata(nfit_test->dimm_dev[i],
+ nfit_mem);
+ }
+ mutex_unlock(&acpi_desc->init_mutex);
+
+ return 0;
+}
+
+static int nfit_test_remove(struct platform_device *pdev)
+{
+ return 0;
+}
+
+static void nfit_test_release(struct device *dev)
+{
+ struct nfit_test *nfit_test = to_nfit_test(dev);
+
+ kfree(nfit_test);
+}
+
+static const struct platform_device_id nfit_test_id[] = {
+ { KBUILD_MODNAME },
+ { },
+};
+
+static struct platform_driver nfit_test_driver = {
+ .probe = nfit_test_probe,
+ .remove = nfit_test_remove,
+ .driver = {
+ .name = KBUILD_MODNAME,
+ },
+ .id_table = nfit_test_id,
+};
+
+static char mcsafe_buf[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE)));
+
+enum INJECT {
+ INJECT_NONE,
+ INJECT_SRC,
+ INJECT_DST,
+};
+
+static void mcsafe_test_init(char *dst, char *src, size_t size)
+{
+ size_t i;
+
+ memset(dst, 0xff, size);
+ for (i = 0; i < size; i++)
+ src[i] = (char) i;
+}
+
+static bool mcsafe_test_validate(unsigned char *dst, unsigned char *src,
+ size_t size, unsigned long rem)
+{
+ size_t i;
+
+ for (i = 0; i < size - rem; i++)
+ if (dst[i] != (unsigned char) i) {
+ pr_info_once("%s:%d: offset: %zd got: %#x expect: %#x\n",
+ __func__, __LINE__, i, dst[i],
+ (unsigned char) i);
+ return false;
+ }
+ for (i = size - rem; i < size; i++)
+ if (dst[i] != 0xffU) {
+ pr_info_once("%s:%d: offset: %zd got: %#x expect: 0xff\n",
+ __func__, __LINE__, i, dst[i]);
+ return false;
+ }
+ return true;
+}
+
+void mcsafe_test(void)
+{
+ char *inject_desc[] = { "none", "source", "destination" };
+ enum INJECT inj;
+
+ if (IS_ENABLED(CONFIG_MCSAFE_TEST)) {
+ pr_info("%s: run...\n", __func__);
+ } else {
+ pr_info("%s: disabled, skip.\n", __func__);
+ return;
+ }
+
+ for (inj = INJECT_NONE; inj <= INJECT_DST; inj++) {
+ int i;
+
+ pr_info("%s: inject: %s\n", __func__, inject_desc[inj]);
+ for (i = 0; i < 512; i++) {
+ unsigned long expect, rem;
+ void *src, *dst;
+ bool valid;
+
+ switch (inj) {
+ case INJECT_NONE:
+ mcsafe_inject_src(NULL);
+ mcsafe_inject_dst(NULL);
+ dst = &mcsafe_buf[2048];
+ src = &mcsafe_buf[1024 - i];
+ expect = 0;
+ break;
+ case INJECT_SRC:
+ mcsafe_inject_src(&mcsafe_buf[1024]);
+ mcsafe_inject_dst(NULL);
+ dst = &mcsafe_buf[2048];
+ src = &mcsafe_buf[1024 - i];
+ expect = 512 - i;
+ break;
+ case INJECT_DST:
+ mcsafe_inject_src(NULL);
+ mcsafe_inject_dst(&mcsafe_buf[2048]);
+ dst = &mcsafe_buf[2048 - i];
+ src = &mcsafe_buf[1024];
+ expect = 512 - i;
+ break;
+ }
+
+ mcsafe_test_init(dst, src, 512);
+ rem = __memcpy_mcsafe(dst, src, 512);
+ valid = mcsafe_test_validate(dst, src, 512, expect);
+ if (rem == expect && valid)
+ continue;
+ pr_info("%s: copy(%#lx, %#lx, %d) off: %d rem: %ld %s expect: %ld\n",
+ __func__,
+ ((unsigned long) dst) & ~PAGE_MASK,
+ ((unsigned long ) src) & ~PAGE_MASK,
+ 512, i, rem, valid ? "valid" : "bad",
+ expect);
+ }
+ }
+
+ mcsafe_inject_src(NULL);
+ mcsafe_inject_dst(NULL);
+}
+
+static __init int nfit_test_init(void)
+{
+ int rc, i;
+
+ pmem_test();
+ libnvdimm_test();
+ acpi_nfit_test();
+ device_dax_test();
+ mcsafe_test();
+
+ nfit_test_setup(nfit_test_lookup, nfit_test_evaluate_dsm);
+
+ nfit_wq = create_singlethread_workqueue("nfit");
+ if (!nfit_wq)
+ return -ENOMEM;
+
+ nfit_test_dimm = class_create(THIS_MODULE, "nfit_test_dimm");
+ if (IS_ERR(nfit_test_dimm)) {
+ rc = PTR_ERR(nfit_test_dimm);
+ goto err_register;
+ }
+
+ nfit_pool = gen_pool_create(ilog2(SZ_4M), NUMA_NO_NODE);
+ if (!nfit_pool) {
+ rc = -ENOMEM;
+ goto err_register;
+ }
+
+ if (gen_pool_add(nfit_pool, SZ_4G, SZ_4G, NUMA_NO_NODE)) {
+ rc = -ENOMEM;
+ goto err_register;
+ }
+
+ for (i = 0; i < NUM_NFITS; i++) {
+ struct nfit_test *nfit_test;
+ struct platform_device *pdev;
+
+ nfit_test = kzalloc(sizeof(*nfit_test), GFP_KERNEL);
+ if (!nfit_test) {
+ rc = -ENOMEM;
+ goto err_register;
+ }
+ INIT_LIST_HEAD(&nfit_test->resources);
+ badrange_init(&nfit_test->badrange);
+ switch (i) {
+ case 0:
+ nfit_test->num_pm = NUM_PM;
+ nfit_test->dcr_idx = 0;
+ nfit_test->num_dcr = NUM_DCR;
+ nfit_test->alloc = nfit_test0_alloc;
+ nfit_test->setup = nfit_test0_setup;
+ break;
+ case 1:
+ nfit_test->num_pm = 2;
+ nfit_test->dcr_idx = NUM_DCR;
+ nfit_test->num_dcr = 2;
+ nfit_test->alloc = nfit_test1_alloc;
+ nfit_test->setup = nfit_test1_setup;
+ break;
+ default:
+ rc = -EINVAL;
+ goto err_register;
+ }
+ pdev = &nfit_test->pdev;
+ pdev->name = KBUILD_MODNAME;
+ pdev->id = i;
+ pdev->dev.release = nfit_test_release;
+ rc = platform_device_register(pdev);
+ if (rc) {
+ put_device(&pdev->dev);
+ goto err_register;
+ }
+ get_device(&pdev->dev);
+
+ rc = dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+ if (rc)
+ goto err_register;
+
+ instances[i] = nfit_test;
+ INIT_WORK(&nfit_test->work, uc_error_notify);
+ }
+
+ rc = platform_driver_register(&nfit_test_driver);
+ if (rc)
+ goto err_register;
+ return 0;
+
+ err_register:
+ if (nfit_pool)
+ gen_pool_destroy(nfit_pool);
+
+ destroy_workqueue(nfit_wq);
+ for (i = 0; i < NUM_NFITS; i++)
+ if (instances[i])
+ platform_device_unregister(&instances[i]->pdev);
+ nfit_test_teardown();
+ for (i = 0; i < NUM_NFITS; i++)
+ if (instances[i])
+ put_device(&instances[i]->pdev.dev);
+
+ return rc;
+}
+
+static __exit void nfit_test_exit(void)
+{
+ int i;
+
+ flush_workqueue(nfit_wq);
+ destroy_workqueue(nfit_wq);
+ for (i = 0; i < NUM_NFITS; i++)
+ platform_device_unregister(&instances[i]->pdev);
+ platform_driver_unregister(&nfit_test_driver);
+ nfit_test_teardown();
+
+ gen_pool_destroy(nfit_pool);
+
+ for (i = 0; i < NUM_NFITS; i++)
+ put_device(&instances[i]->pdev.dev);
+ class_destroy(nfit_test_dimm);
+}
+
+module_init(nfit_test_init);
+module_exit(nfit_test_exit);
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Intel Corporation");
diff --git a/tools/testing/nvdimm/test/nfit_test.h b/tools/testing/nvdimm/test/nfit_test.h
new file mode 100644
index 000000000..3de57cc87
--- /dev/null
+++ b/tools/testing/nvdimm/test/nfit_test.h
@@ -0,0 +1,249 @@
+/*
+ * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#ifndef __NFIT_TEST_H__
+#define __NFIT_TEST_H__
+#include <linux/acpi.h>
+#include <linux/list.h>
+#include <linux/uuid.h>
+#include <linux/ioport.h>
+#include <linux/spinlock_types.h>
+
+struct nfit_test_request {
+ struct list_head list;
+ struct resource res;
+};
+
+struct nfit_test_resource {
+ struct list_head requests;
+ struct list_head list;
+ struct resource res;
+ struct device *dev;
+ spinlock_t lock;
+ int req_count;
+ void *buf;
+};
+
+#define ND_TRANSLATE_SPA_STATUS_INVALID_SPA 2
+#define NFIT_ARS_INJECT_INVALID 2
+
+enum err_inj_options {
+ ND_ARS_ERR_INJ_OPT_NOTIFY = 0,
+};
+
+/* nfit commands */
+enum nfit_cmd_num {
+ NFIT_CMD_TRANSLATE_SPA = 5,
+ NFIT_CMD_ARS_INJECT_SET = 7,
+ NFIT_CMD_ARS_INJECT_CLEAR = 8,
+ NFIT_CMD_ARS_INJECT_GET = 9,
+};
+
+struct nd_cmd_translate_spa {
+ __u64 spa;
+ __u32 status;
+ __u8 flags;
+ __u8 _reserved[3];
+ __u64 translate_length;
+ __u32 num_nvdimms;
+ struct nd_nvdimm_device {
+ __u32 nfit_device_handle;
+ __u32 _reserved;
+ __u64 dpa;
+ } __packed devices[0];
+
+} __packed;
+
+struct nd_cmd_ars_err_inj {
+ __u64 err_inj_spa_range_base;
+ __u64 err_inj_spa_range_length;
+ __u8 err_inj_options;
+ __u32 status;
+} __packed;
+
+struct nd_cmd_ars_err_inj_clr {
+ __u64 err_inj_clr_spa_range_base;
+ __u64 err_inj_clr_spa_range_length;
+ __u32 status;
+} __packed;
+
+struct nd_cmd_ars_err_inj_stat {
+ __u32 status;
+ __u32 inj_err_rec_count;
+ struct nd_error_stat_query_record {
+ __u64 err_inj_stat_spa_range_base;
+ __u64 err_inj_stat_spa_range_length;
+ } __packed record[0];
+} __packed;
+
+#define ND_INTEL_SMART 1
+#define ND_INTEL_SMART_THRESHOLD 2
+#define ND_INTEL_ENABLE_LSS_STATUS 10
+#define ND_INTEL_FW_GET_INFO 12
+#define ND_INTEL_FW_START_UPDATE 13
+#define ND_INTEL_FW_SEND_DATA 14
+#define ND_INTEL_FW_FINISH_UPDATE 15
+#define ND_INTEL_FW_FINISH_QUERY 16
+#define ND_INTEL_SMART_SET_THRESHOLD 17
+#define ND_INTEL_SMART_INJECT 18
+
+#define ND_INTEL_SMART_HEALTH_VALID (1 << 0)
+#define ND_INTEL_SMART_SPARES_VALID (1 << 1)
+#define ND_INTEL_SMART_USED_VALID (1 << 2)
+#define ND_INTEL_SMART_MTEMP_VALID (1 << 3)
+#define ND_INTEL_SMART_CTEMP_VALID (1 << 4)
+#define ND_INTEL_SMART_SHUTDOWN_COUNT_VALID (1 << 5)
+#define ND_INTEL_SMART_AIT_STATUS_VALID (1 << 6)
+#define ND_INTEL_SMART_PTEMP_VALID (1 << 7)
+#define ND_INTEL_SMART_ALARM_VALID (1 << 9)
+#define ND_INTEL_SMART_SHUTDOWN_VALID (1 << 10)
+#define ND_INTEL_SMART_VENDOR_VALID (1 << 11)
+#define ND_INTEL_SMART_SPARE_TRIP (1 << 0)
+#define ND_INTEL_SMART_TEMP_TRIP (1 << 1)
+#define ND_INTEL_SMART_CTEMP_TRIP (1 << 2)
+#define ND_INTEL_SMART_NON_CRITICAL_HEALTH (1 << 0)
+#define ND_INTEL_SMART_CRITICAL_HEALTH (1 << 1)
+#define ND_INTEL_SMART_FATAL_HEALTH (1 << 2)
+#define ND_INTEL_SMART_INJECT_MTEMP (1 << 0)
+#define ND_INTEL_SMART_INJECT_SPARE (1 << 1)
+#define ND_INTEL_SMART_INJECT_FATAL (1 << 2)
+#define ND_INTEL_SMART_INJECT_SHUTDOWN (1 << 3)
+
+struct nd_intel_smart {
+ __u32 status;
+ union {
+ struct {
+ __u32 flags;
+ __u8 reserved0[4];
+ __u8 health;
+ __u8 spares;
+ __u8 life_used;
+ __u8 alarm_flags;
+ __u16 media_temperature;
+ __u16 ctrl_temperature;
+ __u32 shutdown_count;
+ __u8 ait_status;
+ __u16 pmic_temperature;
+ __u8 reserved1[8];
+ __u8 shutdown_state;
+ __u32 vendor_size;
+ __u8 vendor_data[92];
+ } __packed;
+ __u8 data[128];
+ };
+} __packed;
+
+struct nd_intel_smart_threshold {
+ __u32 status;
+ union {
+ struct {
+ __u16 alarm_control;
+ __u8 spares;
+ __u16 media_temperature;
+ __u16 ctrl_temperature;
+ __u8 reserved[1];
+ } __packed;
+ __u8 data[8];
+ };
+} __packed;
+
+struct nd_intel_smart_set_threshold {
+ __u16 alarm_control;
+ __u8 spares;
+ __u16 media_temperature;
+ __u16 ctrl_temperature;
+ __u32 status;
+} __packed;
+
+struct nd_intel_smart_inject {
+ __u64 flags;
+ __u8 mtemp_enable;
+ __u16 media_temperature;
+ __u8 spare_enable;
+ __u8 spares;
+ __u8 fatal_enable;
+ __u8 unsafe_shutdown_enable;
+ __u32 status;
+} __packed;
+
+#define INTEL_FW_STORAGE_SIZE 0x100000
+#define INTEL_FW_MAX_SEND_LEN 0xFFEC
+#define INTEL_FW_QUERY_INTERVAL 250000
+#define INTEL_FW_QUERY_MAX_TIME 3000000
+#define INTEL_FW_FIS_VERSION 0x0105
+#define INTEL_FW_FAKE_VERSION 0xffffffffabcd
+
+enum intel_fw_update_state {
+ FW_STATE_NEW = 0,
+ FW_STATE_IN_PROGRESS,
+ FW_STATE_VERIFY,
+ FW_STATE_UPDATED,
+};
+
+struct nd_intel_fw_info {
+ __u32 status;
+ __u32 storage_size;
+ __u32 max_send_len;
+ __u32 query_interval;
+ __u32 max_query_time;
+ __u8 update_cap;
+ __u8 reserved[3];
+ __u32 fis_version;
+ __u64 run_version;
+ __u64 updated_version;
+} __packed;
+
+struct nd_intel_fw_start {
+ __u32 status;
+ __u32 context;
+} __packed;
+
+/* this one has the output first because the variable input data size */
+struct nd_intel_fw_send_data {
+ __u32 context;
+ __u32 offset;
+ __u32 length;
+ __u8 data[0];
+/* this field is not declared due ot variable data from input */
+/* __u32 status; */
+} __packed;
+
+struct nd_intel_fw_finish_update {
+ __u8 ctrl_flags;
+ __u8 reserved[3];
+ __u32 context;
+ __u32 status;
+} __packed;
+
+struct nd_intel_fw_finish_query {
+ __u32 context;
+ __u32 status;
+ __u64 updated_fw_rev;
+} __packed;
+
+struct nd_intel_lss {
+ __u8 enable;
+ __u32 status;
+} __packed;
+
+typedef struct nfit_test_resource *(*nfit_test_lookup_fn)(resource_size_t);
+typedef union acpi_object *(*nfit_test_evaluate_dsm_fn)(acpi_handle handle,
+ const guid_t *guid, u64 rev, u64 func,
+ union acpi_object *argv4);
+void __iomem *__wrap_ioremap_nocache(resource_size_t offset,
+ unsigned long size);
+void __wrap_iounmap(volatile void __iomem *addr);
+void nfit_test_setup(nfit_test_lookup_fn lookup,
+ nfit_test_evaluate_dsm_fn evaluate);
+void nfit_test_teardown(void);
+struct nfit_test_resource *get_nfit_res(resource_size_t resource);
+#endif
diff --git a/tools/testing/nvdimm/watermark.h b/tools/testing/nvdimm/watermark.h
new file mode 100644
index 000000000..ed0528757
--- /dev/null
+++ b/tools/testing/nvdimm/watermark.h
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright(c) 2018 Intel Corporation. All rights reserved.
+#ifndef _TEST_NVDIMM_WATERMARK_H_
+#define _TEST_NVDIMM_WATERMARK_H_
+int pmem_test(void);
+int libnvdimm_test(void);
+int acpi_nfit_test(void);
+int device_dax_test(void);
+
+/*
+ * dummy routine for nfit_test to validate it is linking to the properly
+ * mocked module and not the standard one from the base tree.
+ */
+#define nfit_test_watermark(x) \
+int x##_test(void) \
+{ \
+ pr_debug("%s for nfit_test\n", KBUILD_MODNAME); \
+ return 0; \
+} \
+EXPORT_SYMBOL(x##_test)
+#endif /* _TEST_NVDIMM_WATERMARK_H_ */
diff --git a/tools/testing/radix-tree/.gitignore b/tools/testing/radix-tree/.gitignore
new file mode 100644
index 000000000..d4706c0ff
--- /dev/null
+++ b/tools/testing/radix-tree/.gitignore
@@ -0,0 +1,6 @@
+generated/map-shift.h
+idr.c
+idr-test
+main
+multiorder
+radix-tree.c
diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile
new file mode 100644
index 000000000..37baecc37
--- /dev/null
+++ b/tools/testing/radix-tree/Makefile
@@ -0,0 +1,51 @@
+# SPDX-License-Identifier: GPL-2.0
+
+CFLAGS += -I. -I../../include -g -Og -Wall -D_LGPL_SOURCE -fsanitize=address \
+ -fsanitize=undefined
+LDFLAGS += -fsanitize=address -fsanitize=undefined
+LDLIBS+= -lpthread -lurcu
+TARGETS = main idr-test multiorder
+CORE_OFILES := radix-tree.o idr.o linux.o test.o find_bit.o
+OFILES = main.o $(CORE_OFILES) regression1.o regression2.o regression3.o \
+ tag_check.o multiorder.o idr-test.o iteration_check.o benchmark.o
+
+ifndef SHIFT
+ SHIFT=3
+endif
+
+ifeq ($(BUILD), 32)
+ CFLAGS += -m32
+ LDFLAGS += -m32
+endif
+
+targets: generated/map-shift.h $(TARGETS)
+
+main: $(OFILES)
+
+idr-test.o: ../../../lib/test_ida.c
+idr-test: idr-test.o $(CORE_OFILES)
+
+multiorder: multiorder.o $(CORE_OFILES)
+
+clean:
+ $(RM) $(TARGETS) *.o radix-tree.c idr.c generated/map-shift.h
+
+vpath %.c ../../lib
+
+$(OFILES): Makefile *.h */*.h generated/map-shift.h \
+ ../../include/linux/*.h \
+ ../../include/asm/*.h \
+ ../../../include/linux/radix-tree.h \
+ ../../../include/linux/idr.h
+
+radix-tree.c: ../../../lib/radix-tree.c
+ sed -e 's/^static //' -e 's/__always_inline //' -e 's/inline //' < $< > $@
+
+idr.c: ../../../lib/idr.c
+ sed -e 's/^static //' -e 's/__always_inline //' -e 's/inline //' < $< > $@
+
+generated/map-shift.h:
+ @if ! grep -qws $(SHIFT) generated/map-shift.h; then \
+ echo "#define RADIX_TREE_MAP_SHIFT $(SHIFT)" > \
+ generated/map-shift.h; \
+ fi
diff --git a/tools/testing/radix-tree/benchmark.c b/tools/testing/radix-tree/benchmark.c
new file mode 100644
index 000000000..99c40f3ed
--- /dev/null
+++ b/tools/testing/radix-tree/benchmark.c
@@ -0,0 +1,257 @@
+/*
+ * benchmark.c:
+ * Author: Konstantin Khlebnikov <koct9i@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+#include <linux/radix-tree.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+#include <time.h>
+#include "test.h"
+
+#define for_each_index(i, base, order) \
+ for (i = base; i < base + (1 << order); i++)
+
+#define NSEC_PER_SEC 1000000000L
+
+static long long benchmark_iter(struct radix_tree_root *root, bool tagged)
+{
+ volatile unsigned long sink = 0;
+ struct radix_tree_iter iter;
+ struct timespec start, finish;
+ long long nsec;
+ int l, loops = 1;
+ void **slot;
+
+#ifdef BENCHMARK
+again:
+#endif
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ for (l = 0; l < loops; l++) {
+ if (tagged) {
+ radix_tree_for_each_tagged(slot, root, &iter, 0, 0)
+ sink ^= (unsigned long)slot;
+ } else {
+ radix_tree_for_each_slot(slot, root, &iter, 0)
+ sink ^= (unsigned long)slot;
+ }
+ }
+ clock_gettime(CLOCK_MONOTONIC, &finish);
+
+ nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC +
+ (finish.tv_nsec - start.tv_nsec);
+
+#ifdef BENCHMARK
+ if (loops == 1 && nsec * 5 < NSEC_PER_SEC) {
+ loops = NSEC_PER_SEC / nsec / 4 + 1;
+ goto again;
+ }
+#endif
+
+ nsec /= loops;
+ return nsec;
+}
+
+static void benchmark_insert(struct radix_tree_root *root,
+ unsigned long size, unsigned long step, int order)
+{
+ struct timespec start, finish;
+ unsigned long index;
+ long long nsec;
+
+ clock_gettime(CLOCK_MONOTONIC, &start);
+
+ for (index = 0 ; index < size ; index += step)
+ item_insert_order(root, index, order);
+
+ clock_gettime(CLOCK_MONOTONIC, &finish);
+
+ nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC +
+ (finish.tv_nsec - start.tv_nsec);
+
+ printv(2, "Size: %8ld, step: %8ld, order: %d, insertion: %15lld ns\n",
+ size, step, order, nsec);
+}
+
+static void benchmark_tagging(struct radix_tree_root *root,
+ unsigned long size, unsigned long step, int order)
+{
+ struct timespec start, finish;
+ unsigned long index;
+ long long nsec;
+
+ clock_gettime(CLOCK_MONOTONIC, &start);
+
+ for (index = 0 ; index < size ; index += step)
+ radix_tree_tag_set(root, index, 0);
+
+ clock_gettime(CLOCK_MONOTONIC, &finish);
+
+ nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC +
+ (finish.tv_nsec - start.tv_nsec);
+
+ printv(2, "Size: %8ld, step: %8ld, order: %d, tagging: %17lld ns\n",
+ size, step, order, nsec);
+}
+
+static void benchmark_delete(struct radix_tree_root *root,
+ unsigned long size, unsigned long step, int order)
+{
+ struct timespec start, finish;
+ unsigned long index, i;
+ long long nsec;
+
+ clock_gettime(CLOCK_MONOTONIC, &start);
+
+ for (index = 0 ; index < size ; index += step)
+ for_each_index(i, index, order)
+ item_delete(root, i);
+
+ clock_gettime(CLOCK_MONOTONIC, &finish);
+
+ nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC +
+ (finish.tv_nsec - start.tv_nsec);
+
+ printv(2, "Size: %8ld, step: %8ld, order: %d, deletion: %16lld ns\n",
+ size, step, order, nsec);
+}
+
+static void benchmark_size(unsigned long size, unsigned long step, int order)
+{
+ RADIX_TREE(tree, GFP_KERNEL);
+ long long normal, tagged;
+
+ benchmark_insert(&tree, size, step, order);
+ benchmark_tagging(&tree, size, step, order);
+
+ tagged = benchmark_iter(&tree, true);
+ normal = benchmark_iter(&tree, false);
+
+ printv(2, "Size: %8ld, step: %8ld, order: %d, tagged iteration: %8lld ns\n",
+ size, step, order, tagged);
+ printv(2, "Size: %8ld, step: %8ld, order: %d, normal iteration: %8lld ns\n",
+ size, step, order, normal);
+
+ benchmark_delete(&tree, size, step, order);
+
+ item_kill_tree(&tree);
+ rcu_barrier();
+}
+
+static long long __benchmark_split(unsigned long index,
+ int old_order, int new_order)
+{
+ struct timespec start, finish;
+ long long nsec;
+ RADIX_TREE(tree, GFP_ATOMIC);
+
+ item_insert_order(&tree, index, old_order);
+
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ radix_tree_split(&tree, index, new_order);
+ clock_gettime(CLOCK_MONOTONIC, &finish);
+ nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC +
+ (finish.tv_nsec - start.tv_nsec);
+
+ item_kill_tree(&tree);
+
+ return nsec;
+
+}
+
+static void benchmark_split(unsigned long size, unsigned long step)
+{
+ int i, j, idx;
+ long long nsec = 0;
+
+
+ for (idx = 0; idx < size; idx += step) {
+ for (i = 3; i < 11; i++) {
+ for (j = 0; j < i; j++) {
+ nsec += __benchmark_split(idx, i, j);
+ }
+ }
+ }
+
+ printv(2, "Size %8ld, step %8ld, split time %10lld ns\n",
+ size, step, nsec);
+
+}
+
+static long long __benchmark_join(unsigned long index,
+ unsigned order1, unsigned order2)
+{
+ unsigned long loc;
+ struct timespec start, finish;
+ long long nsec;
+ void *item, *item2 = item_create(index + 1, order1);
+ RADIX_TREE(tree, GFP_KERNEL);
+
+ item_insert_order(&tree, index, order2);
+ item = radix_tree_lookup(&tree, index);
+
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ radix_tree_join(&tree, index + 1, order1, item2);
+ clock_gettime(CLOCK_MONOTONIC, &finish);
+ nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC +
+ (finish.tv_nsec - start.tv_nsec);
+
+ loc = find_item(&tree, item);
+ if (loc == -1)
+ free(item);
+
+ item_kill_tree(&tree);
+
+ return nsec;
+}
+
+static void benchmark_join(unsigned long step)
+{
+ int i, j, idx;
+ long long nsec = 0;
+
+ for (idx = 0; idx < 1 << 10; idx += step) {
+ for (i = 1; i < 15; i++) {
+ for (j = 0; j < i; j++) {
+ nsec += __benchmark_join(idx, i, j);
+ }
+ }
+ }
+
+ printv(2, "Size %8d, step %8ld, join time %10lld ns\n",
+ 1 << 10, step, nsec);
+}
+
+void benchmark(void)
+{
+ unsigned long size[] = {1 << 10, 1 << 20, 0};
+ unsigned long step[] = {1, 2, 7, 15, 63, 64, 65,
+ 128, 256, 512, 12345, 0};
+ int c, s;
+
+ printv(1, "starting benchmarks\n");
+ printv(1, "RADIX_TREE_MAP_SHIFT = %d\n", RADIX_TREE_MAP_SHIFT);
+
+ for (c = 0; size[c]; c++)
+ for (s = 0; step[s]; s++)
+ benchmark_size(size[c], step[s], 0);
+
+ for (c = 0; size[c]; c++)
+ for (s = 0; step[s]; s++)
+ benchmark_size(size[c], step[s] << 9, 9);
+
+ for (c = 0; size[c]; c++)
+ for (s = 0; step[s]; s++)
+ benchmark_split(size[c], step[s]);
+
+ for (s = 0; step[s]; s++)
+ benchmark_join(step[s]);
+}
diff --git a/tools/testing/radix-tree/generated/autoconf.h b/tools/testing/radix-tree/generated/autoconf.h
new file mode 100644
index 000000000..cf88dc5b8
--- /dev/null
+++ b/tools/testing/radix-tree/generated/autoconf.h
@@ -0,0 +1 @@
+#define CONFIG_RADIX_TREE_MULTIORDER 1
diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c
new file mode 100644
index 000000000..235eef71f
--- /dev/null
+++ b/tools/testing/radix-tree/idr-test.c
@@ -0,0 +1,512 @@
+/*
+ * idr-test.c: Test the IDR API
+ * Copyright (c) 2016 Matthew Wilcox <willy@infradead.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+#include <linux/bitmap.h>
+#include <linux/idr.h>
+#include <linux/slab.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+
+#include "test.h"
+
+#define DUMMY_PTR ((void *)0x12)
+
+int item_idr_free(int id, void *p, void *data)
+{
+ struct item *item = p;
+ assert(item->index == id);
+ free(p);
+
+ return 0;
+}
+
+void item_idr_remove(struct idr *idr, int id)
+{
+ struct item *item = idr_find(idr, id);
+ assert(item->index == id);
+ idr_remove(idr, id);
+ free(item);
+}
+
+void idr_alloc_test(void)
+{
+ unsigned long i;
+ DEFINE_IDR(idr);
+
+ assert(idr_alloc_cyclic(&idr, DUMMY_PTR, 0, 0x4000, GFP_KERNEL) == 0);
+ assert(idr_alloc_cyclic(&idr, DUMMY_PTR, 0x3ffd, 0x4000, GFP_KERNEL) == 0x3ffd);
+ idr_remove(&idr, 0x3ffd);
+ idr_remove(&idr, 0);
+
+ for (i = 0x3ffe; i < 0x4003; i++) {
+ int id;
+ struct item *item;
+
+ if (i < 0x4000)
+ item = item_create(i, 0);
+ else
+ item = item_create(i - 0x3fff, 0);
+
+ id = idr_alloc_cyclic(&idr, item, 1, 0x4000, GFP_KERNEL);
+ assert(id == item->index);
+ }
+
+ idr_for_each(&idr, item_idr_free, &idr);
+ idr_destroy(&idr);
+}
+
+void idr_replace_test(void)
+{
+ DEFINE_IDR(idr);
+
+ idr_alloc(&idr, (void *)-1, 10, 11, GFP_KERNEL);
+ idr_replace(&idr, &idr, 10);
+
+ idr_destroy(&idr);
+}
+
+/*
+ * Unlike the radix tree, you can put a NULL pointer -- with care -- into
+ * the IDR. Some interfaces, like idr_find() do not distinguish between
+ * "present, value is NULL" and "not present", but that's exactly what some
+ * users want.
+ */
+void idr_null_test(void)
+{
+ int i;
+ DEFINE_IDR(idr);
+
+ assert(idr_is_empty(&idr));
+
+ assert(idr_alloc(&idr, NULL, 0, 0, GFP_KERNEL) == 0);
+ assert(!idr_is_empty(&idr));
+ idr_remove(&idr, 0);
+ assert(idr_is_empty(&idr));
+
+ assert(idr_alloc(&idr, NULL, 0, 0, GFP_KERNEL) == 0);
+ assert(!idr_is_empty(&idr));
+ idr_destroy(&idr);
+ assert(idr_is_empty(&idr));
+
+ for (i = 0; i < 10; i++) {
+ assert(idr_alloc(&idr, NULL, 0, 0, GFP_KERNEL) == i);
+ }
+
+ assert(idr_replace(&idr, DUMMY_PTR, 3) == NULL);
+ assert(idr_replace(&idr, DUMMY_PTR, 4) == NULL);
+ assert(idr_replace(&idr, NULL, 4) == DUMMY_PTR);
+ assert(idr_replace(&idr, DUMMY_PTR, 11) == ERR_PTR(-ENOENT));
+ idr_remove(&idr, 5);
+ assert(idr_alloc(&idr, NULL, 0, 0, GFP_KERNEL) == 5);
+ idr_remove(&idr, 5);
+
+ for (i = 0; i < 9; i++) {
+ idr_remove(&idr, i);
+ assert(!idr_is_empty(&idr));
+ }
+ idr_remove(&idr, 8);
+ assert(!idr_is_empty(&idr));
+ idr_remove(&idr, 9);
+ assert(idr_is_empty(&idr));
+
+ assert(idr_alloc(&idr, NULL, 0, 0, GFP_KERNEL) == 0);
+ assert(idr_replace(&idr, DUMMY_PTR, 3) == ERR_PTR(-ENOENT));
+ assert(idr_replace(&idr, DUMMY_PTR, 0) == NULL);
+ assert(idr_replace(&idr, NULL, 0) == DUMMY_PTR);
+
+ idr_destroy(&idr);
+ assert(idr_is_empty(&idr));
+
+ for (i = 1; i < 10; i++) {
+ assert(idr_alloc(&idr, NULL, 1, 0, GFP_KERNEL) == i);
+ }
+
+ idr_destroy(&idr);
+ assert(idr_is_empty(&idr));
+}
+
+void idr_nowait_test(void)
+{
+ unsigned int i;
+ DEFINE_IDR(idr);
+
+ idr_preload(GFP_KERNEL);
+
+ for (i = 0; i < 3; i++) {
+ struct item *item = item_create(i, 0);
+ assert(idr_alloc(&idr, item, i, i + 1, GFP_NOWAIT) == i);
+ }
+
+ idr_preload_end();
+
+ idr_for_each(&idr, item_idr_free, &idr);
+ idr_destroy(&idr);
+}
+
+void idr_get_next_test(int base)
+{
+ unsigned long i;
+ int nextid;
+ DEFINE_IDR(idr);
+ idr_init_base(&idr, base);
+
+ int indices[] = {4, 7, 9, 15, 65, 128, 1000, 99999, 0};
+
+ for(i = 0; indices[i]; i++) {
+ struct item *item = item_create(indices[i], 0);
+ assert(idr_alloc(&idr, item, indices[i], indices[i+1],
+ GFP_KERNEL) == indices[i]);
+ }
+
+ for(i = 0, nextid = 0; indices[i]; i++) {
+ idr_get_next(&idr, &nextid);
+ assert(nextid == indices[i]);
+ nextid++;
+ }
+
+ idr_for_each(&idr, item_idr_free, &idr);
+ idr_destroy(&idr);
+}
+
+int idr_u32_cb(int id, void *ptr, void *data)
+{
+ BUG_ON(id < 0);
+ BUG_ON(ptr != DUMMY_PTR);
+ return 0;
+}
+
+void idr_u32_test1(struct idr *idr, u32 handle)
+{
+ static bool warned = false;
+ u32 id = handle;
+ int sid = 0;
+ void *ptr;
+
+ BUG_ON(idr_alloc_u32(idr, DUMMY_PTR, &id, id, GFP_KERNEL));
+ BUG_ON(id != handle);
+ BUG_ON(idr_alloc_u32(idr, DUMMY_PTR, &id, id, GFP_KERNEL) != -ENOSPC);
+ BUG_ON(id != handle);
+ if (!warned && id > INT_MAX)
+ printk("vvv Ignore these warnings\n");
+ ptr = idr_get_next(idr, &sid);
+ if (id > INT_MAX) {
+ BUG_ON(ptr != NULL);
+ BUG_ON(sid != 0);
+ } else {
+ BUG_ON(ptr != DUMMY_PTR);
+ BUG_ON(sid != id);
+ }
+ idr_for_each(idr, idr_u32_cb, NULL);
+ if (!warned && id > INT_MAX) {
+ printk("^^^ Warnings over\n");
+ warned = true;
+ }
+ BUG_ON(idr_remove(idr, id) != DUMMY_PTR);
+ BUG_ON(!idr_is_empty(idr));
+}
+
+void idr_u32_test(int base)
+{
+ DEFINE_IDR(idr);
+ idr_init_base(&idr, base);
+ idr_u32_test1(&idr, 10);
+ idr_u32_test1(&idr, 0x7fffffff);
+ idr_u32_test1(&idr, 0x80000000);
+ idr_u32_test1(&idr, 0x80000001);
+ idr_u32_test1(&idr, 0xffe00000);
+ idr_u32_test1(&idr, 0xffffffff);
+}
+
+static inline void *idr_mk_value(unsigned long v)
+{
+ BUG_ON((long)v < 0);
+ return (void *)((v & 1) | 2 | (v << 1));
+}
+
+DEFINE_IDR(find_idr);
+
+static void *idr_throbber(void *arg)
+{
+ time_t start = time(NULL);
+ int id = *(int *)arg;
+
+ rcu_register_thread();
+ do {
+ idr_alloc(&find_idr, idr_mk_value(id), id, id + 1, GFP_KERNEL);
+ idr_remove(&find_idr, id);
+ } while (time(NULL) < start + 10);
+ rcu_unregister_thread();
+
+ return NULL;
+}
+
+void idr_find_test_1(int anchor_id, int throbber_id)
+{
+ pthread_t throbber;
+ time_t start = time(NULL);
+
+ pthread_create(&throbber, NULL, idr_throbber, &throbber_id);
+
+ BUG_ON(idr_alloc(&find_idr, idr_mk_value(anchor_id), anchor_id,
+ anchor_id + 1, GFP_KERNEL) != anchor_id);
+
+ do {
+ int id = 0;
+ void *entry = idr_get_next(&find_idr, &id);
+ BUG_ON(entry != idr_mk_value(id));
+ } while (time(NULL) < start + 11);
+
+ pthread_join(throbber, NULL);
+
+ idr_remove(&find_idr, anchor_id);
+ BUG_ON(!idr_is_empty(&find_idr));
+}
+
+void idr_find_test(void)
+{
+ idr_find_test_1(100000, 0);
+ idr_find_test_1(0, 100000);
+}
+
+void idr_checks(void)
+{
+ unsigned long i;
+ DEFINE_IDR(idr);
+
+ for (i = 0; i < 10000; i++) {
+ struct item *item = item_create(i, 0);
+ assert(idr_alloc(&idr, item, 0, 20000, GFP_KERNEL) == i);
+ }
+
+ assert(idr_alloc(&idr, DUMMY_PTR, 5, 30, GFP_KERNEL) < 0);
+
+ for (i = 0; i < 5000; i++)
+ item_idr_remove(&idr, i);
+
+ idr_remove(&idr, 3);
+
+ idr_for_each(&idr, item_idr_free, &idr);
+ idr_destroy(&idr);
+
+ assert(idr_is_empty(&idr));
+
+ idr_remove(&idr, 3);
+ idr_remove(&idr, 0);
+
+ assert(idr_alloc(&idr, DUMMY_PTR, 0, 0, GFP_KERNEL) == 0);
+ idr_remove(&idr, 1);
+ for (i = 1; i < RADIX_TREE_MAP_SIZE; i++)
+ assert(idr_alloc(&idr, DUMMY_PTR, 0, 0, GFP_KERNEL) == i);
+ idr_remove(&idr, 1 << 30);
+ idr_destroy(&idr);
+
+ for (i = INT_MAX - 3UL; i < INT_MAX + 1UL; i++) {
+ struct item *item = item_create(i, 0);
+ assert(idr_alloc(&idr, item, i, i + 10, GFP_KERNEL) == i);
+ }
+ assert(idr_alloc(&idr, DUMMY_PTR, i - 2, i, GFP_KERNEL) == -ENOSPC);
+ assert(idr_alloc(&idr, DUMMY_PTR, i - 2, i + 10, GFP_KERNEL) == -ENOSPC);
+
+ idr_for_each(&idr, item_idr_free, &idr);
+ idr_destroy(&idr);
+ idr_destroy(&idr);
+
+ assert(idr_is_empty(&idr));
+
+ idr_set_cursor(&idr, INT_MAX - 3UL);
+ for (i = INT_MAX - 3UL; i < INT_MAX + 3UL; i++) {
+ struct item *item;
+ unsigned int id;
+ if (i <= INT_MAX)
+ item = item_create(i, 0);
+ else
+ item = item_create(i - INT_MAX - 1, 0);
+
+ id = idr_alloc_cyclic(&idr, item, 0, 0, GFP_KERNEL);
+ assert(id == item->index);
+ }
+
+ idr_for_each(&idr, item_idr_free, &idr);
+ idr_destroy(&idr);
+ assert(idr_is_empty(&idr));
+
+ for (i = 1; i < 10000; i++) {
+ struct item *item = item_create(i, 0);
+ assert(idr_alloc(&idr, item, 1, 20000, GFP_KERNEL) == i);
+ }
+
+ idr_for_each(&idr, item_idr_free, &idr);
+ idr_destroy(&idr);
+
+ idr_replace_test();
+ idr_alloc_test();
+ idr_null_test();
+ idr_nowait_test();
+ idr_get_next_test(0);
+ idr_get_next_test(1);
+ idr_get_next_test(4);
+ idr_u32_test(4);
+ idr_u32_test(1);
+ idr_u32_test(0);
+ idr_find_test();
+}
+
+#define module_init(x)
+#define module_exit(x)
+#define MODULE_AUTHOR(x)
+#define MODULE_LICENSE(x)
+#define dump_stack() assert(0)
+void ida_dump(struct ida *);
+
+#include "../../../lib/test_ida.c"
+
+/*
+ * Check that we get the correct error when we run out of memory doing
+ * allocations. In userspace, GFP_NOWAIT will always fail an allocation.
+ * The first test is for not having a bitmap available, and the second test
+ * is for not being able to allocate a level of the radix tree.
+ */
+void ida_check_nomem(void)
+{
+ DEFINE_IDA(ida);
+ int id;
+
+ id = ida_alloc_min(&ida, 256, GFP_NOWAIT);
+ IDA_BUG_ON(&ida, id != -ENOMEM);
+ id = ida_alloc_min(&ida, 1UL << 30, GFP_NOWAIT);
+ IDA_BUG_ON(&ida, id != -ENOMEM);
+ IDA_BUG_ON(&ida, !ida_is_empty(&ida));
+}
+
+/*
+ * Check handling of conversions between exceptional entries and full bitmaps.
+ */
+void ida_check_conv_user(void)
+{
+ DEFINE_IDA(ida);
+ unsigned long i;
+
+ radix_tree_cpu_dead(1);
+ for (i = 0; i < 1000000; i++) {
+ int id = ida_alloc(&ida, GFP_NOWAIT);
+ if (id == -ENOMEM) {
+ IDA_BUG_ON(&ida, (i % IDA_BITMAP_BITS) !=
+ BITS_PER_LONG - 2);
+ id = ida_alloc(&ida, GFP_KERNEL);
+ } else {
+ IDA_BUG_ON(&ida, (i % IDA_BITMAP_BITS) ==
+ BITS_PER_LONG - 2);
+ }
+ IDA_BUG_ON(&ida, id != i);
+ }
+ ida_destroy(&ida);
+}
+
+void ida_check_random(void)
+{
+ DEFINE_IDA(ida);
+ DECLARE_BITMAP(bitmap, 2048);
+ unsigned int i;
+ time_t s = time(NULL);
+
+ repeat:
+ memset(bitmap, 0, sizeof(bitmap));
+ for (i = 0; i < 100000; i++) {
+ int i = rand();
+ int bit = i & 2047;
+ if (test_bit(bit, bitmap)) {
+ __clear_bit(bit, bitmap);
+ ida_free(&ida, bit);
+ } else {
+ __set_bit(bit, bitmap);
+ IDA_BUG_ON(&ida, ida_alloc_min(&ida, bit, GFP_KERNEL)
+ != bit);
+ }
+ }
+ ida_destroy(&ida);
+ if (time(NULL) < s + 10)
+ goto repeat;
+}
+
+void ida_simple_get_remove_test(void)
+{
+ DEFINE_IDA(ida);
+ unsigned long i;
+
+ for (i = 0; i < 10000; i++) {
+ assert(ida_simple_get(&ida, 0, 20000, GFP_KERNEL) == i);
+ }
+ assert(ida_simple_get(&ida, 5, 30, GFP_KERNEL) < 0);
+
+ for (i = 0; i < 10000; i++) {
+ ida_simple_remove(&ida, i);
+ }
+ assert(ida_is_empty(&ida));
+
+ ida_destroy(&ida);
+}
+
+void user_ida_checks(void)
+{
+ radix_tree_cpu_dead(1);
+
+ ida_check_nomem();
+ ida_check_conv_user();
+ ida_check_random();
+ ida_simple_get_remove_test();
+
+ radix_tree_cpu_dead(1);
+}
+
+static void *ida_random_fn(void *arg)
+{
+ rcu_register_thread();
+ ida_check_random();
+ rcu_unregister_thread();
+ return NULL;
+}
+
+void ida_thread_tests(void)
+{
+ pthread_t threads[20];
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(threads); i++)
+ if (pthread_create(&threads[i], NULL, ida_random_fn, NULL)) {
+ perror("creating ida thread");
+ exit(1);
+ }
+
+ while (i--)
+ pthread_join(threads[i], NULL);
+}
+
+void ida_tests(void)
+{
+ user_ida_checks();
+ ida_checks();
+ ida_exit();
+ ida_thread_tests();
+}
+
+int __weak main(void)
+{
+ radix_tree_init();
+ idr_checks();
+ ida_tests();
+ radix_tree_cpu_dead(1);
+ rcu_barrier();
+ if (nr_allocated)
+ printf("nr_allocated = %d\n", nr_allocated);
+ return 0;
+}
diff --git a/tools/testing/radix-tree/iteration_check.c b/tools/testing/radix-tree/iteration_check.c
new file mode 100644
index 000000000..a92bab513
--- /dev/null
+++ b/tools/testing/radix-tree/iteration_check.c
@@ -0,0 +1,221 @@
+/*
+ * iteration_check.c: test races having to do with radix tree iteration
+ * Copyright (c) 2016 Intel Corporation
+ * Author: Ross Zwisler <ross.zwisler@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+#include <linux/radix-tree.h>
+#include <pthread.h>
+#include "test.h"
+
+#define NUM_THREADS 5
+#define MAX_IDX 100
+#define TAG 0
+#define NEW_TAG 1
+
+static pthread_mutex_t tree_lock = PTHREAD_MUTEX_INITIALIZER;
+static pthread_t threads[NUM_THREADS];
+static unsigned int seeds[3];
+static RADIX_TREE(tree, GFP_KERNEL);
+static bool test_complete;
+static int max_order;
+
+/* relentlessly fill the tree with tagged entries */
+static void *add_entries_fn(void *arg)
+{
+ rcu_register_thread();
+
+ while (!test_complete) {
+ unsigned long pgoff;
+ int order;
+
+ for (pgoff = 0; pgoff < MAX_IDX; pgoff++) {
+ pthread_mutex_lock(&tree_lock);
+ for (order = max_order; order >= 0; order--) {
+ if (item_insert_order(&tree, pgoff, order)
+ == 0) {
+ item_tag_set(&tree, pgoff, TAG);
+ break;
+ }
+ }
+ pthread_mutex_unlock(&tree_lock);
+ }
+ }
+
+ rcu_unregister_thread();
+
+ return NULL;
+}
+
+/*
+ * Iterate over the tagged entries, doing a radix_tree_iter_retry() as we find
+ * things that have been removed and randomly resetting our iteration to the
+ * next chunk with radix_tree_iter_resume(). Both radix_tree_iter_retry() and
+ * radix_tree_iter_resume() cause radix_tree_next_slot() to be called with a
+ * NULL 'slot' variable.
+ */
+static void *tagged_iteration_fn(void *arg)
+{
+ struct radix_tree_iter iter;
+ void **slot;
+
+ rcu_register_thread();
+
+ while (!test_complete) {
+ rcu_read_lock();
+ radix_tree_for_each_tagged(slot, &tree, &iter, 0, TAG) {
+ void *entry = radix_tree_deref_slot(slot);
+ if (unlikely(!entry))
+ continue;
+
+ if (radix_tree_deref_retry(entry)) {
+ slot = radix_tree_iter_retry(&iter);
+ continue;
+ }
+
+ if (rand_r(&seeds[0]) % 50 == 0) {
+ slot = radix_tree_iter_resume(slot, &iter);
+ rcu_read_unlock();
+ rcu_barrier();
+ rcu_read_lock();
+ }
+ }
+ rcu_read_unlock();
+ }
+
+ rcu_unregister_thread();
+
+ return NULL;
+}
+
+/*
+ * Iterate over the entries, doing a radix_tree_iter_retry() as we find things
+ * that have been removed and randomly resetting our iteration to the next
+ * chunk with radix_tree_iter_resume(). Both radix_tree_iter_retry() and
+ * radix_tree_iter_resume() cause radix_tree_next_slot() to be called with a
+ * NULL 'slot' variable.
+ */
+static void *untagged_iteration_fn(void *arg)
+{
+ struct radix_tree_iter iter;
+ void **slot;
+
+ rcu_register_thread();
+
+ while (!test_complete) {
+ rcu_read_lock();
+ radix_tree_for_each_slot(slot, &tree, &iter, 0) {
+ void *entry = radix_tree_deref_slot(slot);
+ if (unlikely(!entry))
+ continue;
+
+ if (radix_tree_deref_retry(entry)) {
+ slot = radix_tree_iter_retry(&iter);
+ continue;
+ }
+
+ if (rand_r(&seeds[1]) % 50 == 0) {
+ slot = radix_tree_iter_resume(slot, &iter);
+ rcu_read_unlock();
+ rcu_barrier();
+ rcu_read_lock();
+ }
+ }
+ rcu_read_unlock();
+ }
+
+ rcu_unregister_thread();
+
+ return NULL;
+}
+
+/*
+ * Randomly remove entries to help induce radix_tree_iter_retry() calls in the
+ * two iteration functions.
+ */
+static void *remove_entries_fn(void *arg)
+{
+ rcu_register_thread();
+
+ while (!test_complete) {
+ int pgoff;
+
+ pgoff = rand_r(&seeds[2]) % MAX_IDX;
+
+ pthread_mutex_lock(&tree_lock);
+ item_delete(&tree, pgoff);
+ pthread_mutex_unlock(&tree_lock);
+ }
+
+ rcu_unregister_thread();
+
+ return NULL;
+}
+
+static void *tag_entries_fn(void *arg)
+{
+ rcu_register_thread();
+
+ while (!test_complete) {
+ tag_tagged_items(&tree, &tree_lock, 0, MAX_IDX, 10, TAG,
+ NEW_TAG);
+ }
+ rcu_unregister_thread();
+ return NULL;
+}
+
+/* This is a unit test for a bug found by the syzkaller tester */
+void iteration_test(unsigned order, unsigned test_duration)
+{
+ int i;
+
+ printv(1, "Running %siteration tests for %d seconds\n",
+ order > 0 ? "multiorder " : "", test_duration);
+
+ max_order = order;
+ test_complete = false;
+
+ for (i = 0; i < 3; i++)
+ seeds[i] = rand();
+
+ if (pthread_create(&threads[0], NULL, tagged_iteration_fn, NULL)) {
+ perror("create tagged iteration thread");
+ exit(1);
+ }
+ if (pthread_create(&threads[1], NULL, untagged_iteration_fn, NULL)) {
+ perror("create untagged iteration thread");
+ exit(1);
+ }
+ if (pthread_create(&threads[2], NULL, add_entries_fn, NULL)) {
+ perror("create add entry thread");
+ exit(1);
+ }
+ if (pthread_create(&threads[3], NULL, remove_entries_fn, NULL)) {
+ perror("create remove entry thread");
+ exit(1);
+ }
+ if (pthread_create(&threads[4], NULL, tag_entries_fn, NULL)) {
+ perror("create tag entry thread");
+ exit(1);
+ }
+
+ sleep(test_duration);
+ test_complete = true;
+
+ for (i = 0; i < NUM_THREADS; i++) {
+ if (pthread_join(threads[i], NULL)) {
+ perror("pthread_join");
+ exit(1);
+ }
+ }
+
+ item_kill_tree(&tree);
+}
diff --git a/tools/testing/radix-tree/linux.c b/tools/testing/radix-tree/linux.c
new file mode 100644
index 000000000..44a0d1ad4
--- /dev/null
+++ b/tools/testing/radix-tree/linux.c
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdlib.h>
+#include <string.h>
+#include <malloc.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <assert.h>
+
+#include <linux/gfp.h>
+#include <linux/poison.h>
+#include <linux/slab.h>
+#include <linux/radix-tree.h>
+#include <urcu/uatomic.h>
+
+int nr_allocated;
+int preempt_count;
+int kmalloc_verbose;
+int test_verbose;
+
+struct kmem_cache {
+ pthread_mutex_t lock;
+ int size;
+ int nr_objs;
+ void *objs;
+ void (*ctor)(void *);
+};
+
+void *kmem_cache_alloc(struct kmem_cache *cachep, int flags)
+{
+ struct radix_tree_node *node;
+
+ if (!(flags & __GFP_DIRECT_RECLAIM))
+ return NULL;
+
+ pthread_mutex_lock(&cachep->lock);
+ if (cachep->nr_objs) {
+ cachep->nr_objs--;
+ node = cachep->objs;
+ cachep->objs = node->parent;
+ pthread_mutex_unlock(&cachep->lock);
+ node->parent = NULL;
+ } else {
+ pthread_mutex_unlock(&cachep->lock);
+ node = malloc(cachep->size);
+ if (cachep->ctor)
+ cachep->ctor(node);
+ }
+
+ uatomic_inc(&nr_allocated);
+ if (kmalloc_verbose)
+ printf("Allocating %p from slab\n", node);
+ return node;
+}
+
+void kmem_cache_free(struct kmem_cache *cachep, void *objp)
+{
+ assert(objp);
+ uatomic_dec(&nr_allocated);
+ if (kmalloc_verbose)
+ printf("Freeing %p to slab\n", objp);
+ pthread_mutex_lock(&cachep->lock);
+ if (cachep->nr_objs > 10) {
+ memset(objp, POISON_FREE, cachep->size);
+ free(objp);
+ } else {
+ struct radix_tree_node *node = objp;
+ cachep->nr_objs++;
+ node->parent = cachep->objs;
+ cachep->objs = node;
+ }
+ pthread_mutex_unlock(&cachep->lock);
+}
+
+void *kmalloc(size_t size, gfp_t gfp)
+{
+ void *ret;
+
+ if (!(gfp & __GFP_DIRECT_RECLAIM))
+ return NULL;
+
+ ret = malloc(size);
+ uatomic_inc(&nr_allocated);
+ if (kmalloc_verbose)
+ printf("Allocating %p from malloc\n", ret);
+ if (gfp & __GFP_ZERO)
+ memset(ret, 0, size);
+ return ret;
+}
+
+void kfree(void *p)
+{
+ if (!p)
+ return;
+ uatomic_dec(&nr_allocated);
+ if (kmalloc_verbose)
+ printf("Freeing %p to malloc\n", p);
+ free(p);
+}
+
+struct kmem_cache *
+kmem_cache_create(const char *name, size_t size, size_t offset,
+ unsigned long flags, void (*ctor)(void *))
+{
+ struct kmem_cache *ret = malloc(sizeof(*ret));
+
+ pthread_mutex_init(&ret->lock, NULL);
+ ret->size = size;
+ ret->nr_objs = 0;
+ ret->objs = NULL;
+ ret->ctor = ctor;
+ return ret;
+}
diff --git a/tools/testing/radix-tree/linux/bug.h b/tools/testing/radix-tree/linux/bug.h
new file mode 100644
index 000000000..23b8ed52f
--- /dev/null
+++ b/tools/testing/radix-tree/linux/bug.h
@@ -0,0 +1 @@
+#include "asm/bug.h"
diff --git a/tools/testing/radix-tree/linux/compiler_types.h b/tools/testing/radix-tree/linux/compiler_types.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/testing/radix-tree/linux/compiler_types.h
diff --git a/tools/testing/radix-tree/linux/cpu.h b/tools/testing/radix-tree/linux/cpu.h
new file mode 100644
index 000000000..a45530d78
--- /dev/null
+++ b/tools/testing/radix-tree/linux/cpu.h
@@ -0,0 +1 @@
+#define cpuhp_setup_state_nocalls(a, b, c, d) (0)
diff --git a/tools/testing/radix-tree/linux/gfp.h b/tools/testing/radix-tree/linux/gfp.h
new file mode 100644
index 000000000..32159c08a
--- /dev/null
+++ b/tools/testing/radix-tree/linux/gfp.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _GFP_H
+#define _GFP_H
+
+#include <linux/types.h>
+
+#define __GFP_BITS_SHIFT 26
+#define __GFP_BITS_MASK ((gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
+
+#define __GFP_HIGH 0x20u
+#define __GFP_IO 0x40u
+#define __GFP_FS 0x80u
+#define __GFP_NOWARN 0x200u
+#define __GFP_ZERO 0x8000u
+#define __GFP_ATOMIC 0x80000u
+#define __GFP_ACCOUNT 0x100000u
+#define __GFP_DIRECT_RECLAIM 0x400000u
+#define __GFP_KSWAPD_RECLAIM 0x2000000u
+
+#define __GFP_RECLAIM (__GFP_DIRECT_RECLAIM|__GFP_KSWAPD_RECLAIM)
+
+#define GFP_ZONEMASK 0x0fu
+#define GFP_ATOMIC (__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM)
+#define GFP_KERNEL (__GFP_RECLAIM | __GFP_IO | __GFP_FS)
+#define GFP_NOWAIT (__GFP_KSWAPD_RECLAIM)
+
+
+static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags)
+{
+ return !!(gfp_flags & __GFP_DIRECT_RECLAIM);
+}
+
+#endif
diff --git a/tools/testing/radix-tree/linux/idr.h b/tools/testing/radix-tree/linux/idr.h
new file mode 100644
index 000000000..4e342f2e3
--- /dev/null
+++ b/tools/testing/radix-tree/linux/idr.h
@@ -0,0 +1 @@
+#include "../../../../include/linux/idr.h"
diff --git a/tools/testing/radix-tree/linux/init.h b/tools/testing/radix-tree/linux/init.h
new file mode 100644
index 000000000..1bb0afc21
--- /dev/null
+++ b/tools/testing/radix-tree/linux/init.h
@@ -0,0 +1 @@
+#define __init
diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/radix-tree/linux/kernel.h
new file mode 100644
index 000000000..426f32f28
--- /dev/null
+++ b/tools/testing/radix-tree/linux/kernel.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _KERNEL_H
+#define _KERNEL_H
+
+#include "../../include/linux/kernel.h"
+#include <string.h>
+#include <stdio.h>
+#include <limits.h>
+
+#include <linux/compiler.h>
+#include <linux/err.h>
+#include <linux/bitops.h>
+#include <linux/log2.h>
+#include "../../../include/linux/kconfig.h"
+
+#define printk printf
+#define pr_debug printk
+#define pr_cont printk
+
+#endif /* _KERNEL_H */
diff --git a/tools/testing/radix-tree/linux/kmemleak.h b/tools/testing/radix-tree/linux/kmemleak.h
new file mode 100644
index 000000000..155f11278
--- /dev/null
+++ b/tools/testing/radix-tree/linux/kmemleak.h
@@ -0,0 +1 @@
+static inline void kmemleak_update_trace(const void *ptr) { }
diff --git a/tools/testing/radix-tree/linux/percpu.h b/tools/testing/radix-tree/linux/percpu.h
new file mode 100644
index 000000000..b2403aa74
--- /dev/null
+++ b/tools/testing/radix-tree/linux/percpu.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#define DECLARE_PER_CPU(type, val) extern type val
+#define DEFINE_PER_CPU(type, val) type val
+
+#define __get_cpu_var(var) var
+#define this_cpu_ptr(var) var
+#define this_cpu_read(var) var
+#define this_cpu_xchg(var, val) uatomic_xchg(&var, val)
+#define this_cpu_cmpxchg(var, old, new) uatomic_cmpxchg(&var, old, new)
+#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); })
+#define per_cpu(var, cpu) (*per_cpu_ptr(&(var), cpu))
diff --git a/tools/testing/radix-tree/linux/preempt.h b/tools/testing/radix-tree/linux/preempt.h
new file mode 100644
index 000000000..edb10302b
--- /dev/null
+++ b/tools/testing/radix-tree/linux/preempt.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LINUX_PREEMPT_H
+#define __LINUX_PREEMPT_H
+
+extern int preempt_count;
+
+#define preempt_disable() uatomic_inc(&preempt_count)
+#define preempt_enable() uatomic_dec(&preempt_count)
+
+static inline int in_interrupt(void)
+{
+ return 0;
+}
+
+#endif /* __LINUX_PREEMPT_H */
diff --git a/tools/testing/radix-tree/linux/radix-tree.h b/tools/testing/radix-tree/linux/radix-tree.h
new file mode 100644
index 000000000..24f13d27a
--- /dev/null
+++ b/tools/testing/radix-tree/linux/radix-tree.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TEST_RADIX_TREE_H
+#define _TEST_RADIX_TREE_H
+
+#include "generated/map-shift.h"
+#include "../../../../include/linux/radix-tree.h"
+
+extern int kmalloc_verbose;
+extern int test_verbose;
+
+static inline void trace_call_rcu(struct rcu_head *head,
+ void (*func)(struct rcu_head *head))
+{
+ if (kmalloc_verbose)
+ printf("Delaying free of %p to slab\n", (char *)head -
+ offsetof(struct radix_tree_node, rcu_head));
+ call_rcu(head, func);
+}
+
+#define printv(verbosity_level, fmt, ...) \
+ if(test_verbose >= verbosity_level) \
+ printf(fmt, ##__VA_ARGS__)
+
+#undef call_rcu
+#define call_rcu(x, y) trace_call_rcu(x, y)
+
+#endif /* _TEST_RADIX_TREE_H */
diff --git a/tools/testing/radix-tree/linux/rcupdate.h b/tools/testing/radix-tree/linux/rcupdate.h
new file mode 100644
index 000000000..73ed33658
--- /dev/null
+++ b/tools/testing/radix-tree/linux/rcupdate.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _RCUPDATE_H
+#define _RCUPDATE_H
+
+#include <urcu.h>
+
+#define rcu_dereference_raw(p) rcu_dereference(p)
+#define rcu_dereference_protected(p, cond) rcu_dereference(p)
+
+#endif
diff --git a/tools/testing/radix-tree/linux/slab.h b/tools/testing/radix-tree/linux/slab.h
new file mode 100644
index 000000000..a037def0d
--- /dev/null
+++ b/tools/testing/radix-tree/linux/slab.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef SLAB_H
+#define SLAB_H
+
+#include <linux/types.h>
+#include <linux/gfp.h>
+
+#define SLAB_HWCACHE_ALIGN 1
+#define SLAB_PANIC 2
+#define SLAB_RECLAIM_ACCOUNT 0x00020000UL /* Objects are reclaimable */
+
+void *kmalloc(size_t size, gfp_t);
+void kfree(void *);
+
+static inline void *kzalloc(size_t size, gfp_t gfp)
+{
+ return kmalloc(size, gfp | __GFP_ZERO);
+}
+
+void *kmem_cache_alloc(struct kmem_cache *cachep, int flags);
+void kmem_cache_free(struct kmem_cache *cachep, void *objp);
+
+struct kmem_cache *
+kmem_cache_create(const char *name, size_t size, size_t offset,
+ unsigned long flags, void (*ctor)(void *));
+
+#endif /* SLAB_H */
diff --git a/tools/testing/radix-tree/linux/xarray.h b/tools/testing/radix-tree/linux/xarray.h
new file mode 100644
index 000000000..df3812cda
--- /dev/null
+++ b/tools/testing/radix-tree/linux/xarray.h
@@ -0,0 +1,2 @@
+#include "generated/map-shift.h"
+#include "../../../../include/linux/xarray.h"
diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c
new file mode 100644
index 000000000..b741686e5
--- /dev/null
+++ b/tools/testing/radix-tree/main.c
@@ -0,0 +1,388 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <time.h>
+#include <assert.h>
+#include <limits.h>
+
+#include <linux/slab.h>
+#include <linux/radix-tree.h>
+
+#include "test.h"
+#include "regression.h"
+
+void __gang_check(unsigned long middle, long down, long up, int chunk, int hop)
+{
+ long idx;
+ RADIX_TREE(tree, GFP_KERNEL);
+
+ middle = 1 << 30;
+
+ for (idx = -down; idx < up; idx++)
+ item_insert(&tree, middle + idx);
+
+ item_check_absent(&tree, middle - down - 1);
+ for (idx = -down; idx < up; idx++)
+ item_check_present(&tree, middle + idx);
+ item_check_absent(&tree, middle + up);
+
+ if (chunk > 0) {
+ item_gang_check_present(&tree, middle - down, up + down,
+ chunk, hop);
+ item_full_scan(&tree, middle - down, down + up, chunk);
+ }
+ item_kill_tree(&tree);
+}
+
+void gang_check(void)
+{
+ __gang_check(1UL << 30, 128, 128, 35, 2);
+ __gang_check(1UL << 31, 128, 128, 32, 32);
+ __gang_check(1UL << 31, 128, 128, 32, 100);
+ __gang_check(1UL << 31, 128, 128, 17, 7);
+ __gang_check(0xffff0000UL, 0, 65536, 17, 7);
+ __gang_check(0xfffffffeUL, 1, 1, 17, 7);
+}
+
+void __big_gang_check(void)
+{
+ unsigned long start;
+ int wrapped = 0;
+
+ start = 0;
+ do {
+ unsigned long old_start;
+
+// printf("0x%08lx\n", start);
+ __gang_check(start, rand() % 113 + 1, rand() % 71,
+ rand() % 157, rand() % 91 + 1);
+ old_start = start;
+ start += rand() % 1000000;
+ start %= 1ULL << 33;
+ if (start < old_start)
+ wrapped = 1;
+ } while (!wrapped);
+}
+
+void big_gang_check(bool long_run)
+{
+ int i;
+
+ for (i = 0; i < (long_run ? 1000 : 3); i++) {
+ __big_gang_check();
+ printv(2, "%d ", i);
+ fflush(stdout);
+ }
+}
+
+void add_and_check(void)
+{
+ RADIX_TREE(tree, GFP_KERNEL);
+
+ item_insert(&tree, 44);
+ item_check_present(&tree, 44);
+ item_check_absent(&tree, 43);
+ item_kill_tree(&tree);
+}
+
+void dynamic_height_check(void)
+{
+ int i;
+ RADIX_TREE(tree, GFP_KERNEL);
+ tree_verify_min_height(&tree, 0);
+
+ item_insert(&tree, 42);
+ tree_verify_min_height(&tree, 42);
+
+ item_insert(&tree, 1000000);
+ tree_verify_min_height(&tree, 1000000);
+
+ assert(item_delete(&tree, 1000000));
+ tree_verify_min_height(&tree, 42);
+
+ assert(item_delete(&tree, 42));
+ tree_verify_min_height(&tree, 0);
+
+ for (i = 0; i < 1000; i++) {
+ item_insert(&tree, i);
+ tree_verify_min_height(&tree, i);
+ }
+
+ i--;
+ for (;;) {
+ assert(item_delete(&tree, i));
+ if (i == 0) {
+ tree_verify_min_height(&tree, 0);
+ break;
+ }
+ i--;
+ tree_verify_min_height(&tree, i);
+ }
+
+ item_kill_tree(&tree);
+}
+
+void check_copied_tags(struct radix_tree_root *tree, unsigned long start, unsigned long end, unsigned long *idx, int count, int fromtag, int totag)
+{
+ int i;
+
+ for (i = 0; i < count; i++) {
+/* if (i % 1000 == 0)
+ putchar('.'); */
+ if (idx[i] < start || idx[i] > end) {
+ if (item_tag_get(tree, idx[i], totag)) {
+ printv(2, "%lu-%lu: %lu, tags %d-%d\n", start,
+ end, idx[i], item_tag_get(tree, idx[i],
+ fromtag),
+ item_tag_get(tree, idx[i], totag));
+ }
+ assert(!item_tag_get(tree, idx[i], totag));
+ continue;
+ }
+ if (item_tag_get(tree, idx[i], fromtag) ^
+ item_tag_get(tree, idx[i], totag)) {
+ printv(2, "%lu-%lu: %lu, tags %d-%d\n", start, end,
+ idx[i], item_tag_get(tree, idx[i], fromtag),
+ item_tag_get(tree, idx[i], totag));
+ }
+ assert(!(item_tag_get(tree, idx[i], fromtag) ^
+ item_tag_get(tree, idx[i], totag)));
+ }
+}
+
+#define ITEMS 50000
+
+void copy_tag_check(void)
+{
+ RADIX_TREE(tree, GFP_KERNEL);
+ unsigned long idx[ITEMS];
+ unsigned long start, end, count = 0, tagged, cur, tmp;
+ int i;
+
+// printf("generating radix tree indices...\n");
+ start = rand();
+ end = rand();
+ if (start > end && (rand() % 10)) {
+ cur = start;
+ start = end;
+ end = cur;
+ }
+ /* Specifically create items around the start and the end of the range
+ * with high probability to check for off by one errors */
+ cur = rand();
+ if (cur & 1) {
+ item_insert(&tree, start);
+ if (cur & 2) {
+ if (start <= end)
+ count++;
+ item_tag_set(&tree, start, 0);
+ }
+ }
+ if (cur & 4) {
+ item_insert(&tree, start-1);
+ if (cur & 8)
+ item_tag_set(&tree, start-1, 0);
+ }
+ if (cur & 16) {
+ item_insert(&tree, end);
+ if (cur & 32) {
+ if (start <= end)
+ count++;
+ item_tag_set(&tree, end, 0);
+ }
+ }
+ if (cur & 64) {
+ item_insert(&tree, end+1);
+ if (cur & 128)
+ item_tag_set(&tree, end+1, 0);
+ }
+
+ for (i = 0; i < ITEMS; i++) {
+ do {
+ idx[i] = rand();
+ } while (item_lookup(&tree, idx[i]));
+
+ item_insert(&tree, idx[i]);
+ if (rand() & 1) {
+ item_tag_set(&tree, idx[i], 0);
+ if (idx[i] >= start && idx[i] <= end)
+ count++;
+ }
+/* if (i % 1000 == 0)
+ putchar('.'); */
+ }
+
+// printf("\ncopying tags...\n");
+ tagged = tag_tagged_items(&tree, NULL, start, end, ITEMS, 0, 1);
+
+// printf("checking copied tags\n");
+ assert(tagged == count);
+ check_copied_tags(&tree, start, end, idx, ITEMS, 0, 1);
+
+ /* Copy tags in several rounds */
+// printf("\ncopying tags...\n");
+ tmp = rand() % (count / 10 + 2);
+ tagged = tag_tagged_items(&tree, NULL, start, end, tmp, 0, 2);
+ assert(tagged == count);
+
+// printf("%lu %lu %lu\n", tagged, tmp, count);
+// printf("checking copied tags\n");
+ check_copied_tags(&tree, start, end, idx, ITEMS, 0, 2);
+ verify_tag_consistency(&tree, 0);
+ verify_tag_consistency(&tree, 1);
+ verify_tag_consistency(&tree, 2);
+// printf("\n");
+ item_kill_tree(&tree);
+}
+
+static void __locate_check(struct radix_tree_root *tree, unsigned long index,
+ unsigned order)
+{
+ struct item *item;
+ unsigned long index2;
+
+ item_insert_order(tree, index, order);
+ item = item_lookup(tree, index);
+ index2 = find_item(tree, item);
+ if (index != index2) {
+ printv(2, "index %ld order %d inserted; found %ld\n",
+ index, order, index2);
+ abort();
+ }
+}
+
+static void __order_0_locate_check(void)
+{
+ RADIX_TREE(tree, GFP_KERNEL);
+ int i;
+
+ for (i = 0; i < 50; i++)
+ __locate_check(&tree, rand() % INT_MAX, 0);
+
+ item_kill_tree(&tree);
+}
+
+static void locate_check(void)
+{
+ RADIX_TREE(tree, GFP_KERNEL);
+ unsigned order;
+ unsigned long offset, index;
+
+ __order_0_locate_check();
+
+ for (order = 0; order < 20; order++) {
+ for (offset = 0; offset < (1 << (order + 3));
+ offset += (1UL << order)) {
+ for (index = 0; index < (1UL << (order + 5));
+ index += (1UL << order)) {
+ __locate_check(&tree, index + offset, order);
+ }
+ if (find_item(&tree, &tree) != -1)
+ abort();
+
+ item_kill_tree(&tree);
+ }
+ }
+
+ if (find_item(&tree, &tree) != -1)
+ abort();
+ __locate_check(&tree, -1, 0);
+ if (find_item(&tree, &tree) != -1)
+ abort();
+ item_kill_tree(&tree);
+}
+
+static void single_thread_tests(bool long_run)
+{
+ int i;
+
+ printv(1, "starting single_thread_tests: %d allocated, preempt %d\n",
+ nr_allocated, preempt_count);
+ multiorder_checks();
+ rcu_barrier();
+ printv(2, "after multiorder_check: %d allocated, preempt %d\n",
+ nr_allocated, preempt_count);
+ locate_check();
+ rcu_barrier();
+ printv(2, "after locate_check: %d allocated, preempt %d\n",
+ nr_allocated, preempt_count);
+ tag_check();
+ rcu_barrier();
+ printv(2, "after tag_check: %d allocated, preempt %d\n",
+ nr_allocated, preempt_count);
+ gang_check();
+ rcu_barrier();
+ printv(2, "after gang_check: %d allocated, preempt %d\n",
+ nr_allocated, preempt_count);
+ add_and_check();
+ rcu_barrier();
+ printv(2, "after add_and_check: %d allocated, preempt %d\n",
+ nr_allocated, preempt_count);
+ dynamic_height_check();
+ rcu_barrier();
+ printv(2, "after dynamic_height_check: %d allocated, preempt %d\n",
+ nr_allocated, preempt_count);
+ idr_checks();
+ ida_tests();
+ rcu_barrier();
+ printv(2, "after idr_checks: %d allocated, preempt %d\n",
+ nr_allocated, preempt_count);
+ big_gang_check(long_run);
+ rcu_barrier();
+ printv(2, "after big_gang_check: %d allocated, preempt %d\n",
+ nr_allocated, preempt_count);
+ for (i = 0; i < (long_run ? 2000 : 3); i++) {
+ copy_tag_check();
+ printv(2, "%d ", i);
+ fflush(stdout);
+ }
+ rcu_barrier();
+ printv(2, "after copy_tag_check: %d allocated, preempt %d\n",
+ nr_allocated, preempt_count);
+}
+
+int main(int argc, char **argv)
+{
+ bool long_run = false;
+ int opt;
+ unsigned int seed = time(NULL);
+
+ while ((opt = getopt(argc, argv, "ls:v")) != -1) {
+ if (opt == 'l')
+ long_run = true;
+ else if (opt == 's')
+ seed = strtoul(optarg, NULL, 0);
+ else if (opt == 'v')
+ test_verbose++;
+ }
+
+ printf("random seed %u\n", seed);
+ srand(seed);
+
+ printf("running tests\n");
+
+ rcu_register_thread();
+ radix_tree_init();
+
+ regression1_test();
+ regression2_test();
+ regression3_test();
+ iteration_test(0, 10 + 90 * long_run);
+ iteration_test(7, 10 + 90 * long_run);
+ single_thread_tests(long_run);
+
+ /* Free any remaining preallocated nodes */
+ radix_tree_cpu_dead(0);
+
+ benchmark();
+
+ rcu_barrier();
+ printv(2, "after rcu_barrier: %d allocated, preempt %d\n",
+ nr_allocated, preempt_count);
+ rcu_unregister_thread();
+
+ printf("tests completed\n");
+
+ exit(0);
+}
diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c
new file mode 100644
index 000000000..7bf405638
--- /dev/null
+++ b/tools/testing/radix-tree/multiorder.c
@@ -0,0 +1,719 @@
+/*
+ * multiorder.c: Multi-order radix tree entry testing
+ * Copyright (c) 2016 Intel Corporation
+ * Author: Ross Zwisler <ross.zwisler@linux.intel.com>
+ * Author: Matthew Wilcox <matthew.r.wilcox@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+#include <linux/radix-tree.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+#include <pthread.h>
+
+#include "test.h"
+
+#define for_each_index(i, base, order) \
+ for (i = base; i < base + (1 << order); i++)
+
+static void __multiorder_tag_test(int index, int order)
+{
+ RADIX_TREE(tree, GFP_KERNEL);
+ int base, err, i;
+
+ /* our canonical entry */
+ base = index & ~((1 << order) - 1);
+
+ printv(2, "Multiorder tag test with index %d, canonical entry %d\n",
+ index, base);
+
+ err = item_insert_order(&tree, index, order);
+ assert(!err);
+
+ /*
+ * Verify we get collisions for covered indices. We try and fail to
+ * insert an exceptional entry so we don't leak memory via
+ * item_insert_order().
+ */
+ for_each_index(i, base, order) {
+ err = __radix_tree_insert(&tree, i, order,
+ (void *)(0xA0 | RADIX_TREE_EXCEPTIONAL_ENTRY));
+ assert(err == -EEXIST);
+ }
+
+ for_each_index(i, base, order) {
+ assert(!radix_tree_tag_get(&tree, i, 0));
+ assert(!radix_tree_tag_get(&tree, i, 1));
+ }
+
+ assert(radix_tree_tag_set(&tree, index, 0));
+
+ for_each_index(i, base, order) {
+ assert(radix_tree_tag_get(&tree, i, 0));
+ assert(!radix_tree_tag_get(&tree, i, 1));
+ }
+
+ assert(tag_tagged_items(&tree, NULL, 0, ~0UL, 10, 0, 1) == 1);
+ assert(radix_tree_tag_clear(&tree, index, 0));
+
+ for_each_index(i, base, order) {
+ assert(!radix_tree_tag_get(&tree, i, 0));
+ assert(radix_tree_tag_get(&tree, i, 1));
+ }
+
+ assert(radix_tree_tag_clear(&tree, index, 1));
+
+ assert(!radix_tree_tagged(&tree, 0));
+ assert(!radix_tree_tagged(&tree, 1));
+
+ item_kill_tree(&tree);
+}
+
+static void __multiorder_tag_test2(unsigned order, unsigned long index2)
+{
+ RADIX_TREE(tree, GFP_KERNEL);
+ unsigned long index = (1 << order);
+ index2 += index;
+
+ assert(item_insert_order(&tree, 0, order) == 0);
+ assert(item_insert(&tree, index2) == 0);
+
+ assert(radix_tree_tag_set(&tree, 0, 0));
+ assert(radix_tree_tag_set(&tree, index2, 0));
+
+ assert(tag_tagged_items(&tree, NULL, 0, ~0UL, 10, 0, 1) == 2);
+
+ item_kill_tree(&tree);
+}
+
+static void multiorder_tag_tests(void)
+{
+ int i, j;
+
+ /* test multi-order entry for indices 0-7 with no sibling pointers */
+ __multiorder_tag_test(0, 3);
+ __multiorder_tag_test(5, 3);
+
+ /* test multi-order entry for indices 8-15 with no sibling pointers */
+ __multiorder_tag_test(8, 3);
+ __multiorder_tag_test(15, 3);
+
+ /*
+ * Our order 5 entry covers indices 0-31 in a tree with height=2.
+ * This is broken up as follows:
+ * 0-7: canonical entry
+ * 8-15: sibling 1
+ * 16-23: sibling 2
+ * 24-31: sibling 3
+ */
+ __multiorder_tag_test(0, 5);
+ __multiorder_tag_test(29, 5);
+
+ /* same test, but with indices 32-63 */
+ __multiorder_tag_test(32, 5);
+ __multiorder_tag_test(44, 5);
+
+ /*
+ * Our order 8 entry covers indices 0-255 in a tree with height=3.
+ * This is broken up as follows:
+ * 0-63: canonical entry
+ * 64-127: sibling 1
+ * 128-191: sibling 2
+ * 192-255: sibling 3
+ */
+ __multiorder_tag_test(0, 8);
+ __multiorder_tag_test(190, 8);
+
+ /* same test, but with indices 256-511 */
+ __multiorder_tag_test(256, 8);
+ __multiorder_tag_test(300, 8);
+
+ __multiorder_tag_test(0x12345678UL, 8);
+
+ for (i = 1; i < 10; i++)
+ for (j = 0; j < (10 << i); j++)
+ __multiorder_tag_test2(i, j);
+}
+
+static void multiorder_check(unsigned long index, int order)
+{
+ unsigned long i;
+ unsigned long min = index & ~((1UL << order) - 1);
+ unsigned long max = min + (1UL << order);
+ void **slot;
+ struct item *item2 = item_create(min, order);
+ RADIX_TREE(tree, GFP_KERNEL);
+
+ printv(2, "Multiorder index %ld, order %d\n", index, order);
+
+ assert(item_insert_order(&tree, index, order) == 0);
+
+ for (i = min; i < max; i++) {
+ struct item *item = item_lookup(&tree, i);
+ assert(item != 0);
+ assert(item->index == index);
+ }
+ for (i = 0; i < min; i++)
+ item_check_absent(&tree, i);
+ for (i = max; i < 2*max; i++)
+ item_check_absent(&tree, i);
+ for (i = min; i < max; i++)
+ assert(radix_tree_insert(&tree, i, item2) == -EEXIST);
+
+ slot = radix_tree_lookup_slot(&tree, index);
+ free(*slot);
+ radix_tree_replace_slot(&tree, slot, item2);
+ for (i = min; i < max; i++) {
+ struct item *item = item_lookup(&tree, i);
+ assert(item != 0);
+ assert(item->index == min);
+ }
+
+ assert(item_delete(&tree, min) != 0);
+
+ for (i = 0; i < 2*max; i++)
+ item_check_absent(&tree, i);
+}
+
+static void multiorder_shrink(unsigned long index, int order)
+{
+ unsigned long i;
+ unsigned long max = 1 << order;
+ RADIX_TREE(tree, GFP_KERNEL);
+ struct radix_tree_node *node;
+
+ printv(2, "Multiorder shrink index %ld, order %d\n", index, order);
+
+ assert(item_insert_order(&tree, 0, order) == 0);
+
+ node = tree.rnode;
+
+ assert(item_insert(&tree, index) == 0);
+ assert(node != tree.rnode);
+
+ assert(item_delete(&tree, index) != 0);
+ assert(node == tree.rnode);
+
+ for (i = 0; i < max; i++) {
+ struct item *item = item_lookup(&tree, i);
+ assert(item != 0);
+ assert(item->index == 0);
+ }
+ for (i = max; i < 2*max; i++)
+ item_check_absent(&tree, i);
+
+ if (!item_delete(&tree, 0)) {
+ printv(2, "failed to delete index %ld (order %d)\n", index, order);
+ abort();
+ }
+
+ for (i = 0; i < 2*max; i++)
+ item_check_absent(&tree, i);
+}
+
+static void multiorder_insert_bug(void)
+{
+ RADIX_TREE(tree, GFP_KERNEL);
+
+ item_insert(&tree, 0);
+ radix_tree_tag_set(&tree, 0, 0);
+ item_insert_order(&tree, 3 << 6, 6);
+
+ item_kill_tree(&tree);
+}
+
+void multiorder_iteration(void)
+{
+ RADIX_TREE(tree, GFP_KERNEL);
+ struct radix_tree_iter iter;
+ void **slot;
+ int i, j, err;
+
+ printv(1, "Multiorder iteration test\n");
+
+#define NUM_ENTRIES 11
+ int index[NUM_ENTRIES] = {0, 2, 4, 8, 16, 32, 34, 36, 64, 72, 128};
+ int order[NUM_ENTRIES] = {1, 1, 2, 3, 4, 1, 0, 1, 3, 0, 7};
+
+ for (i = 0; i < NUM_ENTRIES; i++) {
+ err = item_insert_order(&tree, index[i], order[i]);
+ assert(!err);
+ }
+
+ for (j = 0; j < 256; j++) {
+ for (i = 0; i < NUM_ENTRIES; i++)
+ if (j <= (index[i] | ((1 << order[i]) - 1)))
+ break;
+
+ radix_tree_for_each_slot(slot, &tree, &iter, j) {
+ int height = order[i] / RADIX_TREE_MAP_SHIFT;
+ int shift = height * RADIX_TREE_MAP_SHIFT;
+ unsigned long mask = (1UL << order[i]) - 1;
+ struct item *item = *slot;
+
+ assert((iter.index | mask) == (index[i] | mask));
+ assert(iter.shift == shift);
+ assert(!radix_tree_is_internal_node(item));
+ assert((item->index | mask) == (index[i] | mask));
+ assert(item->order == order[i]);
+ i++;
+ }
+ }
+
+ item_kill_tree(&tree);
+}
+
+void multiorder_tagged_iteration(void)
+{
+ RADIX_TREE(tree, GFP_KERNEL);
+ struct radix_tree_iter iter;
+ void **slot;
+ int i, j;
+
+ printv(1, "Multiorder tagged iteration test\n");
+
+#define MT_NUM_ENTRIES 9
+ int index[MT_NUM_ENTRIES] = {0, 2, 4, 16, 32, 40, 64, 72, 128};
+ int order[MT_NUM_ENTRIES] = {1, 0, 2, 4, 3, 1, 3, 0, 7};
+
+#define TAG_ENTRIES 7
+ int tag_index[TAG_ENTRIES] = {0, 4, 16, 40, 64, 72, 128};
+
+ for (i = 0; i < MT_NUM_ENTRIES; i++)
+ assert(!item_insert_order(&tree, index[i], order[i]));
+
+ assert(!radix_tree_tagged(&tree, 1));
+
+ for (i = 0; i < TAG_ENTRIES; i++)
+ assert(radix_tree_tag_set(&tree, tag_index[i], 1));
+
+ for (j = 0; j < 256; j++) {
+ int k;
+
+ for (i = 0; i < TAG_ENTRIES; i++) {
+ for (k = i; index[k] < tag_index[i]; k++)
+ ;
+ if (j <= (index[k] | ((1 << order[k]) - 1)))
+ break;
+ }
+
+ radix_tree_for_each_tagged(slot, &tree, &iter, j, 1) {
+ unsigned long mask;
+ struct item *item = *slot;
+ for (k = i; index[k] < tag_index[i]; k++)
+ ;
+ mask = (1UL << order[k]) - 1;
+
+ assert((iter.index | mask) == (tag_index[i] | mask));
+ assert(!radix_tree_is_internal_node(item));
+ assert((item->index | mask) == (tag_index[i] | mask));
+ assert(item->order == order[k]);
+ i++;
+ }
+ }
+
+ assert(tag_tagged_items(&tree, NULL, 0, ~0UL, TAG_ENTRIES, 1, 2) ==
+ TAG_ENTRIES);
+
+ for (j = 0; j < 256; j++) {
+ int mask, k;
+
+ for (i = 0; i < TAG_ENTRIES; i++) {
+ for (k = i; index[k] < tag_index[i]; k++)
+ ;
+ if (j <= (index[k] | ((1 << order[k]) - 1)))
+ break;
+ }
+
+ radix_tree_for_each_tagged(slot, &tree, &iter, j, 2) {
+ struct item *item = *slot;
+ for (k = i; index[k] < tag_index[i]; k++)
+ ;
+ mask = (1 << order[k]) - 1;
+
+ assert((iter.index | mask) == (tag_index[i] | mask));
+ assert(!radix_tree_is_internal_node(item));
+ assert((item->index | mask) == (tag_index[i] | mask));
+ assert(item->order == order[k]);
+ i++;
+ }
+ }
+
+ assert(tag_tagged_items(&tree, NULL, 1, ~0UL, MT_NUM_ENTRIES * 2, 1, 0)
+ == TAG_ENTRIES);
+ i = 0;
+ radix_tree_for_each_tagged(slot, &tree, &iter, 0, 0) {
+ assert(iter.index == tag_index[i]);
+ i++;
+ }
+
+ item_kill_tree(&tree);
+}
+
+/*
+ * Basic join checks: make sure we can't find an entry in the tree after
+ * a larger entry has replaced it
+ */
+static void multiorder_join1(unsigned long index,
+ unsigned order1, unsigned order2)
+{
+ unsigned long loc;
+ void *item, *item2 = item_create(index + 1, order1);
+ RADIX_TREE(tree, GFP_KERNEL);
+
+ item_insert_order(&tree, index, order2);
+ item = radix_tree_lookup(&tree, index);
+ radix_tree_join(&tree, index + 1, order1, item2);
+ loc = find_item(&tree, item);
+ if (loc == -1)
+ free(item);
+ item = radix_tree_lookup(&tree, index + 1);
+ assert(item == item2);
+ item_kill_tree(&tree);
+}
+
+/*
+ * Check that the accounting of exceptional entries is handled correctly
+ * by joining an exceptional entry to a normal pointer.
+ */
+static void multiorder_join2(unsigned order1, unsigned order2)
+{
+ RADIX_TREE(tree, GFP_KERNEL);
+ struct radix_tree_node *node;
+ void *item1 = item_create(0, order1);
+ void *item2;
+
+ item_insert_order(&tree, 0, order2);
+ radix_tree_insert(&tree, 1 << order2, (void *)0x12UL);
+ item2 = __radix_tree_lookup(&tree, 1 << order2, &node, NULL);
+ assert(item2 == (void *)0x12UL);
+ assert(node->exceptional == 1);
+
+ item2 = radix_tree_lookup(&tree, 0);
+ free(item2);
+
+ radix_tree_join(&tree, 0, order1, item1);
+ item2 = __radix_tree_lookup(&tree, 1 << order2, &node, NULL);
+ assert(item2 == item1);
+ assert(node->exceptional == 0);
+ item_kill_tree(&tree);
+}
+
+/*
+ * This test revealed an accounting bug for exceptional entries at one point.
+ * Nodes were being freed back into the pool with an elevated exception count
+ * by radix_tree_join() and then radix_tree_split() was failing to zero the
+ * count of exceptional entries.
+ */
+static void multiorder_join3(unsigned int order)
+{
+ RADIX_TREE(tree, GFP_KERNEL);
+ struct radix_tree_node *node;
+ void **slot;
+ struct radix_tree_iter iter;
+ unsigned long i;
+
+ for (i = 0; i < (1 << order); i++) {
+ radix_tree_insert(&tree, i, (void *)0x12UL);
+ }
+
+ radix_tree_join(&tree, 0, order, (void *)0x16UL);
+ rcu_barrier();
+
+ radix_tree_split(&tree, 0, 0);
+
+ radix_tree_for_each_slot(slot, &tree, &iter, 0) {
+ radix_tree_iter_replace(&tree, &iter, slot, (void *)0x12UL);
+ }
+
+ __radix_tree_lookup(&tree, 0, &node, NULL);
+ assert(node->exceptional == node->count);
+
+ item_kill_tree(&tree);
+}
+
+static void multiorder_join(void)
+{
+ int i, j, idx;
+
+ for (idx = 0; idx < 1024; idx = idx * 2 + 3) {
+ for (i = 1; i < 15; i++) {
+ for (j = 0; j < i; j++) {
+ multiorder_join1(idx, i, j);
+ }
+ }
+ }
+
+ for (i = 1; i < 15; i++) {
+ for (j = 0; j < i; j++) {
+ multiorder_join2(i, j);
+ }
+ }
+
+ for (i = 3; i < 10; i++) {
+ multiorder_join3(i);
+ }
+}
+
+static void check_mem(unsigned old_order, unsigned new_order, unsigned alloc)
+{
+ struct radix_tree_preload *rtp = &radix_tree_preloads;
+ if (rtp->nr != 0)
+ printv(2, "split(%u %u) remaining %u\n", old_order, new_order,
+ rtp->nr);
+ /*
+ * Can't check for equality here as some nodes may have been
+ * RCU-freed while we ran. But we should never finish with more
+ * nodes allocated since they should have all been preloaded.
+ */
+ if (nr_allocated > alloc)
+ printv(2, "split(%u %u) allocated %u %u\n", old_order, new_order,
+ alloc, nr_allocated);
+}
+
+static void __multiorder_split(int old_order, int new_order)
+{
+ RADIX_TREE(tree, GFP_ATOMIC);
+ void **slot;
+ struct radix_tree_iter iter;
+ unsigned alloc;
+ struct item *item;
+
+ radix_tree_preload(GFP_KERNEL);
+ assert(item_insert_order(&tree, 0, old_order) == 0);
+ radix_tree_preload_end();
+
+ /* Wipe out the preloaded cache or it'll confuse check_mem() */
+ radix_tree_cpu_dead(0);
+
+ item = radix_tree_tag_set(&tree, 0, 2);
+
+ radix_tree_split_preload(old_order, new_order, GFP_KERNEL);
+ alloc = nr_allocated;
+ radix_tree_split(&tree, 0, new_order);
+ check_mem(old_order, new_order, alloc);
+ radix_tree_for_each_slot(slot, &tree, &iter, 0) {
+ radix_tree_iter_replace(&tree, &iter, slot,
+ item_create(iter.index, new_order));
+ }
+ radix_tree_preload_end();
+
+ item_kill_tree(&tree);
+ free(item);
+}
+
+static void __multiorder_split2(int old_order, int new_order)
+{
+ RADIX_TREE(tree, GFP_KERNEL);
+ void **slot;
+ struct radix_tree_iter iter;
+ struct radix_tree_node *node;
+ void *item;
+
+ __radix_tree_insert(&tree, 0, old_order, (void *)0x12);
+
+ item = __radix_tree_lookup(&tree, 0, &node, NULL);
+ assert(item == (void *)0x12);
+ assert(node->exceptional > 0);
+
+ radix_tree_split(&tree, 0, new_order);
+ radix_tree_for_each_slot(slot, &tree, &iter, 0) {
+ radix_tree_iter_replace(&tree, &iter, slot,
+ item_create(iter.index, new_order));
+ }
+
+ item = __radix_tree_lookup(&tree, 0, &node, NULL);
+ assert(item != (void *)0x12);
+ assert(node->exceptional == 0);
+
+ item_kill_tree(&tree);
+}
+
+static void __multiorder_split3(int old_order, int new_order)
+{
+ RADIX_TREE(tree, GFP_KERNEL);
+ void **slot;
+ struct radix_tree_iter iter;
+ struct radix_tree_node *node;
+ void *item;
+
+ __radix_tree_insert(&tree, 0, old_order, (void *)0x12);
+
+ item = __radix_tree_lookup(&tree, 0, &node, NULL);
+ assert(item == (void *)0x12);
+ assert(node->exceptional > 0);
+
+ radix_tree_split(&tree, 0, new_order);
+ radix_tree_for_each_slot(slot, &tree, &iter, 0) {
+ radix_tree_iter_replace(&tree, &iter, slot, (void *)0x16);
+ }
+
+ item = __radix_tree_lookup(&tree, 0, &node, NULL);
+ assert(item == (void *)0x16);
+ assert(node->exceptional > 0);
+
+ item_kill_tree(&tree);
+
+ __radix_tree_insert(&tree, 0, old_order, (void *)0x12);
+
+ item = __radix_tree_lookup(&tree, 0, &node, NULL);
+ assert(item == (void *)0x12);
+ assert(node->exceptional > 0);
+
+ radix_tree_split(&tree, 0, new_order);
+ radix_tree_for_each_slot(slot, &tree, &iter, 0) {
+ if (iter.index == (1 << new_order))
+ radix_tree_iter_replace(&tree, &iter, slot,
+ (void *)0x16);
+ else
+ radix_tree_iter_replace(&tree, &iter, slot, NULL);
+ }
+
+ item = __radix_tree_lookup(&tree, 1 << new_order, &node, NULL);
+ assert(item == (void *)0x16);
+ assert(node->count == node->exceptional);
+ do {
+ node = node->parent;
+ if (!node)
+ break;
+ assert(node->count == 1);
+ assert(node->exceptional == 0);
+ } while (1);
+
+ item_kill_tree(&tree);
+}
+
+static void multiorder_split(void)
+{
+ int i, j;
+
+ for (i = 3; i < 11; i++)
+ for (j = 0; j < i; j++) {
+ __multiorder_split(i, j);
+ __multiorder_split2(i, j);
+ __multiorder_split3(i, j);
+ }
+}
+
+static void multiorder_account(void)
+{
+ RADIX_TREE(tree, GFP_KERNEL);
+ struct radix_tree_node *node;
+ void **slot;
+
+ item_insert_order(&tree, 0, 5);
+
+ __radix_tree_insert(&tree, 1 << 5, 5, (void *)0x12);
+ __radix_tree_lookup(&tree, 0, &node, NULL);
+ assert(node->count == node->exceptional * 2);
+ radix_tree_delete(&tree, 1 << 5);
+ assert(node->exceptional == 0);
+
+ __radix_tree_insert(&tree, 1 << 5, 5, (void *)0x12);
+ __radix_tree_lookup(&tree, 1 << 5, &node, &slot);
+ assert(node->count == node->exceptional * 2);
+ __radix_tree_replace(&tree, node, slot, NULL, NULL);
+ assert(node->exceptional == 0);
+
+ item_kill_tree(&tree);
+}
+
+bool stop_iteration = false;
+
+static void *creator_func(void *ptr)
+{
+ /* 'order' is set up to ensure we have sibling entries */
+ unsigned int order = RADIX_TREE_MAP_SHIFT - 1;
+ struct radix_tree_root *tree = ptr;
+ int i;
+
+ for (i = 0; i < 10000; i++) {
+ item_insert_order(tree, 0, order);
+ item_delete_rcu(tree, 0);
+ }
+
+ stop_iteration = true;
+ return NULL;
+}
+
+static void *iterator_func(void *ptr)
+{
+ struct radix_tree_root *tree = ptr;
+ struct radix_tree_iter iter;
+ struct item *item;
+ void **slot;
+
+ while (!stop_iteration) {
+ rcu_read_lock();
+ radix_tree_for_each_slot(slot, tree, &iter, 0) {
+ item = radix_tree_deref_slot(slot);
+
+ if (!item)
+ continue;
+ if (radix_tree_deref_retry(item)) {
+ slot = radix_tree_iter_retry(&iter);
+ continue;
+ }
+
+ item_sanity(item, iter.index);
+ }
+ rcu_read_unlock();
+ }
+ return NULL;
+}
+
+static void multiorder_iteration_race(void)
+{
+ const int num_threads = sysconf(_SC_NPROCESSORS_ONLN);
+ pthread_t worker_thread[num_threads];
+ RADIX_TREE(tree, GFP_KERNEL);
+ int i;
+
+ pthread_create(&worker_thread[0], NULL, &creator_func, &tree);
+ for (i = 1; i < num_threads; i++)
+ pthread_create(&worker_thread[i], NULL, &iterator_func, &tree);
+
+ for (i = 0; i < num_threads; i++)
+ pthread_join(worker_thread[i], NULL);
+
+ item_kill_tree(&tree);
+}
+
+void multiorder_checks(void)
+{
+ int i;
+
+ for (i = 0; i < 20; i++) {
+ multiorder_check(200, i);
+ multiorder_check(0, i);
+ multiorder_check((1UL << i) + 1, i);
+ }
+
+ for (i = 0; i < 15; i++)
+ multiorder_shrink((1UL << (i + RADIX_TREE_MAP_SHIFT)), i);
+
+ multiorder_insert_bug();
+ multiorder_tag_tests();
+ multiorder_iteration();
+ multiorder_tagged_iteration();
+ multiorder_join();
+ multiorder_split();
+ multiorder_account();
+ multiorder_iteration_race();
+
+ radix_tree_cpu_dead(0);
+}
+
+int __weak main(void)
+{
+ radix_tree_init();
+ multiorder_checks();
+ return 0;
+}
diff --git a/tools/testing/radix-tree/regression.h b/tools/testing/radix-tree/regression.h
new file mode 100644
index 000000000..3c8a1584e
--- /dev/null
+++ b/tools/testing/radix-tree/regression.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __REGRESSION_H__
+#define __REGRESSION_H__
+
+void regression1_test(void);
+void regression2_test(void);
+void regression3_test(void);
+
+#endif
diff --git a/tools/testing/radix-tree/regression1.c b/tools/testing/radix-tree/regression1.c
new file mode 100644
index 000000000..0aece092f
--- /dev/null
+++ b/tools/testing/radix-tree/regression1.c
@@ -0,0 +1,221 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Regression1
+ * Description:
+ * Salman Qazi describes the following radix-tree bug:
+ *
+ * In the following case, we get can get a deadlock:
+ *
+ * 0. The radix tree contains two items, one has the index 0.
+ * 1. The reader (in this case find_get_pages) takes the rcu_read_lock.
+ * 2. The reader acquires slot(s) for item(s) including the index 0 item.
+ * 3. The non-zero index item is deleted, and as a consequence the other item
+ * is moved to the root of the tree. The place where it used to be is queued
+ * for deletion after the readers finish.
+ * 3b. The zero item is deleted, removing it from the direct slot, it remains in
+ * the rcu-delayed indirect node.
+ * 4. The reader looks at the index 0 slot, and finds that the page has 0 ref
+ * count
+ * 5. The reader looks at it again, hoping that the item will either be freed
+ * or the ref count will increase. This never happens, as the slot it is
+ * looking at will never be updated. Also, this slot can never be reclaimed
+ * because the reader is holding rcu_read_lock and is in an infinite loop.
+ *
+ * The fix is to re-use the same "indirect" pointer case that requires a slot
+ * lookup retry into a general "retry the lookup" bit.
+ *
+ * Running:
+ * This test should run to completion in a few seconds. The above bug would
+ * cause it to hang indefinitely.
+ *
+ * Upstream commit:
+ * Not yet
+ */
+#include <linux/kernel.h>
+#include <linux/gfp.h>
+#include <linux/slab.h>
+#include <linux/radix-tree.h>
+#include <linux/rcupdate.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <assert.h>
+
+#include "regression.h"
+
+static RADIX_TREE(mt_tree, GFP_KERNEL);
+static pthread_mutex_t mt_lock = PTHREAD_MUTEX_INITIALIZER;
+
+struct page {
+ pthread_mutex_t lock;
+ struct rcu_head rcu;
+ int count;
+ unsigned long index;
+};
+
+static struct page *page_alloc(void)
+{
+ struct page *p;
+ p = malloc(sizeof(struct page));
+ p->count = 1;
+ p->index = 1;
+ pthread_mutex_init(&p->lock, NULL);
+
+ return p;
+}
+
+static void page_rcu_free(struct rcu_head *rcu)
+{
+ struct page *p = container_of(rcu, struct page, rcu);
+ assert(!p->count);
+ pthread_mutex_destroy(&p->lock);
+ free(p);
+}
+
+static void page_free(struct page *p)
+{
+ call_rcu(&p->rcu, page_rcu_free);
+}
+
+static unsigned find_get_pages(unsigned long start,
+ unsigned int nr_pages, struct page **pages)
+{
+ unsigned int i;
+ unsigned int ret;
+ unsigned int nr_found;
+
+ rcu_read_lock();
+restart:
+ nr_found = radix_tree_gang_lookup_slot(&mt_tree,
+ (void ***)pages, NULL, start, nr_pages);
+ ret = 0;
+ for (i = 0; i < nr_found; i++) {
+ struct page *page;
+repeat:
+ page = radix_tree_deref_slot((void **)pages[i]);
+ if (unlikely(!page))
+ continue;
+
+ if (radix_tree_exception(page)) {
+ if (radix_tree_deref_retry(page)) {
+ /*
+ * Transient condition which can only trigger
+ * when entry at index 0 moves out of or back
+ * to root: none yet gotten, safe to restart.
+ */
+ assert((start | i) == 0);
+ goto restart;
+ }
+ /*
+ * No exceptional entries are inserted in this test.
+ */
+ assert(0);
+ }
+
+ pthread_mutex_lock(&page->lock);
+ if (!page->count) {
+ pthread_mutex_unlock(&page->lock);
+ goto repeat;
+ }
+ /* don't actually update page refcount */
+ pthread_mutex_unlock(&page->lock);
+
+ /* Has the page moved? */
+ if (unlikely(page != *((void **)pages[i]))) {
+ goto repeat;
+ }
+
+ pages[ret] = page;
+ ret++;
+ }
+ rcu_read_unlock();
+ return ret;
+}
+
+static pthread_barrier_t worker_barrier;
+
+static void *regression1_fn(void *arg)
+{
+ rcu_register_thread();
+
+ if (pthread_barrier_wait(&worker_barrier) ==
+ PTHREAD_BARRIER_SERIAL_THREAD) {
+ int j;
+
+ for (j = 0; j < 1000000; j++) {
+ struct page *p;
+
+ p = page_alloc();
+ pthread_mutex_lock(&mt_lock);
+ radix_tree_insert(&mt_tree, 0, p);
+ pthread_mutex_unlock(&mt_lock);
+
+ p = page_alloc();
+ pthread_mutex_lock(&mt_lock);
+ radix_tree_insert(&mt_tree, 1, p);
+ pthread_mutex_unlock(&mt_lock);
+
+ pthread_mutex_lock(&mt_lock);
+ p = radix_tree_delete(&mt_tree, 1);
+ pthread_mutex_lock(&p->lock);
+ p->count--;
+ pthread_mutex_unlock(&p->lock);
+ pthread_mutex_unlock(&mt_lock);
+ page_free(p);
+
+ pthread_mutex_lock(&mt_lock);
+ p = radix_tree_delete(&mt_tree, 0);
+ pthread_mutex_lock(&p->lock);
+ p->count--;
+ pthread_mutex_unlock(&p->lock);
+ pthread_mutex_unlock(&mt_lock);
+ page_free(p);
+ }
+ } else {
+ int j;
+
+ for (j = 0; j < 100000000; j++) {
+ struct page *pages[10];
+
+ find_get_pages(0, 10, pages);
+ }
+ }
+
+ rcu_unregister_thread();
+
+ return NULL;
+}
+
+static pthread_t *threads;
+void regression1_test(void)
+{
+ int nr_threads;
+ int i;
+ long arg;
+
+ /* Regression #1 */
+ printv(1, "running regression test 1, should finish in under a minute\n");
+ nr_threads = 2;
+ pthread_barrier_init(&worker_barrier, NULL, nr_threads);
+
+ threads = malloc(nr_threads * sizeof(pthread_t *));
+
+ for (i = 0; i < nr_threads; i++) {
+ arg = i;
+ if (pthread_create(&threads[i], NULL, regression1_fn, (void *)arg)) {
+ perror("pthread_create");
+ exit(1);
+ }
+ }
+
+ for (i = 0; i < nr_threads; i++) {
+ if (pthread_join(threads[i], NULL)) {
+ perror("pthread_join");
+ exit(1);
+ }
+ }
+
+ free(threads);
+
+ printv(1, "regression test 1, done\n");
+}
diff --git a/tools/testing/radix-tree/regression2.c b/tools/testing/radix-tree/regression2.c
new file mode 100644
index 000000000..424b91c77
--- /dev/null
+++ b/tools/testing/radix-tree/regression2.c
@@ -0,0 +1,123 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Regression2
+ * Description:
+ * Toshiyuki Okajima describes the following radix-tree bug:
+ *
+ * In the following case, we can get a hangup on
+ * radix_radix_tree_gang_lookup_tag_slot.
+ *
+ * 0. The radix tree contains RADIX_TREE_MAP_SIZE items. And the tag of
+ * a certain item has PAGECACHE_TAG_DIRTY.
+ * 1. radix_tree_range_tag_if_tagged(, start, end, , PAGECACHE_TAG_DIRTY,
+ * PAGECACHE_TAG_TOWRITE) is called to add PAGECACHE_TAG_TOWRITE tag
+ * for the tag which has PAGECACHE_TAG_DIRTY. However, there is no tag with
+ * PAGECACHE_TAG_DIRTY within the range from start to end. As the result,
+ * There is no tag with PAGECACHE_TAG_TOWRITE but the root tag has
+ * PAGECACHE_TAG_TOWRITE.
+ * 2. An item is added into the radix tree and then the level of it is
+ * extended into 2 from 1. At that time, the new radix tree node succeeds
+ * the tag status of the root tag. Therefore the tag of the new radix tree
+ * node has PAGECACHE_TAG_TOWRITE but there is not slot with
+ * PAGECACHE_TAG_TOWRITE tag in the child node of the new radix tree node.
+ * 3. The tag of a certain item is cleared with PAGECACHE_TAG_DIRTY.
+ * 4. All items within the index range from 0 to RADIX_TREE_MAP_SIZE - 1 are
+ * released. (Only the item which index is RADIX_TREE_MAP_SIZE exist in the
+ * radix tree.) As the result, the slot of the radix tree node is NULL but
+ * the tag which corresponds to the slot has PAGECACHE_TAG_TOWRITE.
+ * 5. radix_tree_gang_lookup_tag_slot(PAGECACHE_TAG_TOWRITE) calls
+ * __lookup_tag. __lookup_tag returns with 0. And __lookup_tag doesn't
+ * change the index that is the input and output parameter. Because the 1st
+ * slot of the radix tree node is NULL, but the tag which corresponds to
+ * the slot has PAGECACHE_TAG_TOWRITE.
+ * Therefore radix_tree_gang_lookup_tag_slot tries to get some items by
+ * calling __lookup_tag, but it cannot get any items forever.
+ *
+ * The fix is to change that radix_tree_tag_if_tagged doesn't tag the root tag
+ * if it doesn't set any tags within the specified range.
+ *
+ * Running:
+ * This test should run to completion immediately. The above bug would cause it
+ * to hang indefinitely.
+ *
+ * Upstream commit:
+ * Not yet
+ */
+#include <linux/kernel.h>
+#include <linux/gfp.h>
+#include <linux/slab.h>
+#include <linux/radix-tree.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "regression.h"
+#include "test.h"
+
+#define PAGECACHE_TAG_DIRTY 0
+#define PAGECACHE_TAG_WRITEBACK 1
+#define PAGECACHE_TAG_TOWRITE 2
+
+static RADIX_TREE(mt_tree, GFP_KERNEL);
+unsigned long page_count = 0;
+
+struct page {
+ unsigned long index;
+};
+
+static struct page *page_alloc(void)
+{
+ struct page *p;
+ p = malloc(sizeof(struct page));
+ p->index = page_count++;
+
+ return p;
+}
+
+void regression2_test(void)
+{
+ int i;
+ struct page *p;
+ int max_slots = RADIX_TREE_MAP_SIZE;
+ unsigned long int start, end;
+ struct page *pages[1];
+
+ printv(1, "running regression test 2 (should take milliseconds)\n");
+ /* 0. */
+ for (i = 0; i <= max_slots - 1; i++) {
+ p = page_alloc();
+ radix_tree_insert(&mt_tree, i, p);
+ }
+ radix_tree_tag_set(&mt_tree, max_slots - 1, PAGECACHE_TAG_DIRTY);
+
+ /* 1. */
+ start = 0;
+ end = max_slots - 2;
+ tag_tagged_items(&mt_tree, NULL, start, end, 1,
+ PAGECACHE_TAG_DIRTY, PAGECACHE_TAG_TOWRITE);
+
+ /* 2. */
+ p = page_alloc();
+ radix_tree_insert(&mt_tree, max_slots, p);
+
+ /* 3. */
+ radix_tree_tag_clear(&mt_tree, max_slots - 1, PAGECACHE_TAG_DIRTY);
+
+ /* 4. */
+ for (i = max_slots - 1; i >= 0; i--)
+ free(radix_tree_delete(&mt_tree, i));
+
+ /* 5. */
+ // NOTE: start should not be 0 because radix_tree_gang_lookup_tag_slot
+ // can return.
+ start = 1;
+ end = max_slots - 2;
+ radix_tree_gang_lookup_tag_slot(&mt_tree, (void ***)pages, start, end,
+ PAGECACHE_TAG_TOWRITE);
+
+ /* We remove all the remained nodes */
+ free(radix_tree_delete(&mt_tree, max_slots));
+
+ BUG_ON(!radix_tree_empty(&mt_tree));
+
+ printv(1, "regression test 2, done\n");
+}
diff --git a/tools/testing/radix-tree/regression3.c b/tools/testing/radix-tree/regression3.c
new file mode 100644
index 000000000..ace2543c3
--- /dev/null
+++ b/tools/testing/radix-tree/regression3.c
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Regression3
+ * Description:
+ * Helper radix_tree_iter_retry resets next_index to the current index.
+ * In following radix_tree_next_slot current chunk size becomes zero.
+ * This isn't checked and it tries to dereference null pointer in slot.
+ *
+ * Helper radix_tree_iter_resume reset slot to NULL and next_index to index + 1,
+ * for tagger iteraction it also must reset cached tags in iterator to abort
+ * next radix_tree_next_slot and go to slow-path into radix_tree_next_chunk.
+ *
+ * Running:
+ * This test should run to completion immediately. The above bug would
+ * cause it to segfault.
+ *
+ * Upstream commit:
+ * Not yet
+ */
+#include <linux/kernel.h>
+#include <linux/gfp.h>
+#include <linux/slab.h>
+#include <linux/radix-tree.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "regression.h"
+
+void regression3_test(void)
+{
+ RADIX_TREE(root, GFP_KERNEL);
+ void *ptr0 = (void *)4ul;
+ void *ptr = (void *)8ul;
+ struct radix_tree_iter iter;
+ void **slot;
+ bool first;
+
+ printv(1, "running regression test 3 (should take milliseconds)\n");
+
+ radix_tree_insert(&root, 0, ptr0);
+ radix_tree_tag_set(&root, 0, 0);
+
+ first = true;
+ radix_tree_for_each_tagged(slot, &root, &iter, 0, 0) {
+ printv(2, "tagged %ld %p\n", iter.index, *slot);
+ if (first) {
+ radix_tree_insert(&root, 1, ptr);
+ radix_tree_tag_set(&root, 1, 0);
+ first = false;
+ }
+ if (radix_tree_deref_retry(*slot)) {
+ printv(2, "retry at %ld\n", iter.index);
+ slot = radix_tree_iter_retry(&iter);
+ continue;
+ }
+ }
+ radix_tree_delete(&root, 1);
+
+ first = true;
+ radix_tree_for_each_slot(slot, &root, &iter, 0) {
+ printv(2, "slot %ld %p\n", iter.index, *slot);
+ if (first) {
+ radix_tree_insert(&root, 1, ptr);
+ first = false;
+ }
+ if (radix_tree_deref_retry(*slot)) {
+ printv(2, "retry at %ld\n", iter.index);
+ slot = radix_tree_iter_retry(&iter);
+ continue;
+ }
+ }
+ radix_tree_delete(&root, 1);
+
+ first = true;
+ radix_tree_for_each_contig(slot, &root, &iter, 0) {
+ printv(2, "contig %ld %p\n", iter.index, *slot);
+ if (first) {
+ radix_tree_insert(&root, 1, ptr);
+ first = false;
+ }
+ if (radix_tree_deref_retry(*slot)) {
+ printv(2, "retry at %ld\n", iter.index);
+ slot = radix_tree_iter_retry(&iter);
+ continue;
+ }
+ }
+
+ radix_tree_for_each_slot(slot, &root, &iter, 0) {
+ printv(2, "slot %ld %p\n", iter.index, *slot);
+ if (!iter.index) {
+ printv(2, "next at %ld\n", iter.index);
+ slot = radix_tree_iter_resume(slot, &iter);
+ }
+ }
+
+ radix_tree_for_each_contig(slot, &root, &iter, 0) {
+ printv(2, "contig %ld %p\n", iter.index, *slot);
+ if (!iter.index) {
+ printv(2, "next at %ld\n", iter.index);
+ slot = radix_tree_iter_resume(slot, &iter);
+ }
+ }
+
+ radix_tree_tag_set(&root, 0, 0);
+ radix_tree_tag_set(&root, 1, 0);
+ radix_tree_for_each_tagged(slot, &root, &iter, 0, 0) {
+ printv(2, "tagged %ld %p\n", iter.index, *slot);
+ if (!iter.index) {
+ printv(2, "next at %ld\n", iter.index);
+ slot = radix_tree_iter_resume(slot, &iter);
+ }
+ }
+
+ radix_tree_delete(&root, 0);
+ radix_tree_delete(&root, 1);
+
+ printv(1, "regression test 3 passed\n");
+}
diff --git a/tools/testing/radix-tree/tag_check.c b/tools/testing/radix-tree/tag_check.c
new file mode 100644
index 000000000..543181e48
--- /dev/null
+++ b/tools/testing/radix-tree/tag_check.c
@@ -0,0 +1,380 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdlib.h>
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <linux/slab.h>
+#include <linux/radix-tree.h>
+
+#include "test.h"
+
+
+static void
+__simple_checks(struct radix_tree_root *tree, unsigned long index, int tag)
+{
+ unsigned long first = 0;
+ int ret;
+
+ item_check_absent(tree, index);
+ assert(item_tag_get(tree, index, tag) == 0);
+
+ item_insert(tree, index);
+ assert(item_tag_get(tree, index, tag) == 0);
+ item_tag_set(tree, index, tag);
+ ret = item_tag_get(tree, index, tag);
+ assert(ret != 0);
+ ret = tag_tagged_items(tree, NULL, first, ~0UL, 10, tag, !tag);
+ assert(ret == 1);
+ ret = item_tag_get(tree, index, !tag);
+ assert(ret != 0);
+ ret = item_delete(tree, index);
+ assert(ret != 0);
+ item_insert(tree, index);
+ ret = item_tag_get(tree, index, tag);
+ assert(ret == 0);
+ ret = item_delete(tree, index);
+ assert(ret != 0);
+ ret = item_delete(tree, index);
+ assert(ret == 0);
+}
+
+void simple_checks(void)
+{
+ unsigned long index;
+ RADIX_TREE(tree, GFP_KERNEL);
+
+ for (index = 0; index < 10000; index++) {
+ __simple_checks(&tree, index, 0);
+ __simple_checks(&tree, index, 1);
+ }
+ verify_tag_consistency(&tree, 0);
+ verify_tag_consistency(&tree, 1);
+ printv(2, "before item_kill_tree: %d allocated\n", nr_allocated);
+ item_kill_tree(&tree);
+ rcu_barrier();
+ printv(2, "after item_kill_tree: %d allocated\n", nr_allocated);
+}
+
+/*
+ * Check that tags propagate correctly when extending a tree.
+ */
+static void extend_checks(void)
+{
+ RADIX_TREE(tree, GFP_KERNEL);
+
+ item_insert(&tree, 43);
+ assert(item_tag_get(&tree, 43, 0) == 0);
+ item_tag_set(&tree, 43, 0);
+ assert(item_tag_get(&tree, 43, 0) == 1);
+ item_insert(&tree, 1000000);
+ assert(item_tag_get(&tree, 43, 0) == 1);
+
+ item_insert(&tree, 0);
+ item_tag_set(&tree, 0, 0);
+ item_delete(&tree, 1000000);
+ assert(item_tag_get(&tree, 43, 0) != 0);
+ item_delete(&tree, 43);
+ assert(item_tag_get(&tree, 43, 0) == 0); /* crash */
+ assert(item_tag_get(&tree, 0, 0) == 1);
+
+ verify_tag_consistency(&tree, 0);
+
+ item_kill_tree(&tree);
+}
+
+/*
+ * Check that tags propagate correctly when contracting a tree.
+ */
+static void contract_checks(void)
+{
+ struct item *item;
+ int tmp;
+ RADIX_TREE(tree, GFP_KERNEL);
+
+ tmp = 1<<RADIX_TREE_MAP_SHIFT;
+ item_insert(&tree, tmp);
+ item_insert(&tree, tmp+1);
+ item_tag_set(&tree, tmp, 0);
+ item_tag_set(&tree, tmp, 1);
+ item_tag_set(&tree, tmp+1, 0);
+ item_delete(&tree, tmp+1);
+ item_tag_clear(&tree, tmp, 1);
+
+ assert(radix_tree_gang_lookup_tag(&tree, (void **)&item, 0, 1, 0) == 1);
+ assert(radix_tree_gang_lookup_tag(&tree, (void **)&item, 0, 1, 1) == 0);
+
+ assert(item_tag_get(&tree, tmp, 0) == 1);
+ assert(item_tag_get(&tree, tmp, 1) == 0);
+
+ verify_tag_consistency(&tree, 0);
+ item_kill_tree(&tree);
+}
+
+/*
+ * Stupid tag thrasher
+ *
+ * Create a large linear array corresponding to the tree. Each element in
+ * the array is coherent with each node in the tree
+ */
+
+enum {
+ NODE_ABSENT = 0,
+ NODE_PRESENT = 1,
+ NODE_TAGGED = 2,
+};
+
+#define THRASH_SIZE (1000 * 1000)
+#define N 127
+#define BATCH 33
+
+static void gang_check(struct radix_tree_root *tree,
+ char *thrash_state, int tag)
+{
+ struct item *items[BATCH];
+ int nr_found;
+ unsigned long index = 0;
+ unsigned long last_index = 0;
+
+ while ((nr_found = radix_tree_gang_lookup_tag(tree, (void **)items,
+ index, BATCH, tag))) {
+ int i;
+
+ for (i = 0; i < nr_found; i++) {
+ struct item *item = items[i];
+
+ while (last_index < item->index) {
+ assert(thrash_state[last_index] != NODE_TAGGED);
+ last_index++;
+ }
+ assert(thrash_state[last_index] == NODE_TAGGED);
+ last_index++;
+ }
+ index = items[nr_found - 1]->index + 1;
+ }
+}
+
+static void do_thrash(struct radix_tree_root *tree, char *thrash_state, int tag)
+{
+ int insert_chunk;
+ int delete_chunk;
+ int tag_chunk;
+ int untag_chunk;
+ int total_tagged = 0;
+ int total_present = 0;
+
+ for (insert_chunk = 1; insert_chunk < THRASH_SIZE; insert_chunk *= N)
+ for (delete_chunk = 1; delete_chunk < THRASH_SIZE; delete_chunk *= N)
+ for (tag_chunk = 1; tag_chunk < THRASH_SIZE; tag_chunk *= N)
+ for (untag_chunk = 1; untag_chunk < THRASH_SIZE; untag_chunk *= N) {
+ int i;
+ unsigned long index;
+ int nr_inserted = 0;
+ int nr_deleted = 0;
+ int nr_tagged = 0;
+ int nr_untagged = 0;
+ int actual_total_tagged;
+ int actual_total_present;
+
+ for (i = 0; i < insert_chunk; i++) {
+ index = rand() % THRASH_SIZE;
+ if (thrash_state[index] != NODE_ABSENT)
+ continue;
+ item_check_absent(tree, index);
+ item_insert(tree, index);
+ assert(thrash_state[index] != NODE_PRESENT);
+ thrash_state[index] = NODE_PRESENT;
+ nr_inserted++;
+ total_present++;
+ }
+
+ for (i = 0; i < delete_chunk; i++) {
+ index = rand() % THRASH_SIZE;
+ if (thrash_state[index] == NODE_ABSENT)
+ continue;
+ item_check_present(tree, index);
+ if (item_tag_get(tree, index, tag)) {
+ assert(thrash_state[index] == NODE_TAGGED);
+ total_tagged--;
+ } else {
+ assert(thrash_state[index] == NODE_PRESENT);
+ }
+ item_delete(tree, index);
+ assert(thrash_state[index] != NODE_ABSENT);
+ thrash_state[index] = NODE_ABSENT;
+ nr_deleted++;
+ total_present--;
+ }
+
+ for (i = 0; i < tag_chunk; i++) {
+ index = rand() % THRASH_SIZE;
+ if (thrash_state[index] != NODE_PRESENT) {
+ if (item_lookup(tree, index))
+ assert(item_tag_get(tree, index, tag));
+ continue;
+ }
+ item_tag_set(tree, index, tag);
+ item_tag_set(tree, index, tag);
+ assert(thrash_state[index] != NODE_TAGGED);
+ thrash_state[index] = NODE_TAGGED;
+ nr_tagged++;
+ total_tagged++;
+ }
+
+ for (i = 0; i < untag_chunk; i++) {
+ index = rand() % THRASH_SIZE;
+ if (thrash_state[index] != NODE_TAGGED)
+ continue;
+ item_check_present(tree, index);
+ assert(item_tag_get(tree, index, tag));
+ item_tag_clear(tree, index, tag);
+ item_tag_clear(tree, index, tag);
+ assert(thrash_state[index] != NODE_PRESENT);
+ thrash_state[index] = NODE_PRESENT;
+ nr_untagged++;
+ total_tagged--;
+ }
+
+ actual_total_tagged = 0;
+ actual_total_present = 0;
+ for (index = 0; index < THRASH_SIZE; index++) {
+ switch (thrash_state[index]) {
+ case NODE_ABSENT:
+ item_check_absent(tree, index);
+ break;
+ case NODE_PRESENT:
+ item_check_present(tree, index);
+ assert(!item_tag_get(tree, index, tag));
+ actual_total_present++;
+ break;
+ case NODE_TAGGED:
+ item_check_present(tree, index);
+ assert(item_tag_get(tree, index, tag));
+ actual_total_present++;
+ actual_total_tagged++;
+ break;
+ }
+ }
+
+ gang_check(tree, thrash_state, tag);
+
+ printv(2, "%d(%d) %d(%d) %d(%d) %d(%d) / "
+ "%d(%d) present, %d(%d) tagged\n",
+ insert_chunk, nr_inserted,
+ delete_chunk, nr_deleted,
+ tag_chunk, nr_tagged,
+ untag_chunk, nr_untagged,
+ total_present, actual_total_present,
+ total_tagged, actual_total_tagged);
+ }
+}
+
+static void thrash_tags(void)
+{
+ RADIX_TREE(tree, GFP_KERNEL);
+ char *thrash_state;
+
+ thrash_state = malloc(THRASH_SIZE);
+ memset(thrash_state, 0, THRASH_SIZE);
+
+ do_thrash(&tree, thrash_state, 0);
+
+ verify_tag_consistency(&tree, 0);
+ item_kill_tree(&tree);
+ free(thrash_state);
+}
+
+static void leak_check(void)
+{
+ RADIX_TREE(tree, GFP_KERNEL);
+
+ item_insert(&tree, 1000000);
+ item_delete(&tree, 1000000);
+ item_kill_tree(&tree);
+}
+
+static void __leak_check(void)
+{
+ RADIX_TREE(tree, GFP_KERNEL);
+
+ printv(2, "%d: nr_allocated=%d\n", __LINE__, nr_allocated);
+ item_insert(&tree, 1000000);
+ printv(2, "%d: nr_allocated=%d\n", __LINE__, nr_allocated);
+ item_delete(&tree, 1000000);
+ printv(2, "%d: nr_allocated=%d\n", __LINE__, nr_allocated);
+ item_kill_tree(&tree);
+ printv(2, "%d: nr_allocated=%d\n", __LINE__, nr_allocated);
+}
+
+static void single_check(void)
+{
+ struct item *items[BATCH];
+ RADIX_TREE(tree, GFP_KERNEL);
+ int ret;
+ unsigned long first = 0;
+
+ item_insert(&tree, 0);
+ item_tag_set(&tree, 0, 0);
+ ret = radix_tree_gang_lookup_tag(&tree, (void **)items, 0, BATCH, 0);
+ assert(ret == 1);
+ ret = radix_tree_gang_lookup_tag(&tree, (void **)items, 1, BATCH, 0);
+ assert(ret == 0);
+ verify_tag_consistency(&tree, 0);
+ verify_tag_consistency(&tree, 1);
+ ret = tag_tagged_items(&tree, NULL, first, 10, 10, 0, 1);
+ assert(ret == 1);
+ ret = radix_tree_gang_lookup_tag(&tree, (void **)items, 0, BATCH, 1);
+ assert(ret == 1);
+ item_tag_clear(&tree, 0, 0);
+ ret = radix_tree_gang_lookup_tag(&tree, (void **)items, 0, BATCH, 0);
+ assert(ret == 0);
+ item_kill_tree(&tree);
+}
+
+void radix_tree_clear_tags_test(void)
+{
+ unsigned long index;
+ struct radix_tree_node *node;
+ struct radix_tree_iter iter;
+ void **slot;
+
+ RADIX_TREE(tree, GFP_KERNEL);
+
+ item_insert(&tree, 0);
+ item_tag_set(&tree, 0, 0);
+ __radix_tree_lookup(&tree, 0, &node, &slot);
+ radix_tree_clear_tags(&tree, node, slot);
+ assert(item_tag_get(&tree, 0, 0) == 0);
+
+ for (index = 0; index < 1000; index++) {
+ item_insert(&tree, index);
+ item_tag_set(&tree, index, 0);
+ }
+
+ radix_tree_for_each_slot(slot, &tree, &iter, 0) {
+ radix_tree_clear_tags(&tree, iter.node, slot);
+ assert(item_tag_get(&tree, iter.index, 0) == 0);
+ }
+
+ item_kill_tree(&tree);
+}
+
+void tag_check(void)
+{
+ single_check();
+ extend_checks();
+ contract_checks();
+ rcu_barrier();
+ printv(2, "after extend_checks: %d allocated\n", nr_allocated);
+ __leak_check();
+ leak_check();
+ rcu_barrier();
+ printv(2, "after leak_check: %d allocated\n", nr_allocated);
+ simple_checks();
+ rcu_barrier();
+ printv(2, "after simple_checks: %d allocated\n", nr_allocated);
+ thrash_tags();
+ rcu_barrier();
+ printv(2, "after thrash_tags: %d allocated\n", nr_allocated);
+ radix_tree_clear_tags_test();
+}
diff --git a/tools/testing/radix-tree/test.c b/tools/testing/radix-tree/test.c
new file mode 100644
index 000000000..def601557
--- /dev/null
+++ b/tools/testing/radix-tree/test.c
@@ -0,0 +1,334 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdlib.h>
+#include <assert.h>
+#include <stdio.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/bitops.h>
+
+#include "test.h"
+
+struct item *
+item_tag_set(struct radix_tree_root *root, unsigned long index, int tag)
+{
+ return radix_tree_tag_set(root, index, tag);
+}
+
+struct item *
+item_tag_clear(struct radix_tree_root *root, unsigned long index, int tag)
+{
+ return radix_tree_tag_clear(root, index, tag);
+}
+
+int item_tag_get(struct radix_tree_root *root, unsigned long index, int tag)
+{
+ return radix_tree_tag_get(root, index, tag);
+}
+
+int __item_insert(struct radix_tree_root *root, struct item *item)
+{
+ return __radix_tree_insert(root, item->index, item->order, item);
+}
+
+struct item *item_create(unsigned long index, unsigned int order)
+{
+ struct item *ret = malloc(sizeof(*ret));
+
+ ret->index = index;
+ ret->order = order;
+ return ret;
+}
+
+int item_insert_order(struct radix_tree_root *root, unsigned long index,
+ unsigned order)
+{
+ struct item *item = item_create(index, order);
+ int err = __item_insert(root, item);
+ if (err)
+ free(item);
+ return err;
+}
+
+int item_insert(struct radix_tree_root *root, unsigned long index)
+{
+ return item_insert_order(root, index, 0);
+}
+
+void item_sanity(struct item *item, unsigned long index)
+{
+ unsigned long mask;
+ assert(!radix_tree_is_internal_node(item));
+ assert(item->order < BITS_PER_LONG);
+ mask = (1UL << item->order) - 1;
+ assert((item->index | mask) == (index | mask));
+}
+
+int item_delete(struct radix_tree_root *root, unsigned long index)
+{
+ struct item *item = radix_tree_delete(root, index);
+
+ if (item) {
+ item_sanity(item, index);
+ free(item);
+ return 1;
+ }
+ return 0;
+}
+
+static void item_free_rcu(struct rcu_head *head)
+{
+ struct item *item = container_of(head, struct item, rcu_head);
+
+ free(item);
+}
+
+int item_delete_rcu(struct radix_tree_root *root, unsigned long index)
+{
+ struct item *item = radix_tree_delete(root, index);
+
+ if (item) {
+ item_sanity(item, index);
+ call_rcu(&item->rcu_head, item_free_rcu);
+ return 1;
+ }
+ return 0;
+}
+
+void item_check_present(struct radix_tree_root *root, unsigned long index)
+{
+ struct item *item;
+
+ item = radix_tree_lookup(root, index);
+ assert(item != NULL);
+ item_sanity(item, index);
+}
+
+struct item *item_lookup(struct radix_tree_root *root, unsigned long index)
+{
+ return radix_tree_lookup(root, index);
+}
+
+void item_check_absent(struct radix_tree_root *root, unsigned long index)
+{
+ struct item *item;
+
+ item = radix_tree_lookup(root, index);
+ assert(item == NULL);
+}
+
+/*
+ * Scan only the passed (start, start+nr] for present items
+ */
+void item_gang_check_present(struct radix_tree_root *root,
+ unsigned long start, unsigned long nr,
+ int chunk, int hop)
+{
+ struct item *items[chunk];
+ unsigned long into;
+
+ for (into = 0; into < nr; ) {
+ int nfound;
+ int nr_to_find = chunk;
+ int i;
+
+ if (nr_to_find > (nr - into))
+ nr_to_find = nr - into;
+
+ nfound = radix_tree_gang_lookup(root, (void **)items,
+ start + into, nr_to_find);
+ assert(nfound == nr_to_find);
+ for (i = 0; i < nfound; i++)
+ assert(items[i]->index == start + into + i);
+ into += hop;
+ }
+}
+
+/*
+ * Scan the entire tree, only expecting present items (start, start+nr]
+ */
+void item_full_scan(struct radix_tree_root *root, unsigned long start,
+ unsigned long nr, int chunk)
+{
+ struct item *items[chunk];
+ unsigned long into = 0;
+ unsigned long this_index = start;
+ int nfound;
+ int i;
+
+// printf("%s(0x%08lx, 0x%08lx, %d)\n", __FUNCTION__, start, nr, chunk);
+
+ while ((nfound = radix_tree_gang_lookup(root, (void **)items, into,
+ chunk))) {
+// printf("At 0x%08lx, nfound=%d\n", into, nfound);
+ for (i = 0; i < nfound; i++) {
+ assert(items[i]->index == this_index);
+ this_index++;
+ }
+// printf("Found 0x%08lx->0x%08lx\n",
+// items[0]->index, items[nfound-1]->index);
+ into = this_index;
+ }
+ if (chunk)
+ assert(this_index == start + nr);
+ nfound = radix_tree_gang_lookup(root, (void **)items,
+ this_index, chunk);
+ assert(nfound == 0);
+}
+
+/* Use the same pattern as tag_pages_for_writeback() in mm/page-writeback.c */
+int tag_tagged_items(struct radix_tree_root *root, pthread_mutex_t *lock,
+ unsigned long start, unsigned long end, unsigned batch,
+ unsigned iftag, unsigned thentag)
+{
+ unsigned long tagged = 0;
+ struct radix_tree_iter iter;
+ void **slot;
+
+ if (batch == 0)
+ batch = 1;
+
+ if (lock)
+ pthread_mutex_lock(lock);
+ radix_tree_for_each_tagged(slot, root, &iter, start, iftag) {
+ if (iter.index > end)
+ break;
+ radix_tree_iter_tag_set(root, &iter, thentag);
+ tagged++;
+ if ((tagged % batch) != 0)
+ continue;
+ slot = radix_tree_iter_resume(slot, &iter);
+ if (lock) {
+ pthread_mutex_unlock(lock);
+ rcu_barrier();
+ pthread_mutex_lock(lock);
+ }
+ }
+ if (lock)
+ pthread_mutex_unlock(lock);
+
+ return tagged;
+}
+
+/* Use the same pattern as find_swap_entry() in mm/shmem.c */
+unsigned long find_item(struct radix_tree_root *root, void *item)
+{
+ struct radix_tree_iter iter;
+ void **slot;
+ unsigned long found = -1;
+ unsigned long checked = 0;
+
+ radix_tree_for_each_slot(slot, root, &iter, 0) {
+ if (*slot == item) {
+ found = iter.index;
+ break;
+ }
+ checked++;
+ if ((checked % 4) != 0)
+ continue;
+ slot = radix_tree_iter_resume(slot, &iter);
+ }
+
+ return found;
+}
+
+static int verify_node(struct radix_tree_node *slot, unsigned int tag,
+ int tagged)
+{
+ int anyset = 0;
+ int i;
+ int j;
+
+ slot = entry_to_node(slot);
+
+ /* Verify consistency at this level */
+ for (i = 0; i < RADIX_TREE_TAG_LONGS; i++) {
+ if (slot->tags[tag][i]) {
+ anyset = 1;
+ break;
+ }
+ }
+ if (tagged != anyset) {
+ printf("tag: %u, shift %u, tagged: %d, anyset: %d\n",
+ tag, slot->shift, tagged, anyset);
+ for (j = 0; j < RADIX_TREE_MAX_TAGS; j++) {
+ printf("tag %d: ", j);
+ for (i = 0; i < RADIX_TREE_TAG_LONGS; i++)
+ printf("%016lx ", slot->tags[j][i]);
+ printf("\n");
+ }
+ return 1;
+ }
+ assert(tagged == anyset);
+
+ /* Go for next level */
+ if (slot->shift > 0) {
+ for (i = 0; i < RADIX_TREE_MAP_SIZE; i++)
+ if (slot->slots[i])
+ if (verify_node(slot->slots[i], tag,
+ !!test_bit(i, slot->tags[tag]))) {
+ printf("Failure at off %d\n", i);
+ for (j = 0; j < RADIX_TREE_MAX_TAGS; j++) {
+ printf("tag %d: ", j);
+ for (i = 0; i < RADIX_TREE_TAG_LONGS; i++)
+ printf("%016lx ", slot->tags[j][i]);
+ printf("\n");
+ }
+ return 1;
+ }
+ }
+ return 0;
+}
+
+void verify_tag_consistency(struct radix_tree_root *root, unsigned int tag)
+{
+ struct radix_tree_node *node = root->rnode;
+ if (!radix_tree_is_internal_node(node))
+ return;
+ verify_node(node, tag, !!root_tag_get(root, tag));
+}
+
+void item_kill_tree(struct radix_tree_root *root)
+{
+ struct radix_tree_iter iter;
+ void **slot;
+ struct item *items[32];
+ int nfound;
+
+ radix_tree_for_each_slot(slot, root, &iter, 0) {
+ if (radix_tree_exceptional_entry(*slot))
+ radix_tree_delete(root, iter.index);
+ }
+
+ while ((nfound = radix_tree_gang_lookup(root, (void **)items, 0, 32))) {
+ int i;
+
+ for (i = 0; i < nfound; i++) {
+ void *ret;
+
+ ret = radix_tree_delete(root, items[i]->index);
+ assert(ret == items[i]);
+ free(items[i]);
+ }
+ }
+ assert(radix_tree_gang_lookup(root, (void **)items, 0, 32) == 0);
+ assert(root->rnode == NULL);
+}
+
+void tree_verify_min_height(struct radix_tree_root *root, int maxindex)
+{
+ unsigned shift;
+ struct radix_tree_node *node = root->rnode;
+ if (!radix_tree_is_internal_node(node)) {
+ assert(maxindex == 0);
+ return;
+ }
+
+ node = entry_to_node(node);
+ assert(maxindex <= node_maxindex(node));
+
+ shift = node->shift;
+ if (shift > 0)
+ assert(maxindex > shift_maxindex(shift - RADIX_TREE_MAP_SHIFT));
+ else
+ assert(maxindex > 0);
+}
diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h
new file mode 100644
index 000000000..92d901eac
--- /dev/null
+++ b/tools/testing/radix-tree/test.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/gfp.h>
+#include <linux/types.h>
+#include <linux/radix-tree.h>
+#include <linux/rcupdate.h>
+
+struct item {
+ struct rcu_head rcu_head;
+ unsigned long index;
+ unsigned int order;
+};
+
+struct item *item_create(unsigned long index, unsigned int order);
+int __item_insert(struct radix_tree_root *root, struct item *item);
+int item_insert(struct radix_tree_root *root, unsigned long index);
+void item_sanity(struct item *item, unsigned long index);
+int item_insert_order(struct radix_tree_root *root, unsigned long index,
+ unsigned order);
+int item_delete(struct radix_tree_root *root, unsigned long index);
+int item_delete_rcu(struct radix_tree_root *root, unsigned long index);
+struct item *item_lookup(struct radix_tree_root *root, unsigned long index);
+
+void item_check_present(struct radix_tree_root *root, unsigned long index);
+void item_check_absent(struct radix_tree_root *root, unsigned long index);
+void item_gang_check_present(struct radix_tree_root *root,
+ unsigned long start, unsigned long nr,
+ int chunk, int hop);
+void item_full_scan(struct radix_tree_root *root, unsigned long start,
+ unsigned long nr, int chunk);
+void item_kill_tree(struct radix_tree_root *root);
+
+int tag_tagged_items(struct radix_tree_root *, pthread_mutex_t *,
+ unsigned long start, unsigned long end, unsigned batch,
+ unsigned iftag, unsigned thentag);
+unsigned long find_item(struct radix_tree_root *, void *item);
+
+void tag_check(void);
+void multiorder_checks(void);
+void iteration_test(unsigned order, unsigned duration);
+void benchmark(void);
+void idr_checks(void);
+void ida_tests(void);
+
+struct item *
+item_tag_set(struct radix_tree_root *root, unsigned long index, int tag);
+struct item *
+item_tag_clear(struct radix_tree_root *root, unsigned long index, int tag);
+int item_tag_get(struct radix_tree_root *root, unsigned long index, int tag);
+void tree_verify_min_height(struct radix_tree_root *root, int maxindex);
+void verify_tag_consistency(struct radix_tree_root *root, unsigned int tag);
+
+extern int nr_allocated;
+
+/* Normally private parts of lib/radix-tree.c */
+struct radix_tree_node *entry_to_node(void *ptr);
+void radix_tree_dump(struct radix_tree_root *root);
+int root_tag_get(struct radix_tree_root *root, unsigned int tag);
+unsigned long node_maxindex(struct radix_tree_node *);
+unsigned long shift_maxindex(unsigned int shift);
+int radix_tree_cpu_dead(unsigned int cpu);
+struct radix_tree_preload {
+ unsigned nr;
+ struct radix_tree_node *nodes;
+};
+extern struct radix_tree_preload radix_tree_preloads;
diff --git a/tools/testing/scatterlist/Makefile b/tools/testing/scatterlist/Makefile
new file mode 100644
index 000000000..933c3a6e4
--- /dev/null
+++ b/tools/testing/scatterlist/Makefile
@@ -0,0 +1,30 @@
+CFLAGS += -I. -I../../include -g -O2 -Wall -fsanitize=address
+LDFLAGS += -fsanitize=address -fsanitize=undefined
+TARGETS = main
+OFILES = main.o scatterlist.o
+
+ifeq ($(BUILD), 32)
+ CFLAGS += -m32
+ LDFLAGS += -m32
+endif
+
+targets: include $(TARGETS)
+
+main: $(OFILES)
+
+clean:
+ $(RM) $(TARGETS) $(OFILES) scatterlist.c linux/scatterlist.h linux/highmem.h linux/kmemleak.h asm/io.h
+ @rmdir asm
+
+scatterlist.c: ../../../lib/scatterlist.c
+ @sed -e 's/^static //' -e 's/__always_inline //' -e 's/inline //' < $< > $@
+
+.PHONY: include
+
+include: ../../../include/linux/scatterlist.h
+ @mkdir -p linux
+ @mkdir -p asm
+ @touch asm/io.h
+ @touch linux/highmem.h
+ @touch linux/kmemleak.h
+ @cp $< linux/scatterlist.h
diff --git a/tools/testing/scatterlist/linux/mm.h b/tools/testing/scatterlist/linux/mm.h
new file mode 100644
index 000000000..6f9ac14aa
--- /dev/null
+++ b/tools/testing/scatterlist/linux/mm.h
@@ -0,0 +1,125 @@
+#ifndef _LINUX_MM_H
+#define _LINUX_MM_H
+
+#include <assert.h>
+#include <string.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdio.h>
+
+typedef unsigned long dma_addr_t;
+
+#define unlikely
+
+#define BUG_ON(x) assert(!(x))
+
+#define WARN_ON(condition) ({ \
+ int __ret_warn_on = !!(condition); \
+ unlikely(__ret_warn_on); \
+})
+
+#define WARN_ON_ONCE(condition) ({ \
+ int __ret_warn_on = !!(condition); \
+ if (unlikely(__ret_warn_on)) \
+ assert(0); \
+ unlikely(__ret_warn_on); \
+})
+
+#define PAGE_SIZE (4096)
+#define PAGE_SHIFT (12)
+#define PAGE_MASK (~(PAGE_SIZE-1))
+
+#define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1)
+#define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask))
+#define ALIGN(x, a) __ALIGN_KERNEL((x), (a))
+
+#define PAGE_ALIGN(addr) ALIGN(addr, PAGE_SIZE)
+
+#define offset_in_page(p) ((unsigned long)(p) & ~PAGE_MASK)
+
+#define virt_to_page(x) ((void *)x)
+#define page_address(x) ((void *)x)
+
+static inline unsigned long page_to_phys(struct page *page)
+{
+ assert(0);
+
+ return 0;
+}
+
+#define page_to_pfn(page) ((unsigned long)(page) / PAGE_SIZE)
+#define pfn_to_page(pfn) (void *)((pfn) * PAGE_SIZE)
+#define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n))
+
+#define __min(t1, t2, min1, min2, x, y) ({ \
+ t1 min1 = (x); \
+ t2 min2 = (y); \
+ (void) (&min1 == &min2); \
+ min1 < min2 ? min1 : min2; })
+
+#define ___PASTE(a,b) a##b
+#define __PASTE(a,b) ___PASTE(a,b)
+
+#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__)
+
+#define min(x, y) \
+ __min(typeof(x), typeof(y), \
+ __UNIQUE_ID(min1_), __UNIQUE_ID(min2_), \
+ x, y)
+
+#define min_t(type, x, y) \
+ __min(type, type, \
+ __UNIQUE_ID(min1_), __UNIQUE_ID(min2_), \
+ x, y)
+
+#define preemptible() (1)
+
+static inline void *kmap(struct page *page)
+{
+ assert(0);
+
+ return NULL;
+}
+
+static inline void *kmap_atomic(struct page *page)
+{
+ assert(0);
+
+ return NULL;
+}
+
+static inline void kunmap(void *addr)
+{
+ assert(0);
+}
+
+static inline void kunmap_atomic(void *addr)
+{
+ assert(0);
+}
+
+static inline unsigned long __get_free_page(unsigned int flags)
+{
+ return (unsigned long)malloc(PAGE_SIZE);
+}
+
+static inline void free_page(unsigned long page)
+{
+ free((void *)page);
+}
+
+static inline void *kmalloc(unsigned int size, unsigned int flags)
+{
+ return malloc(size);
+}
+
+#define kfree(x) free(x)
+
+#define kmemleak_alloc(a, b, c, d)
+#define kmemleak_free(a)
+
+#define PageSlab(p) (0)
+#define flush_kernel_dcache_page(p)
+
+#endif
diff --git a/tools/testing/scatterlist/main.c b/tools/testing/scatterlist/main.c
new file mode 100644
index 000000000..0a1464181
--- /dev/null
+++ b/tools/testing/scatterlist/main.c
@@ -0,0 +1,79 @@
+#include <stdio.h>
+#include <assert.h>
+
+#include <linux/scatterlist.h>
+
+#define MAX_PAGES (64)
+
+static void set_pages(struct page **pages, const unsigned *array, unsigned num)
+{
+ unsigned int i;
+
+ assert(num < MAX_PAGES);
+ for (i = 0; i < num; i++)
+ pages[i] = (struct page *)(unsigned long)
+ ((1 + array[i]) * PAGE_SIZE);
+}
+
+#define pfn(...) (unsigned []){ __VA_ARGS__ }
+
+int main(void)
+{
+ const unsigned int sgmax = SCATTERLIST_MAX_SEGMENT;
+ struct test {
+ int alloc_ret;
+ unsigned num_pages;
+ unsigned *pfn;
+ unsigned size;
+ unsigned int max_seg;
+ unsigned int expected_segments;
+ } *test, tests[] = {
+ { -EINVAL, 1, pfn(0), PAGE_SIZE, PAGE_SIZE + 1, 1 },
+ { -EINVAL, 1, pfn(0), PAGE_SIZE, 0, 1 },
+ { -EINVAL, 1, pfn(0), PAGE_SIZE, sgmax + 1, 1 },
+ { 0, 1, pfn(0), PAGE_SIZE, sgmax, 1 },
+ { 0, 1, pfn(0), 1, sgmax, 1 },
+ { 0, 2, pfn(0, 1), 2 * PAGE_SIZE, sgmax, 1 },
+ { 0, 2, pfn(1, 0), 2 * PAGE_SIZE, sgmax, 2 },
+ { 0, 3, pfn(0, 1, 2), 3 * PAGE_SIZE, sgmax, 1 },
+ { 0, 3, pfn(0, 2, 1), 3 * PAGE_SIZE, sgmax, 3 },
+ { 0, 3, pfn(0, 1, 3), 3 * PAGE_SIZE, sgmax, 2 },
+ { 0, 3, pfn(1, 2, 4), 3 * PAGE_SIZE, sgmax, 2 },
+ { 0, 3, pfn(1, 3, 4), 3 * PAGE_SIZE, sgmax, 2 },
+ { 0, 4, pfn(0, 1, 3, 4), 4 * PAGE_SIZE, sgmax, 2 },
+ { 0, 5, pfn(0, 1, 3, 4, 5), 5 * PAGE_SIZE, sgmax, 2 },
+ { 0, 5, pfn(0, 1, 3, 4, 6), 5 * PAGE_SIZE, sgmax, 3 },
+ { 0, 5, pfn(0, 1, 2, 3, 4), 5 * PAGE_SIZE, sgmax, 1 },
+ { 0, 5, pfn(0, 1, 2, 3, 4), 5 * PAGE_SIZE, 2 * PAGE_SIZE, 3 },
+ { 0, 6, pfn(0, 1, 2, 3, 4, 5), 6 * PAGE_SIZE, 2 * PAGE_SIZE, 3 },
+ { 0, 6, pfn(0, 2, 3, 4, 5, 6), 6 * PAGE_SIZE, 2 * PAGE_SIZE, 4 },
+ { 0, 6, pfn(0, 1, 3, 4, 5, 6), 6 * PAGE_SIZE, 2 * PAGE_SIZE, 3 },
+ { 0, 0, NULL, 0, 0, 0 },
+ };
+ unsigned int i;
+
+ for (i = 0, test = tests; test->expected_segments; test++, i++) {
+ struct page *pages[MAX_PAGES];
+ struct sg_table st;
+ int ret;
+
+ set_pages(pages, test->pfn, test->num_pages);
+
+ ret = __sg_alloc_table_from_pages(&st, pages, test->num_pages,
+ 0, test->size, test->max_seg,
+ GFP_KERNEL);
+ assert(ret == test->alloc_ret);
+
+ if (test->alloc_ret)
+ continue;
+
+ assert(st.nents == test->expected_segments);
+ assert(st.orig_nents == test->expected_segments);
+
+ sg_free_table(&st);
+ }
+
+ assert(i == (sizeof(tests) / sizeof(tests[0])) - 1);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/.gitignore b/tools/testing/selftests/.gitignore
new file mode 100644
index 000000000..917503524
--- /dev/null
+++ b/tools/testing/selftests/.gitignore
@@ -0,0 +1,5 @@
+kselftest
+gpiogpio-event-mon
+gpiogpio-hammer
+gpioinclude/
+gpiolsgpio
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
new file mode 100644
index 000000000..f0017c831
--- /dev/null
+++ b/tools/testing/selftests/Makefile
@@ -0,0 +1,167 @@
+# SPDX-License-Identifier: GPL-2.0
+TARGETS = android
+TARGETS += bpf
+TARGETS += breakpoints
+TARGETS += capabilities
+TARGETS += cgroup
+TARGETS += cpufreq
+TARGETS += cpu-hotplug
+TARGETS += efivarfs
+TARGETS += exec
+TARGETS += filesystems
+TARGETS += firmware
+TARGETS += ftrace
+TARGETS += futex
+TARGETS += gpio
+TARGETS += intel_pstate
+TARGETS += ipc
+TARGETS += kcmp
+TARGETS += kvm
+TARGETS += lib
+TARGETS += membarrier
+TARGETS += memfd
+TARGETS += memory-hotplug
+TARGETS += mount
+TARGETS += mqueue
+TARGETS += net
+TARGETS += netfilter
+TARGETS += nsfs
+TARGETS += powerpc
+TARGETS += proc
+TARGETS += pstore
+TARGETS += ptrace
+TARGETS += rseq
+TARGETS += rtc
+TARGETS += seccomp
+TARGETS += sigaltstack
+TARGETS += size
+TARGETS += sparc64
+TARGETS += splice
+TARGETS += static_keys
+TARGETS += sync
+TARGETS += sysctl
+ifneq (1, $(quicktest))
+TARGETS += timers
+endif
+TARGETS += user
+TARGETS += vm
+TARGETS += x86
+TARGETS += zram
+#Please keep the TARGETS list alphabetically sorted
+# Run "make quicktest=1 run_tests" or
+# "make quicktest=1 kselftest" from top level Makefile
+
+TARGETS_HOTPLUG = cpu-hotplug
+TARGETS_HOTPLUG += memory-hotplug
+
+# Clear LDFLAGS and MAKEFLAGS if called from main
+# Makefile to avoid test build failures when test
+# Makefile doesn't have explicit build rules.
+ifeq (1,$(MAKELEVEL))
+override LDFLAGS =
+override MAKEFLAGS =
+endif
+
+ifneq ($(KBUILD_SRC),)
+override LDFLAGS =
+endif
+
+BUILD := $(O)
+ifndef BUILD
+ BUILD := $(KBUILD_OUTPUT)
+endif
+ifndef BUILD
+ BUILD := $(shell pwd)
+endif
+
+# KSFT_TAP_LEVEL is used from KSFT framework to prevent nested TAP header
+# printing from tests. Applicable to run_tests case where run_tests adds
+# TAP header prior running tests and when a test program invokes another
+# with system() call. Export it here to cover override RUN_TESTS defines.
+export KSFT_TAP_LEVEL=`echo 1`
+
+export BUILD
+all:
+ @for TARGET in $(TARGETS); do \
+ BUILD_TARGET=$$BUILD/$$TARGET; \
+ mkdir $$BUILD_TARGET -p; \
+ make OUTPUT=$$BUILD_TARGET -C $$TARGET;\
+ done;
+
+run_tests: all
+ @for TARGET in $(TARGETS); do \
+ BUILD_TARGET=$$BUILD/$$TARGET; \
+ make OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests;\
+ done;
+
+hotplug:
+ @for TARGET in $(TARGETS_HOTPLUG); do \
+ BUILD_TARGET=$$BUILD/$$TARGET; \
+ make OUTPUT=$$BUILD_TARGET -C $$TARGET;\
+ done;
+
+run_hotplug: hotplug
+ @for TARGET in $(TARGETS_HOTPLUG); do \
+ BUILD_TARGET=$$BUILD/$$TARGET; \
+ make OUTPUT=$$BUILD_TARGET -C $$TARGET run_full_test;\
+ done;
+
+clean_hotplug:
+ @for TARGET in $(TARGETS_HOTPLUG); do \
+ BUILD_TARGET=$$BUILD/$$TARGET; \
+ make OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\
+ done;
+
+run_pstore_crash:
+ make -C pstore run_crash
+
+INSTALL_PATH ?= install
+INSTALL_PATH := $(abspath $(INSTALL_PATH))
+ALL_SCRIPT := $(INSTALL_PATH)/run_kselftest.sh
+
+install:
+ifdef INSTALL_PATH
+ @# Ask all targets to install their files
+ mkdir -p $(INSTALL_PATH)
+ @for TARGET in $(TARGETS); do \
+ BUILD_TARGET=$$BUILD/$$TARGET; \
+ make OUTPUT=$$BUILD_TARGET -C $$TARGET INSTALL_PATH=$(INSTALL_PATH)/$$TARGET install; \
+ done;
+
+ @# Ask all targets to emit their test scripts
+ echo "#!/bin/sh" > $(ALL_SCRIPT)
+ echo "BASE_DIR=\$$(realpath \$$(dirname \$$0))" >> $(ALL_SCRIPT)
+ echo "cd \$$BASE_DIR" >> $(ALL_SCRIPT)
+ echo "ROOT=\$$PWD" >> $(ALL_SCRIPT)
+ echo "if [ \"\$$1\" = \"--summary\" ]; then" >> $(ALL_SCRIPT)
+ echo " OUTPUT=\$$BASE_DIR/output.log" >> $(ALL_SCRIPT)
+ echo " cat /dev/null > \$$OUTPUT" >> $(ALL_SCRIPT)
+ echo "else" >> $(ALL_SCRIPT)
+ echo " OUTPUT=/dev/stdout" >> $(ALL_SCRIPT)
+ echo "fi" >> $(ALL_SCRIPT)
+ echo "export KSFT_TAP_LEVEL=1" >> $(ALL_SCRIPT)
+ echo "export skip=4" >> $(ALL_SCRIPT)
+
+ for TARGET in $(TARGETS); do \
+ BUILD_TARGET=$$BUILD/$$TARGET; \
+ echo "echo ; echo TAP version 13" >> $(ALL_SCRIPT); \
+ echo "echo Running tests in $$TARGET" >> $(ALL_SCRIPT); \
+ echo "echo ========================================" >> $(ALL_SCRIPT); \
+ echo "[ -w /dev/kmsg ] && echo \"kselftest: Running tests in $$TARGET\" >> /dev/kmsg" >> $(ALL_SCRIPT); \
+ echo "cd $$TARGET" >> $(ALL_SCRIPT); \
+ make -s --no-print-directory OUTPUT=$$BUILD_TARGET -C $$TARGET emit_tests >> $(ALL_SCRIPT); \
+ echo "cd \$$ROOT" >> $(ALL_SCRIPT); \
+ done;
+
+ chmod u+x $(ALL_SCRIPT)
+else
+ $(error Error: set INSTALL_PATH to use install)
+endif
+
+clean:
+ @for TARGET in $(TARGETS); do \
+ BUILD_TARGET=$$BUILD/$$TARGET; \
+ make OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\
+ done;
+
+.PHONY: all run_tests hotplug run_hotplug clean_hotplug run_pstore_crash install clean
diff --git a/tools/testing/selftests/android/Makefile b/tools/testing/selftests/android/Makefile
new file mode 100644
index 000000000..72c25a3cb
--- /dev/null
+++ b/tools/testing/selftests/android/Makefile
@@ -0,0 +1,38 @@
+SUBDIRS := ion
+
+TEST_PROGS := run.sh
+
+.PHONY: all clean
+
+include ../lib.mk
+
+all:
+ @for DIR in $(SUBDIRS); do \
+ BUILD_TARGET=$(OUTPUT)/$$DIR; \
+ mkdir $$BUILD_TARGET -p; \
+ make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
+ #SUBDIR test prog name should be in the form: SUBDIR_test.sh \
+ TEST=$$DIR"_test.sh"; \
+ if [ -e $$DIR/$$TEST ]; then \
+ rsync -a $$DIR/$$TEST $$BUILD_TARGET/; \
+ fi \
+ done
+
+override define INSTALL_RULE
+ mkdir -p $(INSTALL_PATH)
+ install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES)
+
+ @for SUBDIR in $(SUBDIRS); do \
+ BUILD_TARGET=$(OUTPUT)/$$SUBDIR; \
+ mkdir $$BUILD_TARGET -p; \
+ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$SUBDIR INSTALL_PATH=$(INSTALL_PATH)/$$SUBDIR install; \
+ done;
+endef
+
+override define CLEAN
+ @for DIR in $(SUBDIRS); do \
+ BUILD_TARGET=$(OUTPUT)/$$DIR; \
+ mkdir $$BUILD_TARGET -p; \
+ make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
+ done
+endef
diff --git a/tools/testing/selftests/android/config b/tools/testing/selftests/android/config
new file mode 100644
index 000000000..b4ad748a9
--- /dev/null
+++ b/tools/testing/selftests/android/config
@@ -0,0 +1,5 @@
+CONFIG_ANDROID=y
+CONFIG_STAGING=y
+CONFIG_ION=y
+CONFIG_ION_SYSTEM_HEAP=y
+CONFIG_DRM_VGEM=y
diff --git a/tools/testing/selftests/android/ion/.gitignore b/tools/testing/selftests/android/ion/.gitignore
new file mode 100644
index 000000000..95e8f4561
--- /dev/null
+++ b/tools/testing/selftests/android/ion/.gitignore
@@ -0,0 +1,3 @@
+ionapp_export
+ionapp_import
+ionmap_test
diff --git a/tools/testing/selftests/android/ion/Makefile b/tools/testing/selftests/android/ion/Makefile
new file mode 100644
index 000000000..88cfe88e4
--- /dev/null
+++ b/tools/testing/selftests/android/ion/Makefile
@@ -0,0 +1,19 @@
+
+INCLUDEDIR := -I. -I../../../../../drivers/staging/android/uapi/ -I../../../../../usr/include/
+CFLAGS := $(CFLAGS) $(INCLUDEDIR) -Wall -O2 -g
+
+TEST_GEN_FILES := ionapp_export ionapp_import ionmap_test
+
+all: $(TEST_GEN_FILES)
+
+$(TEST_GEN_FILES): ipcsocket.c ionutils.c
+
+TEST_PROGS := ion_test.sh
+
+KSFT_KHDR_INSTALL := 1
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+$(OUTPUT)/ionapp_export: ionapp_export.c ipcsocket.c ionutils.c
+$(OUTPUT)/ionapp_import: ionapp_import.c ipcsocket.c ionutils.c
+$(OUTPUT)/ionmap_test: ionmap_test.c ionutils.c
diff --git a/tools/testing/selftests/android/ion/README b/tools/testing/selftests/android/ion/README
new file mode 100644
index 000000000..21783e9c4
--- /dev/null
+++ b/tools/testing/selftests/android/ion/README
@@ -0,0 +1,101 @@
+ION BUFFER SHARING UTILITY
+==========================
+File: ion_test.sh : Utility to test ION driver buffer sharing mechanism.
+Author: Pintu Kumar <pintu.ping@gmail.com>
+
+Introduction:
+-------------
+This is a test utility to verify ION buffer sharing in user space
+between 2 independent processes.
+It uses unix domain socket (with SCM_RIGHTS) as IPC to transfer an FD to
+another process to share the same buffer.
+This utility demonstrates how ION buffer sharing can be implemented between
+two user space processes, using various heap types.
+The following heap types are supported by ION driver.
+ION_HEAP_TYPE_SYSTEM (0)
+ION_HEAP_TYPE_SYSTEM_CONTIG (1)
+ION_HEAP_TYPE_CARVEOUT (2)
+ION_HEAP_TYPE_CHUNK (3)
+ION_HEAP_TYPE_DMA (4)
+
+By default only the SYSTEM and SYSTEM_CONTIG heaps are supported.
+Each heap is associated with the respective heap id.
+This utility is designed in the form of client/server program.
+The server part (ionapp_export) is the exporter of the buffer.
+It is responsible for creating an ION client, allocating the buffer based on
+the heap id, writing some data to this buffer and then exporting the FD
+(associated with this buffer) to another process using socket IPC.
+This FD is called as buffer FD (which is different than the ION client FD).
+
+The client part (ionapp_import) is the importer of the buffer.
+It retrives the FD from the socket data and installs into its address space.
+This new FD internally points to the same kernel buffer.
+So first it reads the data that is stored in this buffer and prints it.
+Then it writes the different size of data (it could be different data) to the
+same buffer.
+Finally the buffer FD must be closed by both the exporter and importer.
+Thus the same kernel buffer is shared among two user space processes using
+ION driver and only one time allocation.
+
+Prerequisite:
+-------------
+This utility works only if /dev/ion interface is present.
+The following configs needs to be enabled in kernel to include ion driver.
+CONFIG_ANDROID=y
+CONFIG_STAGING=y
+CONFIG_ION=y
+CONFIG_ION_SYSTEM_HEAP=y
+
+This utility requires to be run as root user.
+
+
+Compile and test:
+-----------------
+This utility is made to be run as part of kselftest framework in kernel.
+To compile and run using kselftest you can simply do the following from the
+kernel top directory.
+linux$ make TARGETS=android kselftest
+Or you can also use:
+linux$ make -C tools/testing/selftests TARGETS=android run_tests
+Using the selftest it can directly execute the ion_test.sh script to test the
+buffer sharing using ion system heap.
+Currently the heap size is hard coded as just 10 bytes inside this script.
+You need to be a root user to run under selftest.
+
+You can also compile and test manually using the following steps:
+ion$ make
+These will generate 2 executable: ionapp_export, ionapp_import
+Now you can run the export and import manually by specifying the heap type
+and the heap size.
+You can also directly execute the shell script to run the test automatically.
+Simply use the following command to run the test.
+ion$ sudo ./ion_test.sh
+
+Test Results:
+-------------
+The utility is verified on Ubuntu-32 bit system with Linux Kernel 4.14.
+Here is the snapshot of the test result using kselftest.
+
+linux# make TARGETS=android kselftest
+heap_type: 0, heap_size: 10
+--------------------------------------
+heap type: 0
+ heap id: 1
+heap name: ion_system_heap
+--------------------------------------
+Fill buffer content:
+0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd
+Sharing fd: 6, Client fd: 5
+<ion_close_buffer_fd>: buffer release successfully....
+Received buffer fd: 4
+Read buffer content:
+0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0x0 0x0 0x0 0x0 0x0 0x0
+0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0
+Fill buffer content:
+0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd
+0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd
+0xfd 0xfd
+<ion_close_buffer_fd>: buffer release successfully....
+ion_test.sh: heap_type: 0 - [PASS]
+
+ion_test.sh: done
diff --git a/tools/testing/selftests/android/ion/ion.h b/tools/testing/selftests/android/ion/ion.h
new file mode 100644
index 000000000..f7021ac51
--- /dev/null
+++ b/tools/testing/selftests/android/ion/ion.h
@@ -0,0 +1,143 @@
+/*
+ * ion.h
+ *
+ * Copyright (C) 2011 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+/* This file is copied from drivers/staging/android/uapi/ion.h
+ * This local copy is required for the selftest to pass, when build
+ * outside the kernel source tree.
+ * Please keep this file in sync with its original file until the
+ * ion driver is moved outside the staging tree.
+ */
+
+#ifndef _UAPI_LINUX_ION_H
+#define _UAPI_LINUX_ION_H
+
+#include <linux/ioctl.h>
+#include <linux/types.h>
+
+/**
+ * enum ion_heap_types - list of all possible types of heaps
+ * @ION_HEAP_TYPE_SYSTEM: memory allocated via vmalloc
+ * @ION_HEAP_TYPE_SYSTEM_CONTIG: memory allocated via kmalloc
+ * @ION_HEAP_TYPE_CARVEOUT: memory allocated from a prereserved
+ * carveout heap, allocations are physically
+ * contiguous
+ * @ION_HEAP_TYPE_DMA: memory allocated via DMA API
+ * @ION_NUM_HEAPS: helper for iterating over heaps, a bit mask
+ * is used to identify the heaps, so only 32
+ * total heap types are supported
+ */
+enum ion_heap_type {
+ ION_HEAP_TYPE_SYSTEM,
+ ION_HEAP_TYPE_SYSTEM_CONTIG,
+ ION_HEAP_TYPE_CARVEOUT,
+ ION_HEAP_TYPE_CHUNK,
+ ION_HEAP_TYPE_DMA,
+ ION_HEAP_TYPE_CUSTOM, /*
+ * must be last so device specific heaps always
+ * are at the end of this enum
+ */
+};
+
+#define ION_NUM_HEAP_IDS (sizeof(unsigned int) * 8)
+
+/**
+ * allocation flags - the lower 16 bits are used by core ion, the upper 16
+ * bits are reserved for use by the heaps themselves.
+ */
+
+/*
+ * mappings of this buffer should be cached, ion will do cache maintenance
+ * when the buffer is mapped for dma
+ */
+#define ION_FLAG_CACHED 1
+
+/**
+ * DOC: Ion Userspace API
+ *
+ * create a client by opening /dev/ion
+ * most operations handled via following ioctls
+ *
+ */
+
+/**
+ * struct ion_allocation_data - metadata passed from userspace for allocations
+ * @len: size of the allocation
+ * @heap_id_mask: mask of heap ids to allocate from
+ * @flags: flags passed to heap
+ * @handle: pointer that will be populated with a cookie to use to
+ * refer to this allocation
+ *
+ * Provided by userspace as an argument to the ioctl
+ */
+struct ion_allocation_data {
+ __u64 len;
+ __u32 heap_id_mask;
+ __u32 flags;
+ __u32 fd;
+ __u32 unused;
+};
+
+#define MAX_HEAP_NAME 32
+
+/**
+ * struct ion_heap_data - data about a heap
+ * @name - first 32 characters of the heap name
+ * @type - heap type
+ * @heap_id - heap id for the heap
+ */
+struct ion_heap_data {
+ char name[MAX_HEAP_NAME];
+ __u32 type;
+ __u32 heap_id;
+ __u32 reserved0;
+ __u32 reserved1;
+ __u32 reserved2;
+};
+
+/**
+ * struct ion_heap_query - collection of data about all heaps
+ * @cnt - total number of heaps to be copied
+ * @heaps - buffer to copy heap data
+ */
+struct ion_heap_query {
+ __u32 cnt; /* Total number of heaps to be copied */
+ __u32 reserved0; /* align to 64bits */
+ __u64 heaps; /* buffer to be populated */
+ __u32 reserved1;
+ __u32 reserved2;
+};
+
+#define ION_IOC_MAGIC 'I'
+
+/**
+ * DOC: ION_IOC_ALLOC - allocate memory
+ *
+ * Takes an ion_allocation_data struct and returns it with the handle field
+ * populated with the opaque handle for the allocation.
+ */
+#define ION_IOC_ALLOC _IOWR(ION_IOC_MAGIC, 0, \
+ struct ion_allocation_data)
+
+/**
+ * DOC: ION_IOC_HEAP_QUERY - information about available heaps
+ *
+ * Takes an ion_heap_query structure and populates information about
+ * available Ion heaps.
+ */
+#define ION_IOC_HEAP_QUERY _IOWR(ION_IOC_MAGIC, 8, \
+ struct ion_heap_query)
+
+#endif /* _UAPI_LINUX_ION_H */
diff --git a/tools/testing/selftests/android/ion/ion_test.sh b/tools/testing/selftests/android/ion/ion_test.sh
new file mode 100755
index 000000000..69e676cfc
--- /dev/null
+++ b/tools/testing/selftests/android/ion/ion_test.sh
@@ -0,0 +1,58 @@
+#!/bin/bash
+
+heapsize=4096
+TCID="ion_test.sh"
+errcode=0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+run_test()
+{
+ heaptype=$1
+ ./ionapp_export -i $heaptype -s $heapsize &
+ sleep 1
+ ./ionapp_import
+ if [ $? -ne 0 ]; then
+ echo "$TCID: heap_type: $heaptype - [FAIL]"
+ errcode=1
+ else
+ echo "$TCID: heap_type: $heaptype - [PASS]"
+ fi
+ sleep 1
+ echo ""
+}
+
+check_root()
+{
+ uid=$(id -u)
+ if [ $uid -ne 0 ]; then
+ echo $TCID: must be run as root >&2
+ exit $ksft_skip
+ fi
+}
+
+check_device()
+{
+ DEVICE=/dev/ion
+ if [ ! -e $DEVICE ]; then
+ echo $TCID: No $DEVICE device found >&2
+ echo $TCID: May be CONFIG_ION is not set >&2
+ exit $ksft_skip
+ fi
+}
+
+main_function()
+{
+ check_device
+ check_root
+
+ # ION_SYSTEM_HEAP TEST
+ run_test 0
+ # ION_SYSTEM_CONTIG_HEAP TEST
+ run_test 1
+}
+
+main_function
+echo "$TCID: done"
+exit $errcode
diff --git a/tools/testing/selftests/android/ion/ionapp_export.c b/tools/testing/selftests/android/ion/ionapp_export.c
new file mode 100644
index 000000000..b5fa0a2dc
--- /dev/null
+++ b/tools/testing/selftests/android/ion/ionapp_export.c
@@ -0,0 +1,136 @@
+/*
+ * ionapp_export.c
+ *
+ * It is a user space utility to create and export android
+ * ion memory buffer fd to another process using unix domain socket as IPC.
+ * This acts like a server for ionapp_import(client).
+ * So, this server has to be started first before the client.
+ *
+ * Copyright (C) 2017 Pintu Kumar <pintu.ping@gmail.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/time.h>
+#include "ionutils.h"
+#include "ipcsocket.h"
+
+
+void print_usage(int argc, char *argv[])
+{
+ printf("Usage: %s [-h <help>] [-i <heap id>] [-s <size in bytes>]\n",
+ argv[0]);
+}
+
+int main(int argc, char *argv[])
+{
+ int opt, ret, status, heapid;
+ int sockfd, client_fd, shared_fd;
+ unsigned char *map_buf;
+ unsigned long map_len, heap_type, heap_size, flags;
+ struct ion_buffer_info info;
+ struct socket_info skinfo;
+
+ if (argc < 2) {
+ print_usage(argc, argv);
+ return -1;
+ }
+
+ heap_size = 0;
+ flags = 0;
+ heap_type = ION_HEAP_TYPE_SYSTEM;
+
+ while ((opt = getopt(argc, argv, "hi:s:")) != -1) {
+ switch (opt) {
+ case 'h':
+ print_usage(argc, argv);
+ exit(0);
+ break;
+ case 'i':
+ heapid = atoi(optarg);
+ switch (heapid) {
+ case 0:
+ heap_type = ION_HEAP_TYPE_SYSTEM;
+ break;
+ case 1:
+ heap_type = ION_HEAP_TYPE_SYSTEM_CONTIG;
+ break;
+ default:
+ printf("ERROR: heap type not supported\n");
+ exit(1);
+ }
+ break;
+ case 's':
+ heap_size = atoi(optarg);
+ break;
+ default:
+ print_usage(argc, argv);
+ exit(1);
+ break;
+ }
+ }
+
+ if (heap_size <= 0) {
+ printf("heap_size cannot be 0\n");
+ print_usage(argc, argv);
+ exit(1);
+ }
+
+ printf("heap_type: %ld, heap_size: %ld\n", heap_type, heap_size);
+ info.heap_type = heap_type;
+ info.heap_size = heap_size;
+ info.flag_type = flags;
+
+ /* This is server: open the socket connection first */
+ /* Here; 1 indicates server or exporter */
+ status = opensocket(&sockfd, SOCKET_NAME, 1);
+ if (status < 0) {
+ fprintf(stderr, "<%s>: Failed opensocket.\n", __func__);
+ goto err_socket;
+ }
+ skinfo.sockfd = sockfd;
+
+ ret = ion_export_buffer_fd(&info);
+ if (ret < 0) {
+ fprintf(stderr, "FAILED: ion_get_buffer_fd\n");
+ goto err_export;
+ }
+ client_fd = info.ionfd;
+ shared_fd = info.buffd;
+ map_buf = info.buffer;
+ map_len = info.buflen;
+ write_buffer(map_buf, map_len);
+
+ /* share ion buf fd with other user process */
+ printf("Sharing fd: %d, Client fd: %d\n", shared_fd, client_fd);
+ skinfo.datafd = shared_fd;
+ skinfo.buflen = map_len;
+
+ ret = socket_send_fd(&skinfo);
+ if (ret < 0) {
+ fprintf(stderr, "FAILED: socket_send_fd\n");
+ goto err_send;
+ }
+
+err_send:
+err_export:
+ ion_close_buffer_fd(&info);
+
+err_socket:
+ closesocket(sockfd, SOCKET_NAME);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/android/ion/ionapp_import.c b/tools/testing/selftests/android/ion/ionapp_import.c
new file mode 100644
index 000000000..ae2d704cf
--- /dev/null
+++ b/tools/testing/selftests/android/ion/ionapp_import.c
@@ -0,0 +1,88 @@
+/*
+ * ionapp_import.c
+ *
+ * It is a user space utility to receive android ion memory buffer fd
+ * over unix domain socket IPC that can be exported by ionapp_export.
+ * This acts like a client for ionapp_export.
+ *
+ * Copyright (C) 2017 Pintu Kumar <pintu.ping@gmail.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include "ionutils.h"
+#include "ipcsocket.h"
+
+
+int main(void)
+{
+ int ret, status;
+ int sockfd, shared_fd;
+ unsigned char *map_buf;
+ unsigned long map_len;
+ struct ion_buffer_info info;
+ struct socket_info skinfo;
+
+ /* This is the client part. Here 0 means client or importer */
+ status = opensocket(&sockfd, SOCKET_NAME, 0);
+ if (status < 0) {
+ fprintf(stderr, "No exporter exists...\n");
+ ret = status;
+ goto err_socket;
+ }
+
+ skinfo.sockfd = sockfd;
+
+ ret = socket_receive_fd(&skinfo);
+ if (ret < 0) {
+ fprintf(stderr, "Failed: socket_receive_fd\n");
+ goto err_recv;
+ }
+
+ shared_fd = skinfo.datafd;
+ printf("Received buffer fd: %d\n", shared_fd);
+ if (shared_fd <= 0) {
+ fprintf(stderr, "ERROR: improper buf fd\n");
+ ret = -1;
+ goto err_fd;
+ }
+
+ memset(&info, 0, sizeof(info));
+ info.buffd = shared_fd;
+ info.buflen = ION_BUFFER_LEN;
+
+ ret = ion_import_buffer_fd(&info);
+ if (ret < 0) {
+ fprintf(stderr, "Failed: ion_use_buffer_fd\n");
+ goto err_import;
+ }
+
+ map_buf = info.buffer;
+ map_len = info.buflen;
+ read_buffer(map_buf, map_len);
+
+ /* Write probably new data to the same buffer again */
+ map_len = ION_BUFFER_LEN;
+ write_buffer(map_buf, map_len);
+
+err_import:
+ ion_close_buffer_fd(&info);
+err_fd:
+err_recv:
+err_socket:
+ closesocket(sockfd, SOCKET_NAME);
+
+ return ret;
+}
diff --git a/tools/testing/selftests/android/ion/ionmap_test.c b/tools/testing/selftests/android/ion/ionmap_test.c
new file mode 100644
index 000000000..dab36b06b
--- /dev/null
+++ b/tools/testing/selftests/android/ion/ionmap_test.c
@@ -0,0 +1,136 @@
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <linux/dma-buf.h>
+
+#include <drm/drm.h>
+
+#include "ion.h"
+#include "ionutils.h"
+
+int check_vgem(int fd)
+{
+ drm_version_t version = { 0 };
+ char name[5];
+ int ret;
+
+ version.name_len = 4;
+ version.name = name;
+
+ ret = ioctl(fd, DRM_IOCTL_VERSION, &version);
+ if (ret)
+ return 1;
+
+ return strcmp(name, "vgem");
+}
+
+int open_vgem(void)
+{
+ int i, fd;
+ const char *drmstr = "/dev/dri/card";
+
+ fd = -1;
+ for (i = 0; i < 16; i++) {
+ char name[80];
+
+ sprintf(name, "%s%u", drmstr, i);
+
+ fd = open(name, O_RDWR);
+ if (fd < 0)
+ continue;
+
+ if (check_vgem(fd)) {
+ close(fd);
+ continue;
+ } else {
+ break;
+ }
+
+ }
+ return fd;
+}
+
+int import_vgem_fd(int vgem_fd, int dma_buf_fd, uint32_t *handle)
+{
+ struct drm_prime_handle import_handle = { 0 };
+ int ret;
+
+ import_handle.fd = dma_buf_fd;
+ import_handle.flags = 0;
+ import_handle.handle = 0;
+
+ ret = ioctl(vgem_fd, DRM_IOCTL_PRIME_FD_TO_HANDLE, &import_handle);
+ if (ret == 0)
+ *handle = import_handle.handle;
+ return ret;
+}
+
+void close_handle(int vgem_fd, uint32_t handle)
+{
+ struct drm_gem_close close = { 0 };
+
+ close.handle = handle;
+ ioctl(vgem_fd, DRM_IOCTL_GEM_CLOSE, &close);
+}
+
+int main()
+{
+ int ret, vgem_fd;
+ struct ion_buffer_info info;
+ uint32_t handle = 0;
+ struct dma_buf_sync sync = { 0 };
+
+ info.heap_type = ION_HEAP_TYPE_SYSTEM;
+ info.heap_size = 4096;
+ info.flag_type = ION_FLAG_CACHED;
+
+ ret = ion_export_buffer_fd(&info);
+ if (ret < 0) {
+ printf("ion buffer alloc failed\n");
+ return -1;
+ }
+
+ vgem_fd = open_vgem();
+ if (vgem_fd < 0) {
+ ret = vgem_fd;
+ printf("Failed to open vgem\n");
+ goto out_ion;
+ }
+
+ ret = import_vgem_fd(vgem_fd, info.buffd, &handle);
+
+ if (ret < 0) {
+ printf("Failed to import buffer\n");
+ goto out_vgem;
+ }
+
+ sync.flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_RW;
+ ret = ioctl(info.buffd, DMA_BUF_IOCTL_SYNC, &sync);
+ if (ret)
+ printf("sync start failed %d\n", errno);
+
+ memset(info.buffer, 0xff, 4096);
+
+ sync.flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_RW;
+ ret = ioctl(info.buffd, DMA_BUF_IOCTL_SYNC, &sync);
+ if (ret)
+ printf("sync end failed %d\n", errno);
+
+ close_handle(vgem_fd, handle);
+ ret = 0;
+
+out_vgem:
+ close(vgem_fd);
+out_ion:
+ ion_close_buffer_fd(&info);
+ printf("done.\n");
+ return ret;
+}
diff --git a/tools/testing/selftests/android/ion/ionutils.c b/tools/testing/selftests/android/ion/ionutils.c
new file mode 100644
index 000000000..7d1d37c4e
--- /dev/null
+++ b/tools/testing/selftests/android/ion/ionutils.c
@@ -0,0 +1,253 @@
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <errno.h>
+//#include <stdint.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include "ionutils.h"
+#include "ipcsocket.h"
+
+
+void write_buffer(void *buffer, unsigned long len)
+{
+ int i;
+ unsigned char *ptr = (unsigned char *)buffer;
+
+ if (!ptr) {
+ fprintf(stderr, "<%s>: Invalid buffer...\n", __func__);
+ return;
+ }
+
+ printf("Fill buffer content:\n");
+ memset(ptr, 0xfd, len);
+ for (i = 0; i < len; i++)
+ printf("0x%x ", ptr[i]);
+ printf("\n");
+}
+
+void read_buffer(void *buffer, unsigned long len)
+{
+ int i;
+ unsigned char *ptr = (unsigned char *)buffer;
+
+ if (!ptr) {
+ fprintf(stderr, "<%s>: Invalid buffer...\n", __func__);
+ return;
+ }
+
+ printf("Read buffer content:\n");
+ for (i = 0; i < len; i++)
+ printf("0x%x ", ptr[i]);
+ printf("\n");
+}
+
+int ion_export_buffer_fd(struct ion_buffer_info *ion_info)
+{
+ int i, ret, ionfd, buffer_fd;
+ unsigned int heap_id;
+ unsigned long maplen;
+ unsigned char *map_buffer;
+ struct ion_allocation_data alloc_data;
+ struct ion_heap_query query;
+ struct ion_heap_data heap_data[MAX_HEAP_COUNT];
+
+ if (!ion_info) {
+ fprintf(stderr, "<%s>: Invalid ion info\n", __func__);
+ return -1;
+ }
+
+ /* Create an ION client */
+ ionfd = open(ION_DEVICE, O_RDWR);
+ if (ionfd < 0) {
+ fprintf(stderr, "<%s>: Failed to open ion client: %s\n",
+ __func__, strerror(errno));
+ return -1;
+ }
+
+ memset(&query, 0, sizeof(query));
+ query.cnt = MAX_HEAP_COUNT;
+ query.heaps = (unsigned long int)&heap_data[0];
+ /* Query ION heap_id_mask from ION heap */
+ ret = ioctl(ionfd, ION_IOC_HEAP_QUERY, &query);
+ if (ret < 0) {
+ fprintf(stderr, "<%s>: Failed: ION_IOC_HEAP_QUERY: %s\n",
+ __func__, strerror(errno));
+ goto err_query;
+ }
+
+ heap_id = MAX_HEAP_COUNT + 1;
+ for (i = 0; i < query.cnt; i++) {
+ if (heap_data[i].type == ion_info->heap_type) {
+ heap_id = heap_data[i].heap_id;
+ break;
+ }
+ }
+
+ if (heap_id > MAX_HEAP_COUNT) {
+ fprintf(stderr, "<%s>: ERROR: heap type does not exists\n",
+ __func__);
+ goto err_heap;
+ }
+
+ alloc_data.len = ion_info->heap_size;
+ alloc_data.heap_id_mask = 1 << heap_id;
+ alloc_data.flags = ion_info->flag_type;
+
+ /* Allocate memory for this ION client as per heap_type */
+ ret = ioctl(ionfd, ION_IOC_ALLOC, &alloc_data);
+ if (ret < 0) {
+ fprintf(stderr, "<%s>: Failed: ION_IOC_ALLOC: %s\n",
+ __func__, strerror(errno));
+ goto err_alloc;
+ }
+
+ /* This will return a valid buffer fd */
+ buffer_fd = alloc_data.fd;
+ maplen = alloc_data.len;
+
+ if (buffer_fd < 0 || maplen <= 0) {
+ fprintf(stderr, "<%s>: Invalid map data, fd: %d, len: %ld\n",
+ __func__, buffer_fd, maplen);
+ goto err_fd_data;
+ }
+
+ /* Create memory mapped buffer for the buffer fd */
+ map_buffer = (unsigned char *)mmap(NULL, maplen, PROT_READ|PROT_WRITE,
+ MAP_SHARED, buffer_fd, 0);
+ if (map_buffer == MAP_FAILED) {
+ fprintf(stderr, "<%s>: Failed: mmap: %s\n",
+ __func__, strerror(errno));
+ goto err_mmap;
+ }
+
+ ion_info->ionfd = ionfd;
+ ion_info->buffd = buffer_fd;
+ ion_info->buffer = map_buffer;
+ ion_info->buflen = maplen;
+
+ return 0;
+
+ munmap(map_buffer, maplen);
+
+err_fd_data:
+err_mmap:
+ /* in case of error: close the buffer fd */
+ if (buffer_fd)
+ close(buffer_fd);
+
+err_query:
+err_heap:
+err_alloc:
+ /* In case of error: close the ion client fd */
+ if (ionfd)
+ close(ionfd);
+
+ return -1;
+}
+
+int ion_import_buffer_fd(struct ion_buffer_info *ion_info)
+{
+ int buffd;
+ unsigned char *map_buf;
+ unsigned long map_len;
+
+ if (!ion_info) {
+ fprintf(stderr, "<%s>: Invalid ion info\n", __func__);
+ return -1;
+ }
+
+ map_len = ion_info->buflen;
+ buffd = ion_info->buffd;
+
+ if (buffd < 0 || map_len <= 0) {
+ fprintf(stderr, "<%s>: Invalid map data, fd: %d, len: %ld\n",
+ __func__, buffd, map_len);
+ goto err_buffd;
+ }
+
+ map_buf = (unsigned char *)mmap(NULL, map_len, PROT_READ|PROT_WRITE,
+ MAP_SHARED, buffd, 0);
+ if (map_buf == MAP_FAILED) {
+ printf("<%s>: Failed - mmap: %s\n",
+ __func__, strerror(errno));
+ goto err_mmap;
+ }
+
+ ion_info->buffer = map_buf;
+ ion_info->buflen = map_len;
+
+ return 0;
+
+err_mmap:
+ if (buffd)
+ close(buffd);
+
+err_buffd:
+ return -1;
+}
+
+void ion_close_buffer_fd(struct ion_buffer_info *ion_info)
+{
+ if (ion_info) {
+ /* unmap the buffer properly in the end */
+ munmap(ion_info->buffer, ion_info->buflen);
+ /* close the buffer fd */
+ if (ion_info->buffd > 0)
+ close(ion_info->buffd);
+ /* Finally, close the client fd */
+ if (ion_info->ionfd > 0)
+ close(ion_info->ionfd);
+ }
+}
+
+int socket_send_fd(struct socket_info *info)
+{
+ int status;
+ int fd, sockfd;
+ struct socketdata skdata;
+
+ if (!info) {
+ fprintf(stderr, "<%s>: Invalid socket info\n", __func__);
+ return -1;
+ }
+
+ sockfd = info->sockfd;
+ fd = info->datafd;
+ memset(&skdata, 0, sizeof(skdata));
+ skdata.data = fd;
+ skdata.len = sizeof(skdata.data);
+ status = sendtosocket(sockfd, &skdata);
+ if (status < 0) {
+ fprintf(stderr, "<%s>: Failed: sendtosocket\n", __func__);
+ return -1;
+ }
+
+ return 0;
+}
+
+int socket_receive_fd(struct socket_info *info)
+{
+ int status;
+ int fd, sockfd;
+ struct socketdata skdata;
+
+ if (!info) {
+ fprintf(stderr, "<%s>: Invalid socket info\n", __func__);
+ return -1;
+ }
+
+ sockfd = info->sockfd;
+ memset(&skdata, 0, sizeof(skdata));
+ status = receivefromsocket(sockfd, &skdata);
+ if (status < 0) {
+ fprintf(stderr, "<%s>: Failed: receivefromsocket\n", __func__);
+ return -1;
+ }
+
+ fd = (int)skdata.data;
+ info->datafd = fd;
+
+ return status;
+}
diff --git a/tools/testing/selftests/android/ion/ionutils.h b/tools/testing/selftests/android/ion/ionutils.h
new file mode 100644
index 000000000..9941eb858
--- /dev/null
+++ b/tools/testing/selftests/android/ion/ionutils.h
@@ -0,0 +1,55 @@
+#ifndef __ION_UTILS_H
+#define __ION_UTILS_H
+
+#include "ion.h"
+
+#define SOCKET_NAME "ion_socket"
+#define ION_DEVICE "/dev/ion"
+
+#define ION_BUFFER_LEN 4096
+#define MAX_HEAP_COUNT ION_HEAP_TYPE_CUSTOM
+
+struct socket_info {
+ int sockfd;
+ int datafd;
+ unsigned long buflen;
+};
+
+struct ion_buffer_info {
+ int ionfd;
+ int buffd;
+ unsigned int heap_type;
+ unsigned int flag_type;
+ unsigned long heap_size;
+ unsigned long buflen;
+ unsigned char *buffer;
+};
+
+
+/* This is used to fill the data into the mapped buffer */
+void write_buffer(void *buffer, unsigned long len);
+
+/* This is used to read the data from the exported buffer */
+void read_buffer(void *buffer, unsigned long len);
+
+/* This is used to create an ION buffer FD for the kernel buffer
+ * So you can export this same buffer to others in the form of FD
+ */
+int ion_export_buffer_fd(struct ion_buffer_info *ion_info);
+
+/* This is used to import or map an exported FD.
+ * So we point to same buffer without making a copy. Hence zero-copy.
+ */
+int ion_import_buffer_fd(struct ion_buffer_info *ion_info);
+
+/* This is used to close all references for the ION client */
+void ion_close_buffer_fd(struct ion_buffer_info *ion_info);
+
+/* This is used to send FD to another process using socket IPC */
+int socket_send_fd(struct socket_info *skinfo);
+
+/* This is used to receive FD from another process using socket IPC */
+int socket_receive_fd(struct socket_info *skinfo);
+
+
+#endif
diff --git a/tools/testing/selftests/android/ion/ipcsocket.c b/tools/testing/selftests/android/ion/ipcsocket.c
new file mode 100644
index 000000000..7dc521002
--- /dev/null
+++ b/tools/testing/selftests/android/ion/ipcsocket.c
@@ -0,0 +1,227 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <sys/un.h>
+#include <errno.h>
+
+#include "ipcsocket.h"
+
+
+int opensocket(int *sockfd, const char *name, int connecttype)
+{
+ int ret, temp = 1;
+
+ if (!name || strlen(name) > MAX_SOCK_NAME_LEN) {
+ fprintf(stderr, "<%s>: Invalid socket name.\n", __func__);
+ return -1;
+ }
+
+ ret = socket(PF_LOCAL, SOCK_STREAM, 0);
+ if (ret < 0) {
+ fprintf(stderr, "<%s>: Failed socket: <%s>\n",
+ __func__, strerror(errno));
+ return ret;
+ }
+
+ *sockfd = ret;
+ if (setsockopt(*sockfd, SOL_SOCKET, SO_REUSEADDR,
+ (char *)&temp, sizeof(int)) < 0) {
+ fprintf(stderr, "<%s>: Failed setsockopt: <%s>\n",
+ __func__, strerror(errno));
+ goto err;
+ }
+
+ sprintf(sock_name, "/tmp/%s", name);
+
+ if (connecttype == 1) {
+ /* This is for Server connection */
+ struct sockaddr_un skaddr;
+ int clientfd;
+ socklen_t sklen;
+
+ unlink(sock_name);
+ memset(&skaddr, 0, sizeof(skaddr));
+ skaddr.sun_family = AF_LOCAL;
+ strcpy(skaddr.sun_path, sock_name);
+
+ ret = bind(*sockfd, (struct sockaddr *)&skaddr,
+ SUN_LEN(&skaddr));
+ if (ret < 0) {
+ fprintf(stderr, "<%s>: Failed bind: <%s>\n",
+ __func__, strerror(errno));
+ goto err;
+ }
+
+ ret = listen(*sockfd, 5);
+ if (ret < 0) {
+ fprintf(stderr, "<%s>: Failed listen: <%s>\n",
+ __func__, strerror(errno));
+ goto err;
+ }
+
+ memset(&skaddr, 0, sizeof(skaddr));
+ sklen = sizeof(skaddr);
+
+ ret = accept(*sockfd, (struct sockaddr *)&skaddr,
+ (socklen_t *)&sklen);
+ if (ret < 0) {
+ fprintf(stderr, "<%s>: Failed accept: <%s>\n",
+ __func__, strerror(errno));
+ goto err;
+ }
+
+ clientfd = ret;
+ *sockfd = clientfd;
+ } else {
+ /* This is for client connection */
+ struct sockaddr_un skaddr;
+
+ memset(&skaddr, 0, sizeof(skaddr));
+ skaddr.sun_family = AF_LOCAL;
+ strcpy(skaddr.sun_path, sock_name);
+
+ ret = connect(*sockfd, (struct sockaddr *)&skaddr,
+ SUN_LEN(&skaddr));
+ if (ret < 0) {
+ fprintf(stderr, "<%s>: Failed connect: <%s>\n",
+ __func__, strerror(errno));
+ goto err;
+ }
+ }
+
+ return 0;
+
+err:
+ if (*sockfd)
+ close(*sockfd);
+
+ return ret;
+}
+
+int sendtosocket(int sockfd, struct socketdata *skdata)
+{
+ int ret, buffd;
+ unsigned int len;
+ char cmsg_b[CMSG_SPACE(sizeof(int))];
+ struct cmsghdr *cmsg;
+ struct msghdr msgh;
+ struct iovec iov;
+ struct timeval timeout;
+ fd_set selFDs;
+
+ if (!skdata) {
+ fprintf(stderr, "<%s>: socketdata is NULL\n", __func__);
+ return -1;
+ }
+
+ FD_ZERO(&selFDs);
+ FD_SET(0, &selFDs);
+ FD_SET(sockfd, &selFDs);
+ timeout.tv_sec = 20;
+ timeout.tv_usec = 0;
+
+ ret = select(sockfd+1, NULL, &selFDs, NULL, &timeout);
+ if (ret < 0) {
+ fprintf(stderr, "<%s>: Failed select: <%s>\n",
+ __func__, strerror(errno));
+ return -1;
+ }
+
+ if (FD_ISSET(sockfd, &selFDs)) {
+ buffd = skdata->data;
+ len = skdata->len;
+ memset(&msgh, 0, sizeof(msgh));
+ msgh.msg_control = &cmsg_b;
+ msgh.msg_controllen = CMSG_LEN(len);
+ iov.iov_base = "OK";
+ iov.iov_len = 2;
+ msgh.msg_iov = &iov;
+ msgh.msg_iovlen = 1;
+ cmsg = CMSG_FIRSTHDR(&msgh);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ cmsg->cmsg_len = CMSG_LEN(len);
+ memcpy(CMSG_DATA(cmsg), &buffd, len);
+
+ ret = sendmsg(sockfd, &msgh, MSG_DONTWAIT);
+ if (ret < 0) {
+ fprintf(stderr, "<%s>: Failed sendmsg: <%s>\n",
+ __func__, strerror(errno));
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+int receivefromsocket(int sockfd, struct socketdata *skdata)
+{
+ int ret, buffd;
+ unsigned int len = 0;
+ char cmsg_b[CMSG_SPACE(sizeof(int))];
+ struct cmsghdr *cmsg;
+ struct msghdr msgh;
+ struct iovec iov;
+ fd_set recvFDs;
+ char data[32];
+
+ if (!skdata) {
+ fprintf(stderr, "<%s>: socketdata is NULL\n", __func__);
+ return -1;
+ }
+
+ FD_ZERO(&recvFDs);
+ FD_SET(0, &recvFDs);
+ FD_SET(sockfd, &recvFDs);
+
+ ret = select(sockfd+1, &recvFDs, NULL, NULL, NULL);
+ if (ret < 0) {
+ fprintf(stderr, "<%s>: Failed select: <%s>\n",
+ __func__, strerror(errno));
+ return -1;
+ }
+
+ if (FD_ISSET(sockfd, &recvFDs)) {
+ len = sizeof(buffd);
+ memset(&msgh, 0, sizeof(msgh));
+ msgh.msg_control = &cmsg_b;
+ msgh.msg_controllen = CMSG_LEN(len);
+ iov.iov_base = data;
+ iov.iov_len = sizeof(data)-1;
+ msgh.msg_iov = &iov;
+ msgh.msg_iovlen = 1;
+ cmsg = CMSG_FIRSTHDR(&msgh);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ cmsg->cmsg_len = CMSG_LEN(len);
+
+ ret = recvmsg(sockfd, &msgh, MSG_DONTWAIT);
+ if (ret < 0) {
+ fprintf(stderr, "<%s>: Failed recvmsg: <%s>\n",
+ __func__, strerror(errno));
+ return -1;
+ }
+
+ memcpy(&buffd, CMSG_DATA(cmsg), len);
+ skdata->data = buffd;
+ skdata->len = len;
+ }
+ return 0;
+}
+
+int closesocket(int sockfd, char *name)
+{
+ char sockname[MAX_SOCK_NAME_LEN];
+
+ if (sockfd)
+ close(sockfd);
+ sprintf(sockname, "/tmp/%s", name);
+ unlink(sockname);
+ shutdown(sockfd, 2);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/android/ion/ipcsocket.h b/tools/testing/selftests/android/ion/ipcsocket.h
new file mode 100644
index 000000000..b3e84498a
--- /dev/null
+++ b/tools/testing/selftests/android/ion/ipcsocket.h
@@ -0,0 +1,35 @@
+
+#ifndef _IPCSOCKET_H
+#define _IPCSOCKET_H
+
+
+#define MAX_SOCK_NAME_LEN 64
+
+char sock_name[MAX_SOCK_NAME_LEN];
+
+/* This structure is responsible for holding the IPC data
+ * data: hold the buffer fd
+ * len: just the length of 32-bit integer fd
+ */
+struct socketdata {
+ int data;
+ unsigned int len;
+};
+
+/* This API is used to open the IPC socket connection
+ * name: implies a unique socket name in the system
+ * connecttype: implies server(0) or client(1)
+ */
+int opensocket(int *sockfd, const char *name, int connecttype);
+
+/* This is the API to send socket data over IPC socket */
+int sendtosocket(int sockfd, struct socketdata *data);
+
+/* This is the API to receive socket data over IPC socket */
+int receivefromsocket(int sockfd, struct socketdata *data);
+
+/* This is the API to close the socket connection */
+int closesocket(int sockfd, char *name);
+
+
+#endif
diff --git a/tools/testing/selftests/android/run.sh b/tools/testing/selftests/android/run.sh
new file mode 100755
index 000000000..dd8edf291
--- /dev/null
+++ b/tools/testing/selftests/android/run.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+
+(cd ion; ./ion_test.sh)
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
new file mode 100644
index 000000000..49938d72c
--- /dev/null
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -0,0 +1,21 @@
+test_verifier
+test_maps
+test_lru_map
+test_lpm_map
+test_tag
+FEATURE-DUMP.libbpf
+fixdep
+test_align
+test_dev_cgroup
+test_progs
+test_tcpbpf_user
+test_verifier_log
+feature
+test_libbpf_open
+test_sock
+test_sock_addr
+urandom_read
+test_btf
+test_sockmap
+test_lirc_mode2_user
+get_cgroup_id_user
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
new file mode 100644
index 000000000..f3f874ba1
--- /dev/null
+++ b/tools/testing/selftests/bpf/Makefile
@@ -0,0 +1,145 @@
+# SPDX-License-Identifier: GPL-2.0
+
+LIBDIR := ../../../lib
+BPFDIR := $(LIBDIR)/bpf
+APIDIR := ../../../include/uapi
+GENDIR := ../../../../include/generated
+GENHDR := $(GENDIR)/autoconf.h
+
+ifneq ($(wildcard $(GENHDR)),)
+ GENFLAGS := -DHAVE_GENHDR
+endif
+
+CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(BPFDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include
+LDLIBS += -lcap -lelf -lrt -lpthread
+
+TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read
+all: $(TEST_CUSTOM_PROGS)
+
+$(TEST_CUSTOM_PROGS): $(OUTPUT)/%: %.c
+ $(CC) -o $(TEST_CUSTOM_PROGS) -static $< -Wl,--build-id
+
+# Order correspond to 'make run_tests' order
+TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
+ test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \
+ test_sock test_btf test_sockmap test_lirc_mode2_user get_cgroup_id_user \
+ test_socket_cookie test_cgroup_storage test_select_reuseport
+
+TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \
+ test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \
+ sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \
+ test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \
+ sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o \
+ sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o test_adjust_tail.o \
+ test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o test_tunnel_kern.o \
+ test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o \
+ test_lwt_seg6local.o sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \
+ get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o \
+ test_skb_cgroup_id_kern.o
+
+# Order correspond to 'make run_tests' order
+TEST_PROGS := test_kmod.sh \
+ test_libbpf.sh \
+ test_xdp_redirect.sh \
+ test_xdp_meta.sh \
+ test_offload.py \
+ test_sock_addr.sh \
+ test_tunnel.sh \
+ test_lwt_seg6local.sh \
+ test_lirc_mode2.sh \
+ test_skb_cgroup_id.sh
+
+# Compile but not part of 'make run_tests'
+TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr test_skb_cgroup_id_user
+
+include ../lib.mk
+
+BPFOBJ := $(OUTPUT)/libbpf.a
+
+$(TEST_GEN_PROGS): $(BPFOBJ)
+
+$(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/libbpf.a
+
+$(OUTPUT)/test_dev_cgroup: cgroup_helpers.c
+$(OUTPUT)/test_skb_cgroup_id_user: cgroup_helpers.c
+$(OUTPUT)/test_sock: cgroup_helpers.c
+$(OUTPUT)/test_sock_addr: cgroup_helpers.c
+$(OUTPUT)/test_socket_cookie: cgroup_helpers.c
+$(OUTPUT)/test_sockmap: cgroup_helpers.c
+$(OUTPUT)/test_tcpbpf_user: cgroup_helpers.c
+$(OUTPUT)/test_progs: trace_helpers.c
+$(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c
+$(OUTPUT)/test_cgroup_storage: cgroup_helpers.c
+
+.PHONY: force
+
+# force a rebuild of BPFOBJ when its dependencies are updated
+force:
+
+$(BPFOBJ): force
+ $(MAKE) -C $(BPFDIR) OUTPUT=$(OUTPUT)/
+
+CLANG ?= clang
+LLC ?= llc
+LLVM_OBJCOPY ?= llvm-objcopy
+BTF_PAHOLE ?= pahole
+
+PROBE := $(shell $(LLC) -march=bpf -mcpu=probe -filetype=null /dev/null 2>&1)
+
+# Let newer LLVM versions transparently probe the kernel for availability
+# of full BPF instruction set.
+ifeq ($(PROBE),)
+ CPU ?= probe
+else
+ CPU ?= generic
+endif
+
+# Get Clang's default includes on this system, as opposed to those seen by
+# '-target bpf'. This fixes "missing" files on some architectures/distros,
+# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc.
+#
+# Use '-idirafter': Don't interfere with include mechanics except where the
+# build would have failed anyways.
+CLANG_SYS_INCLUDES := $(shell $(CLANG) -v -E - </dev/null 2>&1 \
+ | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }')
+
+CLANG_FLAGS = -I. -I./include/uapi -I../../../include/uapi \
+ $(CLANG_SYS_INCLUDES) \
+ -Wno-compare-distinct-pointer-types
+
+$(OUTPUT)/test_l4lb_noinline.o: CLANG_FLAGS += -fno-inline
+$(OUTPUT)/test_xdp_noinline.o: CLANG_FLAGS += -fno-inline
+
+BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris)
+BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF)
+BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 'usage.*llvm')
+
+ifneq ($(BTF_LLC_PROBE),)
+ifneq ($(BTF_PAHOLE_PROBE),)
+ifneq ($(BTF_OBJCOPY_PROBE),)
+ CLANG_FLAGS += -g
+ LLC_FLAGS += -mattr=dwarfris
+ DWARF2BTF = y
+endif
+endif
+endif
+
+# Have one program compiled without "-target bpf" to test whether libbpf loads
+# it successfully
+$(OUTPUT)/test_xdp.o: test_xdp.c
+ $(CLANG) $(CLANG_FLAGS) \
+ -O2 -emit-llvm -c $< -o - | \
+ $(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@
+ifeq ($(DWARF2BTF),y)
+ $(BTF_PAHOLE) -J $@
+endif
+
+$(OUTPUT)/%.o: %.c
+ $(CLANG) $(CLANG_FLAGS) \
+ -O2 -target bpf -emit-llvm -c $< -o - | \
+ $(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@
+ifeq ($(DWARF2BTF),y)
+ $(BTF_PAHOLE) -J $@
+endif
+
+EXTRA_CLEAN := $(TEST_CUSTOM_PROGS)
diff --git a/tools/testing/selftests/bpf/bpf_endian.h b/tools/testing/selftests/bpf/bpf_endian.h
new file mode 100644
index 000000000..b25595ea4
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_endian.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __BPF_ENDIAN__
+#define __BPF_ENDIAN__
+
+#include <linux/swab.h>
+
+/* LLVM's BPF target selects the endianness of the CPU
+ * it compiles on, or the user specifies (bpfel/bpfeb),
+ * respectively. The used __BYTE_ORDER__ is defined by
+ * the compiler, we cannot rely on __BYTE_ORDER from
+ * libc headers, since it doesn't reflect the actual
+ * requested byte order.
+ *
+ * Note, LLVM's BPF target has different __builtin_bswapX()
+ * semantics. It does map to BPF_ALU | BPF_END | BPF_TO_BE
+ * in bpfel and bpfeb case, which means below, that we map
+ * to cpu_to_be16(). We could use it unconditionally in BPF
+ * case, but better not rely on it, so that this header here
+ * can be used from application and BPF program side, which
+ * use different targets.
+ */
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+# define __bpf_ntohs(x) __builtin_bswap16(x)
+# define __bpf_htons(x) __builtin_bswap16(x)
+# define __bpf_constant_ntohs(x) ___constant_swab16(x)
+# define __bpf_constant_htons(x) ___constant_swab16(x)
+# define __bpf_ntohl(x) __builtin_bswap32(x)
+# define __bpf_htonl(x) __builtin_bswap32(x)
+# define __bpf_constant_ntohl(x) ___constant_swab32(x)
+# define __bpf_constant_htonl(x) ___constant_swab32(x)
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+# define __bpf_ntohs(x) (x)
+# define __bpf_htons(x) (x)
+# define __bpf_constant_ntohs(x) (x)
+# define __bpf_constant_htons(x) (x)
+# define __bpf_ntohl(x) (x)
+# define __bpf_htonl(x) (x)
+# define __bpf_constant_ntohl(x) (x)
+# define __bpf_constant_htonl(x) (x)
+#else
+# error "Fix your compiler's __BYTE_ORDER__?!"
+#endif
+
+#define bpf_htons(x) \
+ (__builtin_constant_p(x) ? \
+ __bpf_constant_htons(x) : __bpf_htons(x))
+#define bpf_ntohs(x) \
+ (__builtin_constant_p(x) ? \
+ __bpf_constant_ntohs(x) : __bpf_ntohs(x))
+#define bpf_htonl(x) \
+ (__builtin_constant_p(x) ? \
+ __bpf_constant_htonl(x) : __bpf_htonl(x))
+#define bpf_ntohl(x) \
+ (__builtin_constant_p(x) ? \
+ __bpf_constant_ntohl(x) : __bpf_ntohl(x))
+
+#endif /* __BPF_ENDIAN__ */
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
new file mode 100644
index 000000000..e4be77302
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -0,0 +1,336 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __BPF_HELPERS_H
+#define __BPF_HELPERS_H
+
+/* helper macro to place programs, maps, license in
+ * different sections in elf_bpf file. Section names
+ * are interpreted by elf_bpf loader
+ */
+#define SEC(NAME) __attribute__((section(NAME), used))
+
+/* helper functions called from eBPF programs written in C */
+static void *(*bpf_map_lookup_elem)(void *map, void *key) =
+ (void *) BPF_FUNC_map_lookup_elem;
+static int (*bpf_map_update_elem)(void *map, void *key, void *value,
+ unsigned long long flags) =
+ (void *) BPF_FUNC_map_update_elem;
+static int (*bpf_map_delete_elem)(void *map, void *key) =
+ (void *) BPF_FUNC_map_delete_elem;
+static int (*bpf_probe_read)(void *dst, int size, void *unsafe_ptr) =
+ (void *) BPF_FUNC_probe_read;
+static unsigned long long (*bpf_ktime_get_ns)(void) =
+ (void *) BPF_FUNC_ktime_get_ns;
+static int (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) =
+ (void *) BPF_FUNC_trace_printk;
+static void (*bpf_tail_call)(void *ctx, void *map, int index) =
+ (void *) BPF_FUNC_tail_call;
+static unsigned long long (*bpf_get_smp_processor_id)(void) =
+ (void *) BPF_FUNC_get_smp_processor_id;
+static unsigned long long (*bpf_get_current_pid_tgid)(void) =
+ (void *) BPF_FUNC_get_current_pid_tgid;
+static unsigned long long (*bpf_get_current_uid_gid)(void) =
+ (void *) BPF_FUNC_get_current_uid_gid;
+static int (*bpf_get_current_comm)(void *buf, int buf_size) =
+ (void *) BPF_FUNC_get_current_comm;
+static unsigned long long (*bpf_perf_event_read)(void *map,
+ unsigned long long flags) =
+ (void *) BPF_FUNC_perf_event_read;
+static int (*bpf_clone_redirect)(void *ctx, int ifindex, int flags) =
+ (void *) BPF_FUNC_clone_redirect;
+static int (*bpf_redirect)(int ifindex, int flags) =
+ (void *) BPF_FUNC_redirect;
+static int (*bpf_redirect_map)(void *map, int key, int flags) =
+ (void *) BPF_FUNC_redirect_map;
+static int (*bpf_perf_event_output)(void *ctx, void *map,
+ unsigned long long flags, void *data,
+ int size) =
+ (void *) BPF_FUNC_perf_event_output;
+static int (*bpf_get_stackid)(void *ctx, void *map, int flags) =
+ (void *) BPF_FUNC_get_stackid;
+static int (*bpf_probe_write_user)(void *dst, void *src, int size) =
+ (void *) BPF_FUNC_probe_write_user;
+static int (*bpf_current_task_under_cgroup)(void *map, int index) =
+ (void *) BPF_FUNC_current_task_under_cgroup;
+static int (*bpf_skb_get_tunnel_key)(void *ctx, void *key, int size, int flags) =
+ (void *) BPF_FUNC_skb_get_tunnel_key;
+static int (*bpf_skb_set_tunnel_key)(void *ctx, void *key, int size, int flags) =
+ (void *) BPF_FUNC_skb_set_tunnel_key;
+static int (*bpf_skb_get_tunnel_opt)(void *ctx, void *md, int size) =
+ (void *) BPF_FUNC_skb_get_tunnel_opt;
+static int (*bpf_skb_set_tunnel_opt)(void *ctx, void *md, int size) =
+ (void *) BPF_FUNC_skb_set_tunnel_opt;
+static unsigned long long (*bpf_get_prandom_u32)(void) =
+ (void *) BPF_FUNC_get_prandom_u32;
+static int (*bpf_xdp_adjust_head)(void *ctx, int offset) =
+ (void *) BPF_FUNC_xdp_adjust_head;
+static int (*bpf_xdp_adjust_meta)(void *ctx, int offset) =
+ (void *) BPF_FUNC_xdp_adjust_meta;
+static int (*bpf_get_socket_cookie)(void *ctx) =
+ (void *) BPF_FUNC_get_socket_cookie;
+static int (*bpf_setsockopt)(void *ctx, int level, int optname, void *optval,
+ int optlen) =
+ (void *) BPF_FUNC_setsockopt;
+static int (*bpf_getsockopt)(void *ctx, int level, int optname, void *optval,
+ int optlen) =
+ (void *) BPF_FUNC_getsockopt;
+static int (*bpf_sock_ops_cb_flags_set)(void *ctx, int flags) =
+ (void *) BPF_FUNC_sock_ops_cb_flags_set;
+static int (*bpf_sk_redirect_map)(void *ctx, void *map, int key, int flags) =
+ (void *) BPF_FUNC_sk_redirect_map;
+static int (*bpf_sk_redirect_hash)(void *ctx, void *map, void *key, int flags) =
+ (void *) BPF_FUNC_sk_redirect_hash;
+static int (*bpf_sock_map_update)(void *map, void *key, void *value,
+ unsigned long long flags) =
+ (void *) BPF_FUNC_sock_map_update;
+static int (*bpf_sock_hash_update)(void *map, void *key, void *value,
+ unsigned long long flags) =
+ (void *) BPF_FUNC_sock_hash_update;
+static int (*bpf_perf_event_read_value)(void *map, unsigned long long flags,
+ void *buf, unsigned int buf_size) =
+ (void *) BPF_FUNC_perf_event_read_value;
+static int (*bpf_perf_prog_read_value)(void *ctx, void *buf,
+ unsigned int buf_size) =
+ (void *) BPF_FUNC_perf_prog_read_value;
+static int (*bpf_override_return)(void *ctx, unsigned long rc) =
+ (void *) BPF_FUNC_override_return;
+static int (*bpf_msg_redirect_map)(void *ctx, void *map, int key, int flags) =
+ (void *) BPF_FUNC_msg_redirect_map;
+static int (*bpf_msg_redirect_hash)(void *ctx,
+ void *map, void *key, int flags) =
+ (void *) BPF_FUNC_msg_redirect_hash;
+static int (*bpf_msg_apply_bytes)(void *ctx, int len) =
+ (void *) BPF_FUNC_msg_apply_bytes;
+static int (*bpf_msg_cork_bytes)(void *ctx, int len) =
+ (void *) BPF_FUNC_msg_cork_bytes;
+static int (*bpf_msg_pull_data)(void *ctx, int start, int end, int flags) =
+ (void *) BPF_FUNC_msg_pull_data;
+static int (*bpf_bind)(void *ctx, void *addr, int addr_len) =
+ (void *) BPF_FUNC_bind;
+static int (*bpf_xdp_adjust_tail)(void *ctx, int offset) =
+ (void *) BPF_FUNC_xdp_adjust_tail;
+static int (*bpf_skb_get_xfrm_state)(void *ctx, int index, void *state,
+ int size, int flags) =
+ (void *) BPF_FUNC_skb_get_xfrm_state;
+static int (*bpf_sk_select_reuseport)(void *ctx, void *map, void *key, __u32 flags) =
+ (void *) BPF_FUNC_sk_select_reuseport;
+static int (*bpf_get_stack)(void *ctx, void *buf, int size, int flags) =
+ (void *) BPF_FUNC_get_stack;
+static int (*bpf_fib_lookup)(void *ctx, struct bpf_fib_lookup *params,
+ int plen, __u32 flags) =
+ (void *) BPF_FUNC_fib_lookup;
+static int (*bpf_lwt_push_encap)(void *ctx, unsigned int type, void *hdr,
+ unsigned int len) =
+ (void *) BPF_FUNC_lwt_push_encap;
+static int (*bpf_lwt_seg6_store_bytes)(void *ctx, unsigned int offset,
+ void *from, unsigned int len) =
+ (void *) BPF_FUNC_lwt_seg6_store_bytes;
+static int (*bpf_lwt_seg6_action)(void *ctx, unsigned int action, void *param,
+ unsigned int param_len) =
+ (void *) BPF_FUNC_lwt_seg6_action;
+static int (*bpf_lwt_seg6_adjust_srh)(void *ctx, unsigned int offset,
+ unsigned int len) =
+ (void *) BPF_FUNC_lwt_seg6_adjust_srh;
+static int (*bpf_rc_repeat)(void *ctx) =
+ (void *) BPF_FUNC_rc_repeat;
+static int (*bpf_rc_keydown)(void *ctx, unsigned int protocol,
+ unsigned long long scancode, unsigned int toggle) =
+ (void *) BPF_FUNC_rc_keydown;
+static unsigned long long (*bpf_get_current_cgroup_id)(void) =
+ (void *) BPF_FUNC_get_current_cgroup_id;
+static void *(*bpf_get_local_storage)(void *map, unsigned long long flags) =
+ (void *) BPF_FUNC_get_local_storage;
+static unsigned long long (*bpf_skb_cgroup_id)(void *ctx) =
+ (void *) BPF_FUNC_skb_cgroup_id;
+static unsigned long long (*bpf_skb_ancestor_cgroup_id)(void *ctx, int level) =
+ (void *) BPF_FUNC_skb_ancestor_cgroup_id;
+
+/* llvm builtin functions that eBPF C program may use to
+ * emit BPF_LD_ABS and BPF_LD_IND instructions
+ */
+struct sk_buff;
+unsigned long long load_byte(void *skb,
+ unsigned long long off) asm("llvm.bpf.load.byte");
+unsigned long long load_half(void *skb,
+ unsigned long long off) asm("llvm.bpf.load.half");
+unsigned long long load_word(void *skb,
+ unsigned long long off) asm("llvm.bpf.load.word");
+
+/* a helper structure used by eBPF C program
+ * to describe map attributes to elf_bpf loader
+ */
+struct bpf_map_def {
+ unsigned int type;
+ unsigned int key_size;
+ unsigned int value_size;
+ unsigned int max_entries;
+ unsigned int map_flags;
+ unsigned int inner_map_idx;
+ unsigned int numa_node;
+};
+
+#define BPF_ANNOTATE_KV_PAIR(name, type_key, type_val) \
+ struct ____btf_map_##name { \
+ type_key key; \
+ type_val value; \
+ }; \
+ struct ____btf_map_##name \
+ __attribute__ ((section(".maps." #name), used)) \
+ ____btf_map_##name = { }
+
+static int (*bpf_skb_load_bytes)(void *ctx, int off, void *to, int len) =
+ (void *) BPF_FUNC_skb_load_bytes;
+static int (*bpf_skb_load_bytes_relative)(void *ctx, int off, void *to, int len, __u32 start_header) =
+ (void *) BPF_FUNC_skb_load_bytes_relative;
+static int (*bpf_skb_store_bytes)(void *ctx, int off, void *from, int len, int flags) =
+ (void *) BPF_FUNC_skb_store_bytes;
+static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flags) =
+ (void *) BPF_FUNC_l3_csum_replace;
+static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) =
+ (void *) BPF_FUNC_l4_csum_replace;
+static int (*bpf_csum_diff)(void *from, int from_size, void *to, int to_size, int seed) =
+ (void *) BPF_FUNC_csum_diff;
+static int (*bpf_skb_under_cgroup)(void *ctx, void *map, int index) =
+ (void *) BPF_FUNC_skb_under_cgroup;
+static int (*bpf_skb_change_head)(void *, int len, int flags) =
+ (void *) BPF_FUNC_skb_change_head;
+static int (*bpf_skb_pull_data)(void *, int len) =
+ (void *) BPF_FUNC_skb_pull_data;
+
+/* Scan the ARCH passed in from ARCH env variable (see Makefile) */
+#if defined(__TARGET_ARCH_x86)
+ #define bpf_target_x86
+ #define bpf_target_defined
+#elif defined(__TARGET_ARCH_s930x)
+ #define bpf_target_s930x
+ #define bpf_target_defined
+#elif defined(__TARGET_ARCH_arm64)
+ #define bpf_target_arm64
+ #define bpf_target_defined
+#elif defined(__TARGET_ARCH_mips)
+ #define bpf_target_mips
+ #define bpf_target_defined
+#elif defined(__TARGET_ARCH_powerpc)
+ #define bpf_target_powerpc
+ #define bpf_target_defined
+#elif defined(__TARGET_ARCH_sparc)
+ #define bpf_target_sparc
+ #define bpf_target_defined
+#else
+ #undef bpf_target_defined
+#endif
+
+/* Fall back to what the compiler says */
+#ifndef bpf_target_defined
+#if defined(__x86_64__)
+ #define bpf_target_x86
+#elif defined(__s390x__)
+ #define bpf_target_s930x
+#elif defined(__aarch64__)
+ #define bpf_target_arm64
+#elif defined(__mips__)
+ #define bpf_target_mips
+#elif defined(__powerpc__)
+ #define bpf_target_powerpc
+#elif defined(__sparc__)
+ #define bpf_target_sparc
+#endif
+#endif
+
+#if defined(bpf_target_x86)
+
+#define PT_REGS_PARM1(x) ((x)->di)
+#define PT_REGS_PARM2(x) ((x)->si)
+#define PT_REGS_PARM3(x) ((x)->dx)
+#define PT_REGS_PARM4(x) ((x)->cx)
+#define PT_REGS_PARM5(x) ((x)->r8)
+#define PT_REGS_RET(x) ((x)->sp)
+#define PT_REGS_FP(x) ((x)->bp)
+#define PT_REGS_RC(x) ((x)->ax)
+#define PT_REGS_SP(x) ((x)->sp)
+#define PT_REGS_IP(x) ((x)->ip)
+
+#elif defined(bpf_target_s390x)
+
+#define PT_REGS_PARM1(x) ((x)->gprs[2])
+#define PT_REGS_PARM2(x) ((x)->gprs[3])
+#define PT_REGS_PARM3(x) ((x)->gprs[4])
+#define PT_REGS_PARM4(x) ((x)->gprs[5])
+#define PT_REGS_PARM5(x) ((x)->gprs[6])
+#define PT_REGS_RET(x) ((x)->gprs[14])
+#define PT_REGS_FP(x) ((x)->gprs[11]) /* Works only with CONFIG_FRAME_POINTER */
+#define PT_REGS_RC(x) ((x)->gprs[2])
+#define PT_REGS_SP(x) ((x)->gprs[15])
+#define PT_REGS_IP(x) ((x)->psw.addr)
+
+#elif defined(bpf_target_arm64)
+
+#define PT_REGS_PARM1(x) ((x)->regs[0])
+#define PT_REGS_PARM2(x) ((x)->regs[1])
+#define PT_REGS_PARM3(x) ((x)->regs[2])
+#define PT_REGS_PARM4(x) ((x)->regs[3])
+#define PT_REGS_PARM5(x) ((x)->regs[4])
+#define PT_REGS_RET(x) ((x)->regs[30])
+#define PT_REGS_FP(x) ((x)->regs[29]) /* Works only with CONFIG_FRAME_POINTER */
+#define PT_REGS_RC(x) ((x)->regs[0])
+#define PT_REGS_SP(x) ((x)->sp)
+#define PT_REGS_IP(x) ((x)->pc)
+
+#elif defined(bpf_target_mips)
+
+#define PT_REGS_PARM1(x) ((x)->regs[4])
+#define PT_REGS_PARM2(x) ((x)->regs[5])
+#define PT_REGS_PARM3(x) ((x)->regs[6])
+#define PT_REGS_PARM4(x) ((x)->regs[7])
+#define PT_REGS_PARM5(x) ((x)->regs[8])
+#define PT_REGS_RET(x) ((x)->regs[31])
+#define PT_REGS_FP(x) ((x)->regs[30]) /* Works only with CONFIG_FRAME_POINTER */
+#define PT_REGS_RC(x) ((x)->regs[1])
+#define PT_REGS_SP(x) ((x)->regs[29])
+#define PT_REGS_IP(x) ((x)->cp0_epc)
+
+#elif defined(bpf_target_powerpc)
+
+#define PT_REGS_PARM1(x) ((x)->gpr[3])
+#define PT_REGS_PARM2(x) ((x)->gpr[4])
+#define PT_REGS_PARM3(x) ((x)->gpr[5])
+#define PT_REGS_PARM4(x) ((x)->gpr[6])
+#define PT_REGS_PARM5(x) ((x)->gpr[7])
+#define PT_REGS_RC(x) ((x)->gpr[3])
+#define PT_REGS_SP(x) ((x)->sp)
+#define PT_REGS_IP(x) ((x)->nip)
+
+#elif defined(bpf_target_sparc)
+
+#define PT_REGS_PARM1(x) ((x)->u_regs[UREG_I0])
+#define PT_REGS_PARM2(x) ((x)->u_regs[UREG_I1])
+#define PT_REGS_PARM3(x) ((x)->u_regs[UREG_I2])
+#define PT_REGS_PARM4(x) ((x)->u_regs[UREG_I3])
+#define PT_REGS_PARM5(x) ((x)->u_regs[UREG_I4])
+#define PT_REGS_RET(x) ((x)->u_regs[UREG_I7])
+#define PT_REGS_RC(x) ((x)->u_regs[UREG_I0])
+#define PT_REGS_SP(x) ((x)->u_regs[UREG_FP])
+
+/* Should this also be a bpf_target check for the sparc case? */
+#if defined(__arch64__)
+#define PT_REGS_IP(x) ((x)->tpc)
+#else
+#define PT_REGS_IP(x) ((x)->pc)
+#endif
+
+#endif
+
+#ifdef bpf_target_powerpc
+#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = (ctx)->link; })
+#define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP
+#elif bpf_target_sparc
+#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = PT_REGS_RET(ctx); })
+#define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP
+#else
+#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ \
+ bpf_probe_read(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); })
+#define BPF_KRETPROBE_READ_RET_IP(ip, ctx) ({ \
+ bpf_probe_read(&(ip), sizeof(ip), \
+ (void *)(PT_REGS_FP(ctx) + sizeof(ip))); })
+#endif
+
+#endif
diff --git a/tools/testing/selftests/bpf/bpf_rand.h b/tools/testing/selftests/bpf/bpf_rand.h
new file mode 100644
index 000000000..59bf3e1a9
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_rand.h
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __BPF_RAND__
+#define __BPF_RAND__
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <time.h>
+
+static inline uint64_t bpf_rand_mask(uint64_t mask)
+{
+ return (((uint64_t)(uint32_t)rand()) |
+ ((uint64_t)(uint32_t)rand() << 32)) & mask;
+}
+
+#define bpf_rand_ux(x, m) \
+static inline uint64_t bpf_rand_u##x(int shift) \
+{ \
+ return bpf_rand_mask((m)) << shift; \
+}
+
+bpf_rand_ux( 8, 0xffULL)
+bpf_rand_ux(16, 0xffffULL)
+bpf_rand_ux(24, 0xffffffULL)
+bpf_rand_ux(32, 0xffffffffULL)
+bpf_rand_ux(40, 0xffffffffffULL)
+bpf_rand_ux(48, 0xffffffffffffULL)
+bpf_rand_ux(56, 0xffffffffffffffULL)
+bpf_rand_ux(64, 0xffffffffffffffffULL)
+
+static inline void bpf_semi_rand_init(void)
+{
+ srand(time(NULL));
+}
+
+static inline uint64_t bpf_semi_rand_get(void)
+{
+ switch (rand() % 39) {
+ case 0: return 0x000000ff00000000ULL | bpf_rand_u8(0);
+ case 1: return 0xffffffff00000000ULL | bpf_rand_u16(0);
+ case 2: return 0x00000000ffff0000ULL | bpf_rand_u16(0);
+ case 3: return 0x8000000000000000ULL | bpf_rand_u32(0);
+ case 4: return 0x00000000f0000000ULL | bpf_rand_u32(0);
+ case 5: return 0x0000000100000000ULL | bpf_rand_u24(0);
+ case 6: return 0x800ff00000000000ULL | bpf_rand_u32(0);
+ case 7: return 0x7fffffff00000000ULL | bpf_rand_u32(0);
+ case 8: return 0xffffffffffffff00ULL ^ bpf_rand_u32(24);
+ case 9: return 0xffffffffffffff00ULL | bpf_rand_u8(0);
+ case 10: return 0x0000000010000000ULL | bpf_rand_u32(0);
+ case 11: return 0xf000000000000000ULL | bpf_rand_u8(0);
+ case 12: return 0x0000f00000000000ULL | bpf_rand_u8(8);
+ case 13: return 0x000000000f000000ULL | bpf_rand_u8(16);
+ case 14: return 0x0000000000000f00ULL | bpf_rand_u8(32);
+ case 15: return 0x00fff00000000f00ULL | bpf_rand_u8(48);
+ case 16: return 0x00007fffffffffffULL ^ bpf_rand_u32(1);
+ case 17: return 0xffff800000000000ULL | bpf_rand_u8(4);
+ case 18: return 0xffff800000000000ULL | bpf_rand_u8(20);
+ case 19: return (0xffffffc000000000ULL + 0x80000ULL) | bpf_rand_u32(0);
+ case 20: return (0xffffffc000000000ULL - 0x04000000ULL) | bpf_rand_u32(0);
+ case 21: return 0x0000000000000000ULL | bpf_rand_u8(55) | bpf_rand_u32(20);
+ case 22: return 0xffffffffffffffffULL ^ bpf_rand_u8(3) ^ bpf_rand_u32(40);
+ case 23: return 0x0000000000000000ULL | bpf_rand_u8(bpf_rand_u8(0) % 64);
+ case 24: return 0x0000000000000000ULL | bpf_rand_u16(bpf_rand_u8(0) % 64);
+ case 25: return 0xffffffffffffffffULL ^ bpf_rand_u8(bpf_rand_u8(0) % 64);
+ case 26: return 0xffffffffffffffffULL ^ bpf_rand_u40(bpf_rand_u8(0) % 64);
+ case 27: return 0x0000800000000000ULL;
+ case 28: return 0x8000000000000000ULL;
+ case 29: return 0x0000000000000000ULL;
+ case 30: return 0xffffffffffffffffULL;
+ case 31: return bpf_rand_u16(bpf_rand_u8(0) % 64);
+ case 32: return bpf_rand_u24(bpf_rand_u8(0) % 64);
+ case 33: return bpf_rand_u32(bpf_rand_u8(0) % 64);
+ case 34: return bpf_rand_u40(bpf_rand_u8(0) % 64);
+ case 35: return bpf_rand_u48(bpf_rand_u8(0) % 64);
+ case 36: return bpf_rand_u56(bpf_rand_u8(0) % 64);
+ case 37: return bpf_rand_u64(bpf_rand_u8(0) % 64);
+ default: return bpf_rand_u64(0);
+ }
+}
+
+#endif /* __BPF_RAND__ */
diff --git a/tools/testing/selftests/bpf/bpf_rlimit.h b/tools/testing/selftests/bpf/bpf_rlimit.h
new file mode 100644
index 000000000..9dac9b30f
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_rlimit.h
@@ -0,0 +1,28 @@
+#include <sys/resource.h>
+#include <stdio.h>
+
+static __attribute__((constructor)) void bpf_rlimit_ctor(void)
+{
+ struct rlimit rlim_old, rlim_new = {
+ .rlim_cur = RLIM_INFINITY,
+ .rlim_max = RLIM_INFINITY,
+ };
+
+ getrlimit(RLIMIT_MEMLOCK, &rlim_old);
+ /* For the sake of running the test cases, we temporarily
+ * set rlimit to infinity in order for kernel to focus on
+ * errors from actual test cases and not getting noise
+ * from hitting memlock limits. The limit is on per-process
+ * basis and not a global one, hence destructor not really
+ * needed here.
+ */
+ if (setrlimit(RLIMIT_MEMLOCK, &rlim_new) < 0) {
+ perror("Unable to lift memlock rlimit");
+ /* Trying out lower limit, but expect potential test
+ * case failures from this!
+ */
+ rlim_new.rlim_cur = rlim_old.rlim_cur + (1UL << 20);
+ rlim_new.rlim_max = rlim_old.rlim_max + (1UL << 20);
+ setrlimit(RLIMIT_MEMLOCK, &rlim_new);
+ }
+}
diff --git a/tools/testing/selftests/bpf/bpf_util.h b/tools/testing/selftests/bpf/bpf_util.h
new file mode 100644
index 000000000..84fd6f1bf
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_util.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __BPF_UTIL__
+#define __BPF_UTIL__
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+static inline unsigned int bpf_num_possible_cpus(void)
+{
+ static const char *fcpu = "/sys/devices/system/cpu/possible";
+ unsigned int start, end, possible_cpus = 0;
+ char buff[128];
+ FILE *fp;
+ int len, n, i, j = 0;
+
+ fp = fopen(fcpu, "r");
+ if (!fp) {
+ printf("Failed to open %s: '%s'!\n", fcpu, strerror(errno));
+ exit(1);
+ }
+
+ if (!fgets(buff, sizeof(buff), fp)) {
+ printf("Failed to read %s!\n", fcpu);
+ exit(1);
+ }
+
+ len = strlen(buff);
+ for (i = 0; i <= len; i++) {
+ if (buff[i] == ',' || buff[i] == '\0') {
+ buff[i] = '\0';
+ n = sscanf(&buff[j], "%u-%u", &start, &end);
+ if (n <= 0) {
+ printf("Failed to retrieve # possible CPUs!\n");
+ exit(1);
+ } else if (n == 1) {
+ end = start;
+ }
+ possible_cpus += end - start + 1;
+ j = i + 1;
+ }
+ }
+
+ fclose(fp);
+
+ return possible_cpus;
+}
+
+#define __bpf_percpu_val_align __attribute__((__aligned__(8)))
+
+#define BPF_DECLARE_PERCPU(type, name) \
+ struct { type v; /* padding */ } __bpf_percpu_val_align \
+ name[bpf_num_possible_cpus()]
+#define bpf_percpu(name, cpu) name[(cpu)].v
+
+#ifndef ARRAY_SIZE
+# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
+#endif /* __BPF_UTIL__ */
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c
new file mode 100644
index 000000000..6af24f9a7
--- /dev/null
+++ b/tools/testing/selftests/bpf/cgroup_helpers.c
@@ -0,0 +1,235 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <sched.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <linux/limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <linux/sched.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <ftw.h>
+
+
+#include "cgroup_helpers.h"
+
+/*
+ * To avoid relying on the system setup, when setup_cgroup_env is called
+ * we create a new mount namespace, and cgroup namespace. The cgroup2
+ * root is mounted at CGROUP_MOUNT_PATH
+ *
+ * Unfortunately, most people don't have cgroupv2 enabled at this point in time.
+ * It's easier to create our own mount namespace and manage it ourselves.
+ *
+ * We assume /mnt exists.
+ */
+
+#define WALK_FD_LIMIT 16
+#define CGROUP_MOUNT_PATH "/mnt"
+#define CGROUP_WORK_DIR "/cgroup-test-work-dir"
+#define format_cgroup_path(buf, path) \
+ snprintf(buf, sizeof(buf), "%s%s%s", CGROUP_MOUNT_PATH, \
+ CGROUP_WORK_DIR, path)
+
+/**
+ * setup_cgroup_environment() - Setup the cgroup environment
+ *
+ * After calling this function, cleanup_cgroup_environment should be called
+ * once testing is complete.
+ *
+ * This function will print an error to stderr and return 1 if it is unable
+ * to setup the cgroup environment. If setup is successful, 0 is returned.
+ */
+int setup_cgroup_environment(void)
+{
+ char cgroup_workdir[PATH_MAX - 24];
+
+ format_cgroup_path(cgroup_workdir, "");
+
+ if (unshare(CLONE_NEWNS)) {
+ log_err("unshare");
+ return 1;
+ }
+
+ if (mount("none", "/", NULL, MS_REC | MS_PRIVATE, NULL)) {
+ log_err("mount fakeroot");
+ return 1;
+ }
+
+ if (mount("none", CGROUP_MOUNT_PATH, "cgroup2", 0, NULL) && errno != EBUSY) {
+ log_err("mount cgroup2");
+ return 1;
+ }
+
+ /* Cleanup existing failed runs, now that the environment is setup */
+ cleanup_cgroup_environment();
+
+ if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) {
+ log_err("mkdir cgroup work dir");
+ return 1;
+ }
+
+ return 0;
+}
+
+static int nftwfunc(const char *filename, const struct stat *statptr,
+ int fileflags, struct FTW *pfwt)
+{
+ if ((fileflags & FTW_D) && rmdir(filename))
+ log_err("Removing cgroup: %s", filename);
+ return 0;
+}
+
+
+static int join_cgroup_from_top(char *cgroup_path)
+{
+ char cgroup_procs_path[PATH_MAX + 1];
+ pid_t pid = getpid();
+ int fd, rc = 0;
+
+ snprintf(cgroup_procs_path, sizeof(cgroup_procs_path),
+ "%s/cgroup.procs", cgroup_path);
+
+ fd = open(cgroup_procs_path, O_WRONLY);
+ if (fd < 0) {
+ log_err("Opening Cgroup Procs: %s", cgroup_procs_path);
+ return 1;
+ }
+
+ if (dprintf(fd, "%d\n", pid) < 0) {
+ log_err("Joining Cgroup");
+ rc = 1;
+ }
+
+ close(fd);
+ return rc;
+}
+
+/**
+ * join_cgroup() - Join a cgroup
+ * @path: The cgroup path, relative to the workdir, to join
+ *
+ * This function expects a cgroup to already be created, relative to the cgroup
+ * work dir, and it joins it. For example, passing "/my-cgroup" as the path
+ * would actually put the calling process into the cgroup
+ * "/cgroup-test-work-dir/my-cgroup"
+ *
+ * On success, it returns 0, otherwise on failure it returns 1.
+ */
+int join_cgroup(const char *path)
+{
+ char cgroup_path[PATH_MAX + 1];
+
+ format_cgroup_path(cgroup_path, path);
+ return join_cgroup_from_top(cgroup_path);
+}
+
+/**
+ * cleanup_cgroup_environment() - Cleanup Cgroup Testing Environment
+ *
+ * This is an idempotent function to delete all temporary cgroups that
+ * have been created during the test, including the cgroup testing work
+ * directory.
+ *
+ * At call time, it moves the calling process to the root cgroup, and then
+ * runs the deletion process. It is idempotent, and should not fail, unless
+ * a process is lingering.
+ *
+ * On failure, it will print an error to stderr, and try to continue.
+ */
+void cleanup_cgroup_environment(void)
+{
+ char cgroup_workdir[PATH_MAX + 1];
+
+ format_cgroup_path(cgroup_workdir, "");
+ join_cgroup_from_top(CGROUP_MOUNT_PATH);
+ nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT);
+}
+
+/**
+ * create_and_get_cgroup() - Create a cgroup, relative to workdir, and get the FD
+ * @path: The cgroup path, relative to the workdir, to join
+ *
+ * This function creates a cgroup under the top level workdir and returns the
+ * file descriptor. It is idempotent.
+ *
+ * On success, it returns the file descriptor. On failure it returns 0.
+ * If there is a failure, it prints the error to stderr.
+ */
+int create_and_get_cgroup(const char *path)
+{
+ char cgroup_path[PATH_MAX + 1];
+ int fd;
+
+ format_cgroup_path(cgroup_path, path);
+ if (mkdir(cgroup_path, 0777) && errno != EEXIST) {
+ log_err("mkdiring cgroup %s .. %s", path, cgroup_path);
+ return 0;
+ }
+
+ fd = open(cgroup_path, O_RDONLY);
+ if (fd < 0) {
+ log_err("Opening Cgroup");
+ return 0;
+ }
+
+ return fd;
+}
+
+/**
+ * get_cgroup_id() - Get cgroup id for a particular cgroup path
+ * @path: The cgroup path, relative to the workdir, to join
+ *
+ * On success, it returns the cgroup id. On failure it returns 0,
+ * which is an invalid cgroup id.
+ * If there is a failure, it prints the error to stderr.
+ */
+unsigned long long get_cgroup_id(const char *path)
+{
+ int dirfd, err, flags, mount_id, fhsize;
+ union {
+ unsigned long long cgid;
+ unsigned char raw_bytes[8];
+ } id;
+ char cgroup_workdir[PATH_MAX + 1];
+ struct file_handle *fhp, *fhp2;
+ unsigned long long ret = 0;
+
+ format_cgroup_path(cgroup_workdir, path);
+
+ dirfd = AT_FDCWD;
+ flags = 0;
+ fhsize = sizeof(*fhp);
+ fhp = calloc(1, fhsize);
+ if (!fhp) {
+ log_err("calloc");
+ return 0;
+ }
+ err = name_to_handle_at(dirfd, cgroup_workdir, fhp, &mount_id, flags);
+ if (err >= 0 || fhp->handle_bytes != 8) {
+ log_err("name_to_handle_at");
+ goto free_mem;
+ }
+
+ fhsize = sizeof(struct file_handle) + fhp->handle_bytes;
+ fhp2 = realloc(fhp, fhsize);
+ if (!fhp2) {
+ log_err("realloc");
+ goto free_mem;
+ }
+ err = name_to_handle_at(dirfd, cgroup_workdir, fhp2, &mount_id, flags);
+ fhp = fhp2;
+ if (err < 0) {
+ log_err("name_to_handle_at");
+ goto free_mem;
+ }
+
+ memcpy(id.raw_bytes, fhp->f_handle, 8);
+ ret = id.cgid;
+
+free_mem:
+ free(fhp);
+ return ret;
+}
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h
new file mode 100644
index 000000000..d64bb8957
--- /dev/null
+++ b/tools/testing/selftests/bpf/cgroup_helpers.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __CGROUP_HELPERS_H
+#define __CGROUP_HELPERS_H
+#include <errno.h>
+#include <string.h>
+
+#define clean_errno() (errno == 0 ? "None" : strerror(errno))
+#define log_err(MSG, ...) fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
+ __FILE__, __LINE__, clean_errno(), ##__VA_ARGS__)
+
+
+int create_and_get_cgroup(const char *path);
+int join_cgroup(const char *path);
+int setup_cgroup_environment(void);
+void cleanup_cgroup_environment(void);
+unsigned long long get_cgroup_id(const char *path);
+
+#endif
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
new file mode 100644
index 000000000..b4994a949
--- /dev/null
+++ b/tools/testing/selftests/bpf/config
@@ -0,0 +1,20 @@
+CONFIG_BPF=y
+CONFIG_BPF_SYSCALL=y
+CONFIG_NET_CLS_BPF=m
+CONFIG_BPF_EVENTS=y
+CONFIG_TEST_BPF=m
+CONFIG_CGROUP_BPF=y
+CONFIG_NETDEVSIM=m
+CONFIG_NET_CLS_ACT=y
+CONFIG_NET_SCHED=y
+CONFIG_NET_SCH_INGRESS=y
+CONFIG_NET_IPIP=y
+CONFIG_IPV6=y
+CONFIG_NET_IPGRE_DEMUX=y
+CONFIG_NET_IPGRE=y
+CONFIG_IPV6_GRE=y
+CONFIG_CRYPTO_USER_API_HASH=m
+CONFIG_CRYPTO_HMAC=m
+CONFIG_CRYPTO_SHA256=m
+CONFIG_VXLAN=y
+CONFIG_GENEVE=y
diff --git a/tools/testing/selftests/bpf/connect4_prog.c b/tools/testing/selftests/bpf/connect4_prog.c
new file mode 100644
index 000000000..5a88a681d
--- /dev/null
+++ b/tools/testing/selftests/bpf/connect4_prog.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <string.h>
+
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <sys/socket.h>
+
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define SRC_REWRITE_IP4 0x7f000004U
+#define DST_REWRITE_IP4 0x7f000001U
+#define DST_REWRITE_PORT4 4444
+
+int _version SEC("version") = 1;
+
+SEC("cgroup/connect4")
+int connect_v4_prog(struct bpf_sock_addr *ctx)
+{
+ struct sockaddr_in sa;
+
+ /* Rewrite destination. */
+ ctx->user_ip4 = bpf_htonl(DST_REWRITE_IP4);
+ ctx->user_port = bpf_htons(DST_REWRITE_PORT4);
+
+ if (ctx->type == SOCK_DGRAM || ctx->type == SOCK_STREAM) {
+ ///* Rewrite source. */
+ memset(&sa, 0, sizeof(sa));
+
+ sa.sin_family = AF_INET;
+ sa.sin_port = bpf_htons(0);
+ sa.sin_addr.s_addr = bpf_htonl(SRC_REWRITE_IP4);
+
+ if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0)
+ return 0;
+ }
+
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/connect6_prog.c b/tools/testing/selftests/bpf/connect6_prog.c
new file mode 100644
index 000000000..8ea3f7d12
--- /dev/null
+++ b/tools/testing/selftests/bpf/connect6_prog.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <string.h>
+
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <sys/socket.h>
+
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define SRC_REWRITE_IP6_0 0
+#define SRC_REWRITE_IP6_1 0
+#define SRC_REWRITE_IP6_2 0
+#define SRC_REWRITE_IP6_3 6
+
+#define DST_REWRITE_IP6_0 0
+#define DST_REWRITE_IP6_1 0
+#define DST_REWRITE_IP6_2 0
+#define DST_REWRITE_IP6_3 1
+
+#define DST_REWRITE_PORT6 6666
+
+int _version SEC("version") = 1;
+
+SEC("cgroup/connect6")
+int connect_v6_prog(struct bpf_sock_addr *ctx)
+{
+ struct sockaddr_in6 sa;
+
+ /* Rewrite destination. */
+ ctx->user_ip6[0] = bpf_htonl(DST_REWRITE_IP6_0);
+ ctx->user_ip6[1] = bpf_htonl(DST_REWRITE_IP6_1);
+ ctx->user_ip6[2] = bpf_htonl(DST_REWRITE_IP6_2);
+ ctx->user_ip6[3] = bpf_htonl(DST_REWRITE_IP6_3);
+
+ ctx->user_port = bpf_htons(DST_REWRITE_PORT6);
+
+ if (ctx->type == SOCK_DGRAM || ctx->type == SOCK_STREAM) {
+ /* Rewrite source. */
+ memset(&sa, 0, sizeof(sa));
+
+ sa.sin6_family = AF_INET6;
+ sa.sin6_port = bpf_htons(0);
+
+ sa.sin6_addr.s6_addr32[0] = bpf_htonl(SRC_REWRITE_IP6_0);
+ sa.sin6_addr.s6_addr32[1] = bpf_htonl(SRC_REWRITE_IP6_1);
+ sa.sin6_addr.s6_addr32[2] = bpf_htonl(SRC_REWRITE_IP6_2);
+ sa.sin6_addr.s6_addr32[3] = bpf_htonl(SRC_REWRITE_IP6_3);
+
+ if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0)
+ return 0;
+ }
+
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/dev_cgroup.c b/tools/testing/selftests/bpf/dev_cgroup.c
new file mode 100644
index 000000000..ce41a3475
--- /dev/null
+++ b/tools/testing/selftests/bpf/dev_cgroup.c
@@ -0,0 +1,60 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#include <linux/bpf.h>
+#include <linux/version.h>
+#include "bpf_helpers.h"
+
+SEC("cgroup/dev")
+int bpf_prog1(struct bpf_cgroup_dev_ctx *ctx)
+{
+ short type = ctx->access_type & 0xFFFF;
+#ifdef DEBUG
+ short access = ctx->access_type >> 16;
+ char fmt[] = " %d:%d \n";
+
+ switch (type) {
+ case BPF_DEVCG_DEV_BLOCK:
+ fmt[0] = 'b';
+ break;
+ case BPF_DEVCG_DEV_CHAR:
+ fmt[0] = 'c';
+ break;
+ default:
+ fmt[0] = '?';
+ break;
+ }
+
+ if (access & BPF_DEVCG_ACC_READ)
+ fmt[8] = 'r';
+
+ if (access & BPF_DEVCG_ACC_WRITE)
+ fmt[9] = 'w';
+
+ if (access & BPF_DEVCG_ACC_MKNOD)
+ fmt[10] = 'm';
+
+ bpf_trace_printk(fmt, sizeof(fmt), ctx->major, ctx->minor);
+#endif
+
+ /* Allow access to /dev/zero and /dev/random.
+ * Forbid everything else.
+ */
+ if (ctx->major != 1 || type != BPF_DEVCG_DEV_CHAR)
+ return 0;
+
+ switch (ctx->minor) {
+ case 5: /* 1:5 /dev/zero */
+ case 9: /* 1:9 /dev/urandom */
+ return 1;
+ }
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/tools/testing/selftests/bpf/get_cgroup_id_kern.c b/tools/testing/selftests/bpf/get_cgroup_id_kern.c
new file mode 100644
index 000000000..014dba10b
--- /dev/null
+++ b/tools/testing/selftests/bpf/get_cgroup_id_kern.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+struct bpf_map_def SEC("maps") cg_ids = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u64),
+ .max_entries = 1,
+};
+
+struct bpf_map_def SEC("maps") pidmap = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u32),
+ .max_entries = 1,
+};
+
+SEC("tracepoint/syscalls/sys_enter_nanosleep")
+int trace(void *ctx)
+{
+ __u32 pid = bpf_get_current_pid_tgid();
+ __u32 key = 0, *expected_pid;
+ __u64 *val;
+
+ expected_pid = bpf_map_lookup_elem(&pidmap, &key);
+ if (!expected_pid || *expected_pid != pid)
+ return 0;
+
+ val = bpf_map_lookup_elem(&cg_ids, &key);
+ if (val)
+ *val = bpf_get_current_cgroup_id();
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
diff --git a/tools/testing/selftests/bpf/get_cgroup_id_user.c b/tools/testing/selftests/bpf/get_cgroup_id_user.c
new file mode 100644
index 000000000..e8da7b391
--- /dev/null
+++ b/tools/testing/selftests/bpf/get_cgroup_id_user.c
@@ -0,0 +1,149 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <syscall.h>
+#include <unistd.h>
+#include <linux/perf_event.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <linux/bpf.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "cgroup_helpers.h"
+#include "bpf_rlimit.h"
+
+#define CHECK(condition, tag, format...) ({ \
+ int __ret = !!(condition); \
+ if (__ret) { \
+ printf("%s:FAIL:%s ", __func__, tag); \
+ printf(format); \
+ } else { \
+ printf("%s:PASS:%s\n", __func__, tag); \
+ } \
+ __ret; \
+})
+
+static int bpf_find_map(const char *test, struct bpf_object *obj,
+ const char *name)
+{
+ struct bpf_map *map;
+
+ map = bpf_object__find_map_by_name(obj, name);
+ if (!map)
+ return -1;
+ return bpf_map__fd(map);
+}
+
+#define TEST_CGROUP "/test-bpf-get-cgroup-id/"
+
+int main(int argc, char **argv)
+{
+ const char *probe_name = "syscalls/sys_enter_nanosleep";
+ const char *file = "get_cgroup_id_kern.o";
+ int err, bytes, efd, prog_fd, pmu_fd;
+ int cgroup_fd, cgidmap_fd, pidmap_fd;
+ struct perf_event_attr attr = {};
+ struct bpf_object *obj;
+ __u64 kcgid = 0, ucgid;
+ __u32 key = 0, pid;
+ int exit_code = 1;
+ char buf[256];
+
+ err = setup_cgroup_environment();
+ if (CHECK(err, "setup_cgroup_environment", "err %d errno %d\n", err,
+ errno))
+ return 1;
+
+ cgroup_fd = create_and_get_cgroup(TEST_CGROUP);
+ if (CHECK(cgroup_fd < 0, "create_and_get_cgroup", "err %d errno %d\n",
+ cgroup_fd, errno))
+ goto cleanup_cgroup_env;
+
+ err = join_cgroup(TEST_CGROUP);
+ if (CHECK(err, "join_cgroup", "err %d errno %d\n", err, errno))
+ goto cleanup_cgroup_env;
+
+ err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
+ if (CHECK(err, "bpf_prog_load", "err %d errno %d\n", err, errno))
+ goto cleanup_cgroup_env;
+
+ cgidmap_fd = bpf_find_map(__func__, obj, "cg_ids");
+ if (CHECK(cgidmap_fd < 0, "bpf_find_map", "err %d errno %d\n",
+ cgidmap_fd, errno))
+ goto close_prog;
+
+ pidmap_fd = bpf_find_map(__func__, obj, "pidmap");
+ if (CHECK(pidmap_fd < 0, "bpf_find_map", "err %d errno %d\n",
+ pidmap_fd, errno))
+ goto close_prog;
+
+ pid = getpid();
+ bpf_map_update_elem(pidmap_fd, &key, &pid, 0);
+
+ snprintf(buf, sizeof(buf),
+ "/sys/kernel/debug/tracing/events/%s/id", probe_name);
+ efd = open(buf, O_RDONLY, 0);
+ if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno))
+ goto close_prog;
+ bytes = read(efd, buf, sizeof(buf));
+ close(efd);
+ if (CHECK(bytes <= 0 || bytes >= sizeof(buf), "read",
+ "bytes %d errno %d\n", bytes, errno))
+ goto close_prog;
+
+ attr.config = strtol(buf, NULL, 0);
+ attr.type = PERF_TYPE_TRACEPOINT;
+ attr.sample_type = PERF_SAMPLE_RAW;
+ attr.sample_period = 1;
+ attr.wakeup_events = 1;
+
+ /* attach to this pid so the all bpf invocations will be in the
+ * cgroup associated with this pid.
+ */
+ pmu_fd = syscall(__NR_perf_event_open, &attr, getpid(), -1, -1, 0);
+ if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n", pmu_fd,
+ errno))
+ goto close_prog;
+
+ err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
+ if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n", err,
+ errno))
+ goto close_pmu;
+
+ err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
+ if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n", err,
+ errno))
+ goto close_pmu;
+
+ /* trigger some syscalls */
+ sleep(1);
+
+ err = bpf_map_lookup_elem(cgidmap_fd, &key, &kcgid);
+ if (CHECK(err, "bpf_map_lookup_elem", "err %d errno %d\n", err, errno))
+ goto close_pmu;
+
+ ucgid = get_cgroup_id(TEST_CGROUP);
+ if (CHECK(kcgid != ucgid, "compare_cgroup_id",
+ "kern cgid %llx user cgid %llx", kcgid, ucgid))
+ goto close_pmu;
+
+ exit_code = 0;
+ printf("%s:PASS\n", argv[0]);
+
+close_pmu:
+ close(pmu_fd);
+close_prog:
+ bpf_object__close(obj);
+cleanup_cgroup_env:
+ cleanup_cgroup_environment();
+ return exit_code;
+}
diff --git a/tools/testing/selftests/bpf/gnu/stubs.h b/tools/testing/selftests/bpf/gnu/stubs.h
new file mode 100644
index 000000000..719225b16
--- /dev/null
+++ b/tools/testing/selftests/bpf/gnu/stubs.h
@@ -0,0 +1 @@
+/* dummy .h to trick /usr/include/features.h to work with 'clang -target bpf' */
diff --git a/tools/testing/selftests/bpf/include/uapi/linux/types.h b/tools/testing/selftests/bpf/include/uapi/linux/types.h
new file mode 100644
index 000000000..91fa51a9c
--- /dev/null
+++ b/tools/testing/selftests/bpf/include/uapi/linux/types.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _UAPI_LINUX_TYPES_H
+#define _UAPI_LINUX_TYPES_H
+
+#include <asm-generic/int-ll64.h>
+
+/* copied from linux:include/uapi/linux/types.h */
+#define __bitwise
+typedef __u16 __bitwise __le16;
+typedef __u16 __bitwise __be16;
+typedef __u32 __bitwise __le32;
+typedef __u32 __bitwise __be32;
+typedef __u64 __bitwise __le64;
+typedef __u64 __bitwise __be64;
+
+typedef __u16 __bitwise __sum16;
+typedef __u32 __bitwise __wsum;
+
+#define __aligned_u64 __u64 __attribute__((aligned(8)))
+#define __aligned_be64 __be64 __attribute__((aligned(8)))
+#define __aligned_le64 __le64 __attribute__((aligned(8)))
+
+#endif /* _UAPI_LINUX_TYPES_H */
diff --git a/tools/testing/selftests/bpf/sample_map_ret0.c b/tools/testing/selftests/bpf/sample_map_ret0.c
new file mode 100644
index 000000000..075630367
--- /dev/null
+++ b/tools/testing/selftests/bpf/sample_map_ret0.c
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+struct bpf_map_def SEC("maps") htab = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(long),
+ .max_entries = 2,
+};
+
+struct bpf_map_def SEC("maps") array = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(long),
+ .max_entries = 2,
+};
+
+/* Sample program which should always load for testing control paths. */
+SEC(".text") int func()
+{
+ __u64 key64 = 0;
+ __u32 key = 0;
+ long *value;
+
+ value = bpf_map_lookup_elem(&htab, &key);
+ if (!value)
+ return 1;
+ value = bpf_map_lookup_elem(&array, &key64);
+ if (!value)
+ return 1;
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/sample_ret0.c b/tools/testing/selftests/bpf/sample_ret0.c
new file mode 100644
index 000000000..fec99750d
--- /dev/null
+++ b/tools/testing/selftests/bpf/sample_ret0.c
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */
+
+/* Sample program which should always load for testing control paths. */
+int func()
+{
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/sendmsg4_prog.c b/tools/testing/selftests/bpf/sendmsg4_prog.c
new file mode 100644
index 000000000..a91536b1c
--- /dev/null
+++ b/tools/testing/selftests/bpf/sendmsg4_prog.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+#include <sys/socket.h>
+
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define SRC1_IP4 0xAC100001U /* 172.16.0.1 */
+#define SRC2_IP4 0x00000000U
+#define SRC_REWRITE_IP4 0x7f000004U
+#define DST_IP4 0xC0A801FEU /* 192.168.1.254 */
+#define DST_REWRITE_IP4 0x7f000001U
+#define DST_PORT 4040
+#define DST_REWRITE_PORT4 4444
+
+int _version SEC("version") = 1;
+
+SEC("cgroup/sendmsg4")
+int sendmsg_v4_prog(struct bpf_sock_addr *ctx)
+{
+ if (ctx->type != SOCK_DGRAM)
+ return 0;
+
+ /* Rewrite source. */
+ if (ctx->msg_src_ip4 == bpf_htonl(SRC1_IP4) ||
+ ctx->msg_src_ip4 == bpf_htonl(SRC2_IP4)) {
+ ctx->msg_src_ip4 = bpf_htonl(SRC_REWRITE_IP4);
+ } else {
+ /* Unexpected source. Reject sendmsg. */
+ return 0;
+ }
+
+ /* Rewrite destination. */
+ if ((ctx->user_ip4 >> 24) == (bpf_htonl(DST_IP4) >> 24) &&
+ ctx->user_port == bpf_htons(DST_PORT)) {
+ ctx->user_ip4 = bpf_htonl(DST_REWRITE_IP4);
+ ctx->user_port = bpf_htons(DST_REWRITE_PORT4);
+ } else {
+ /* Unexpected source. Reject sendmsg. */
+ return 0;
+ }
+
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/sendmsg6_prog.c b/tools/testing/selftests/bpf/sendmsg6_prog.c
new file mode 100644
index 000000000..a68062820
--- /dev/null
+++ b/tools/testing/selftests/bpf/sendmsg6_prog.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+#include <sys/socket.h>
+
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define SRC_REWRITE_IP6_0 0
+#define SRC_REWRITE_IP6_1 0
+#define SRC_REWRITE_IP6_2 0
+#define SRC_REWRITE_IP6_3 6
+
+#define DST_REWRITE_IP6_0 0
+#define DST_REWRITE_IP6_1 0
+#define DST_REWRITE_IP6_2 0
+#define DST_REWRITE_IP6_3 1
+
+#define DST_REWRITE_PORT6 6666
+
+int _version SEC("version") = 1;
+
+SEC("cgroup/sendmsg6")
+int sendmsg_v6_prog(struct bpf_sock_addr *ctx)
+{
+ if (ctx->type != SOCK_DGRAM)
+ return 0;
+
+ /* Rewrite source. */
+ if (ctx->msg_src_ip6[3] == bpf_htonl(1) ||
+ ctx->msg_src_ip6[3] == bpf_htonl(0)) {
+ ctx->msg_src_ip6[0] = bpf_htonl(SRC_REWRITE_IP6_0);
+ ctx->msg_src_ip6[1] = bpf_htonl(SRC_REWRITE_IP6_1);
+ ctx->msg_src_ip6[2] = bpf_htonl(SRC_REWRITE_IP6_2);
+ ctx->msg_src_ip6[3] = bpf_htonl(SRC_REWRITE_IP6_3);
+ } else {
+ /* Unexpected source. Reject sendmsg. */
+ return 0;
+ }
+
+ /* Rewrite destination. */
+ if (ctx->user_ip6[0] == bpf_htonl(0xFACEB00C)) {
+ ctx->user_ip6[0] = bpf_htonl(DST_REWRITE_IP6_0);
+ ctx->user_ip6[1] = bpf_htonl(DST_REWRITE_IP6_1);
+ ctx->user_ip6[2] = bpf_htonl(DST_REWRITE_IP6_2);
+ ctx->user_ip6[3] = bpf_htonl(DST_REWRITE_IP6_3);
+
+ ctx->user_port = bpf_htons(DST_REWRITE_PORT6);
+ } else {
+ /* Unexpected destination. Reject sendmsg. */
+ return 0;
+ }
+
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/socket_cookie_prog.c b/tools/testing/selftests/bpf/socket_cookie_prog.c
new file mode 100644
index 000000000..9ff8ac4b0
--- /dev/null
+++ b/tools/testing/selftests/bpf/socket_cookie_prog.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <linux/bpf.h>
+#include <sys/socket.h>
+
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+struct bpf_map_def SEC("maps") socket_cookies = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(__u64),
+ .value_size = sizeof(__u32),
+ .max_entries = 1 << 8,
+};
+
+SEC("cgroup/connect6")
+int set_cookie(struct bpf_sock_addr *ctx)
+{
+ __u32 cookie_value = 0xFF;
+ __u64 cookie_key;
+
+ if (ctx->family != AF_INET6 || ctx->user_family != AF_INET6)
+ return 1;
+
+ cookie_key = bpf_get_socket_cookie(ctx);
+ if (bpf_map_update_elem(&socket_cookies, &cookie_key, &cookie_value, 0))
+ return 0;
+
+ return 1;
+}
+
+SEC("sockops")
+int update_cookie(struct bpf_sock_ops *ctx)
+{
+ __u32 new_cookie_value;
+ __u32 *cookie_value;
+ __u64 cookie_key;
+
+ if (ctx->family != AF_INET6)
+ return 1;
+
+ if (ctx->op != BPF_SOCK_OPS_TCP_CONNECT_CB)
+ return 1;
+
+ cookie_key = bpf_get_socket_cookie(ctx);
+
+ cookie_value = bpf_map_lookup_elem(&socket_cookies, &cookie_key);
+ if (!cookie_value)
+ return 1;
+
+ new_cookie_value = (ctx->local_port << 8) | *cookie_value;
+ bpf_map_update_elem(&socket_cookies, &cookie_key, &new_cookie_value, 0);
+
+ return 1;
+}
+
+int _version SEC("version") = 1;
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/sockmap_parse_prog.c b/tools/testing/selftests/bpf/sockmap_parse_prog.c
new file mode 100644
index 000000000..0f92858f6
--- /dev/null
+++ b/tools/testing/selftests/bpf/sockmap_parse_prog.c
@@ -0,0 +1,46 @@
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+#include "bpf_util.h"
+#include "bpf_endian.h"
+
+int _version SEC("version") = 1;
+
+#define bpf_printk(fmt, ...) \
+({ \
+ char ____fmt[] = fmt; \
+ bpf_trace_printk(____fmt, sizeof(____fmt), \
+ ##__VA_ARGS__); \
+})
+
+SEC("sk_skb1")
+int bpf_prog1(struct __sk_buff *skb)
+{
+ void *data_end = (void *)(long) skb->data_end;
+ void *data = (void *)(long) skb->data;
+ __u32 lport = skb->local_port;
+ __u32 rport = skb->remote_port;
+ __u8 *d = data;
+ __u32 len = (__u32) data_end - (__u32) data;
+ int err;
+
+ if (data + 10 > data_end) {
+ err = bpf_skb_pull_data(skb, 10);
+ if (err)
+ return SK_DROP;
+
+ data_end = (void *)(long)skb->data_end;
+ data = (void *)(long)skb->data;
+ if (data + 10 > data_end)
+ return SK_DROP;
+ }
+
+ /* This write/read is a bit pointless but tests the verifier and
+ * strparser handler for read/write pkt data and access into sk
+ * fields.
+ */
+ d = data;
+ d[7] = 1;
+ return skb->len;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/sockmap_tcp_msg_prog.c b/tools/testing/selftests/bpf/sockmap_tcp_msg_prog.c
new file mode 100644
index 000000000..12a7b5c82
--- /dev/null
+++ b/tools/testing/selftests/bpf/sockmap_tcp_msg_prog.c
@@ -0,0 +1,33 @@
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+#include "bpf_util.h"
+#include "bpf_endian.h"
+
+int _version SEC("version") = 1;
+
+#define bpf_printk(fmt, ...) \
+({ \
+ char ____fmt[] = fmt; \
+ bpf_trace_printk(____fmt, sizeof(____fmt), \
+ ##__VA_ARGS__); \
+})
+
+SEC("sk_msg1")
+int bpf_prog1(struct sk_msg_md *msg)
+{
+ void *data_end = (void *)(long) msg->data_end;
+ void *data = (void *)(long) msg->data;
+
+ char *d;
+
+ if (data + 8 > data_end)
+ return SK_DROP;
+
+ bpf_printk("data length %i\n", (__u64)msg->data_end - (__u64)msg->data);
+ d = (char *)data;
+ bpf_printk("hello sendmsg hook %i %i\n", d[0], d[1]);
+
+ return SK_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/sockmap_verdict_prog.c b/tools/testing/selftests/bpf/sockmap_verdict_prog.c
new file mode 100644
index 000000000..2ce7634a4
--- /dev/null
+++ b/tools/testing/selftests/bpf/sockmap_verdict_prog.c
@@ -0,0 +1,73 @@
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+#include "bpf_util.h"
+#include "bpf_endian.h"
+
+int _version SEC("version") = 1;
+
+#define bpf_printk(fmt, ...) \
+({ \
+ char ____fmt[] = fmt; \
+ bpf_trace_printk(____fmt, sizeof(____fmt), \
+ ##__VA_ARGS__); \
+})
+
+struct bpf_map_def SEC("maps") sock_map_rx = {
+ .type = BPF_MAP_TYPE_SOCKMAP,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 20,
+};
+
+struct bpf_map_def SEC("maps") sock_map_tx = {
+ .type = BPF_MAP_TYPE_SOCKMAP,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 20,
+};
+
+struct bpf_map_def SEC("maps") sock_map_msg = {
+ .type = BPF_MAP_TYPE_SOCKMAP,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 20,
+};
+
+struct bpf_map_def SEC("maps") sock_map_break = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 20,
+};
+
+SEC("sk_skb2")
+int bpf_prog2(struct __sk_buff *skb)
+{
+ void *data_end = (void *)(long) skb->data_end;
+ void *data = (void *)(long) skb->data;
+ __u32 lport = skb->local_port;
+ __u32 rport = skb->remote_port;
+ __u8 *d = data;
+ __u8 sk, map;
+
+ if (data + 8 > data_end)
+ return SK_DROP;
+
+ map = d[0];
+ sk = d[1];
+
+ d[0] = 0xd;
+ d[1] = 0xe;
+ d[2] = 0xa;
+ d[3] = 0xd;
+ d[4] = 0xb;
+ d[5] = 0xe;
+ d[6] = 0xe;
+ d[7] = 0xf;
+
+ if (!map)
+ return bpf_sk_redirect_map(skb, &sock_map_rx, sk, 0);
+ return bpf_sk_redirect_map(skb, &sock_map_tx, sk, 0);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/tcp_client.py b/tools/testing/selftests/bpf/tcp_client.py
new file mode 100755
index 000000000..7f8200a87
--- /dev/null
+++ b/tools/testing/selftests/bpf/tcp_client.py
@@ -0,0 +1,51 @@
+#!/usr/bin/env python3
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+
+import sys, os, os.path, getopt
+import socket, time
+import subprocess
+import select
+
+def read(sock, n):
+ buf = b''
+ while len(buf) < n:
+ rem = n - len(buf)
+ try: s = sock.recv(rem)
+ except (socket.error) as e: return b''
+ buf += s
+ return buf
+
+def send(sock, s):
+ total = len(s)
+ count = 0
+ while count < total:
+ try: n = sock.send(s)
+ except (socket.error) as e: n = 0
+ if n == 0:
+ return count;
+ count += n
+ return count
+
+
+serverPort = int(sys.argv[1])
+HostName = socket.gethostname()
+
+# create active socket
+sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
+try:
+ sock.connect((HostName, serverPort))
+except socket.error as e:
+ sys.exit(1)
+
+buf = b''
+n = 0
+while n < 1000:
+ buf += b'+'
+ n += 1
+
+sock.settimeout(1);
+n = send(sock, buf)
+n = read(sock, 500)
+sys.exit(0)
diff --git a/tools/testing/selftests/bpf/tcp_server.py b/tools/testing/selftests/bpf/tcp_server.py
new file mode 100755
index 000000000..b39903fca
--- /dev/null
+++ b/tools/testing/selftests/bpf/tcp_server.py
@@ -0,0 +1,83 @@
+#!/usr/bin/env python3
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+
+import sys, os, os.path, getopt
+import socket, time
+import subprocess
+import select
+
+def read(sock, n):
+ buf = b''
+ while len(buf) < n:
+ rem = n - len(buf)
+ try: s = sock.recv(rem)
+ except (socket.error) as e: return b''
+ buf += s
+ return buf
+
+def send(sock, s):
+ total = len(s)
+ count = 0
+ while count < total:
+ try: n = sock.send(s)
+ except (socket.error) as e: n = 0
+ if n == 0:
+ return count;
+ count += n
+ return count
+
+
+SERVER_PORT = 12877
+MAX_PORTS = 2
+
+serverPort = SERVER_PORT
+serverSocket = None
+
+HostName = socket.gethostname()
+
+# create passive socket
+serverSocket = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
+host = socket.gethostname()
+
+try: serverSocket.bind((host, 0))
+except socket.error as msg:
+ print('bind fails: ' + str(msg))
+
+sn = serverSocket.getsockname()
+serverPort = sn[1]
+
+cmdStr = ("./tcp_client.py %d &") % (serverPort)
+os.system(cmdStr)
+
+buf = b''
+n = 0
+while n < 500:
+ buf += b'.'
+ n += 1
+
+serverSocket.listen(MAX_PORTS)
+readList = [serverSocket]
+
+while True:
+ readyRead, readyWrite, inError = \
+ select.select(readList, [], [], 2)
+
+ if len(readyRead) > 0:
+ waitCount = 0
+ for sock in readyRead:
+ if sock == serverSocket:
+ (clientSocket, address) = serverSocket.accept()
+ address = str(address[0])
+ readList.append(clientSocket)
+ else:
+ sock.settimeout(1);
+ s = read(sock, 1000)
+ n = send(sock, buf)
+ sock.close()
+ serverSocket.close()
+ sys.exit(0)
+ else:
+ print('Select timeout!')
+ sys.exit(1)
diff --git a/tools/testing/selftests/bpf/test_adjust_tail.c b/tools/testing/selftests/bpf/test_adjust_tail.c
new file mode 100644
index 000000000..4cd5e860c
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_adjust_tail.c
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Copyright (c) 2018 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include "bpf_helpers.h"
+
+int _version SEC("version") = 1;
+
+SEC("xdp_adjust_tail")
+int _xdp_adjust_tail(struct xdp_md *xdp)
+{
+ void *data_end = (void *)(long)xdp->data_end;
+ void *data = (void *)(long)xdp->data;
+ int offset = 0;
+
+ if (data_end - data == 54)
+ offset = 256;
+ else
+ offset = 20;
+ if (bpf_xdp_adjust_tail(xdp, 0 - offset))
+ return XDP_DROP;
+ return XDP_TX;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_align.c b/tools/testing/selftests/bpf/test_align.c
new file mode 100644
index 000000000..3c789d03b
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_align.c
@@ -0,0 +1,719 @@
+#include <asm/types.h>
+#include <linux/types.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+#include <stddef.h>
+#include <stdbool.h>
+
+#include <linux/unistd.h>
+#include <linux/filter.h>
+#include <linux/bpf_perf_event.h>
+#include <linux/bpf.h>
+
+#include <bpf/bpf.h>
+
+#include "../../../include/linux/filter.h"
+#include "bpf_rlimit.h"
+#include "bpf_util.h"
+
+#define MAX_INSNS 512
+#define MAX_MATCHES 16
+
+struct bpf_reg_match {
+ unsigned int line;
+ const char *match;
+};
+
+struct bpf_align_test {
+ const char *descr;
+ struct bpf_insn insns[MAX_INSNS];
+ enum {
+ UNDEF,
+ ACCEPT,
+ REJECT
+ } result;
+ enum bpf_prog_type prog_type;
+ /* Matches must be in order of increasing line */
+ struct bpf_reg_match matches[MAX_MATCHES];
+};
+
+static struct bpf_align_test tests[] = {
+ /* Four tests of known constants. These aren't staggeringly
+ * interesting since we track exact values now.
+ */
+ {
+ .descr = "mov",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_3, 2),
+ BPF_MOV64_IMM(BPF_REG_3, 4),
+ BPF_MOV64_IMM(BPF_REG_3, 8),
+ BPF_MOV64_IMM(BPF_REG_3, 16),
+ BPF_MOV64_IMM(BPF_REG_3, 32),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .matches = {
+ {1, "R1=ctx(id=0,off=0,imm=0)"},
+ {1, "R10=fp0"},
+ {1, "R3_w=inv2"},
+ {2, "R3_w=inv4"},
+ {3, "R3_w=inv8"},
+ {4, "R3_w=inv16"},
+ {5, "R3_w=inv32"},
+ },
+ },
+ {
+ .descr = "shift",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_3, 1),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_3, 4),
+ BPF_MOV64_IMM(BPF_REG_4, 32),
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .matches = {
+ {1, "R1=ctx(id=0,off=0,imm=0)"},
+ {1, "R10=fp0"},
+ {1, "R3_w=inv1"},
+ {2, "R3_w=inv2"},
+ {3, "R3_w=inv4"},
+ {4, "R3_w=inv8"},
+ {5, "R3_w=inv16"},
+ {6, "R3_w=inv1"},
+ {7, "R4_w=inv32"},
+ {8, "R4_w=inv16"},
+ {9, "R4_w=inv8"},
+ {10, "R4_w=inv4"},
+ {11, "R4_w=inv2"},
+ },
+ },
+ {
+ .descr = "addsub",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_3, 4),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 4),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 2),
+ BPF_MOV64_IMM(BPF_REG_4, 8),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .matches = {
+ {1, "R1=ctx(id=0,off=0,imm=0)"},
+ {1, "R10=fp0"},
+ {1, "R3_w=inv4"},
+ {2, "R3_w=inv8"},
+ {3, "R3_w=inv10"},
+ {4, "R4_w=inv8"},
+ {5, "R4_w=inv12"},
+ {6, "R4_w=inv14"},
+ },
+ },
+ {
+ .descr = "mul",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_3, 7),
+ BPF_ALU64_IMM(BPF_MUL, BPF_REG_3, 1),
+ BPF_ALU64_IMM(BPF_MUL, BPF_REG_3, 2),
+ BPF_ALU64_IMM(BPF_MUL, BPF_REG_3, 4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .matches = {
+ {1, "R1=ctx(id=0,off=0,imm=0)"},
+ {1, "R10=fp0"},
+ {1, "R3_w=inv7"},
+ {2, "R3_w=inv7"},
+ {3, "R3_w=inv14"},
+ {4, "R3_w=inv56"},
+ },
+ },
+
+ /* Tests using unknown values */
+#define PREP_PKT_POINTERS \
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, \
+ offsetof(struct __sk_buff, data)), \
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, \
+ offsetof(struct __sk_buff, data_end))
+
+#define LOAD_UNKNOWN(DST_REG) \
+ PREP_PKT_POINTERS, \
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), \
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), \
+ BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 1), \
+ BPF_EXIT_INSN(), \
+ BPF_LDX_MEM(BPF_B, DST_REG, BPF_REG_2, 0)
+
+ {
+ .descr = "unknown shift",
+ .insns = {
+ LOAD_UNKNOWN(BPF_REG_3),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
+ LOAD_UNKNOWN(BPF_REG_4),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_4, 5),
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .matches = {
+ {7, "R0=pkt(id=0,off=8,r=8,imm=0)"},
+ {7, "R3_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+ {8, "R3_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
+ {9, "R3_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {10, "R3_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"},
+ {11, "R3_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"},
+ {18, "R3=pkt_end(id=0,off=0,imm=0)"},
+ {18, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+ {19, "R4_w=inv(id=0,umax_value=8160,var_off=(0x0; 0x1fe0))"},
+ {20, "R4_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"},
+ {21, "R4_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"},
+ {22, "R4_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {23, "R4_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
+ },
+ },
+ {
+ .descr = "unknown mul",
+ .insns = {
+ LOAD_UNKNOWN(BPF_REG_3),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_3),
+ BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 1),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_3),
+ BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 2),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_3),
+ BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 4),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_3),
+ BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 8),
+ BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .matches = {
+ {7, "R3_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+ {8, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+ {9, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+ {10, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+ {11, "R4_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
+ {12, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+ {13, "R4_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {14, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+ {15, "R4_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"},
+ {16, "R4_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"},
+ },
+ },
+ {
+ .descr = "packet const offset",
+ .insns = {
+ PREP_PKT_POINTERS,
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+
+ /* Skip over ethernet header. */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
+ BPF_EXIT_INSN(),
+
+ BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_5, 0),
+ BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_5, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_5, 2),
+ BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_5, 3),
+ BPF_LDX_MEM(BPF_H, BPF_REG_4, BPF_REG_5, 0),
+ BPF_LDX_MEM(BPF_H, BPF_REG_4, BPF_REG_5, 2),
+ BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_5, 0),
+
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .matches = {
+ {4, "R5_w=pkt(id=0,off=0,r=0,imm=0)"},
+ {5, "R5_w=pkt(id=0,off=14,r=0,imm=0)"},
+ {6, "R4_w=pkt(id=0,off=14,r=0,imm=0)"},
+ {10, "R2=pkt(id=0,off=0,r=18,imm=0)"},
+ {10, "R5=pkt(id=0,off=14,r=18,imm=0)"},
+ {10, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+ {14, "R4_w=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"},
+ {15, "R4_w=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"},
+ },
+ },
+ {
+ .descr = "packet variable offset",
+ .insns = {
+ LOAD_UNKNOWN(BPF_REG_6),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 2),
+
+ /* First, add a constant to the R5 packet pointer,
+ * then a variable with a known alignment.
+ */
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_5, 0),
+
+ /* Now, test in the other direction. Adding first
+ * the variable offset to R5, then the constant.
+ */
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_5, 0),
+
+ /* Test multiple accumulations of unknown values
+ * into a packet pointer.
+ */
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 4),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_5, 0),
+
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .matches = {
+ /* Calculated offset in R6 has unknown value, but known
+ * alignment of 4.
+ */
+ {8, "R2=pkt(id=0,off=0,r=8,imm=0)"},
+ {8, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ /* Offset is added to packet pointer R5, resulting in
+ * known fixed offset, and variable offset from R6.
+ */
+ {11, "R5_w=pkt(id=1,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ /* At the time the word size load is performed from R5,
+ * it's total offset is NET_IP_ALIGN + reg->off (0) +
+ * reg->aux_off (14) which is 16. Then the variable
+ * offset is considered using reg->aux_off_align which
+ * is 4 and meets the load's requirements.
+ */
+ {15, "R4=pkt(id=1,off=18,r=18,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {15, "R5=pkt(id=1,off=14,r=18,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ /* Variable offset is added to R5 packet pointer,
+ * resulting in auxiliary alignment of 4.
+ */
+ {18, "R5_w=pkt(id=2,off=0,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ /* Constant offset is added to R5, resulting in
+ * reg->off of 14.
+ */
+ {19, "R5_w=pkt(id=2,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ /* At the time the word size load is performed from R5,
+ * its total fixed offset is NET_IP_ALIGN + reg->off
+ * (14) which is 16. Then the variable offset is 4-byte
+ * aligned, so the total offset is 4-byte aligned and
+ * meets the load's requirements.
+ */
+ {23, "R4=pkt(id=2,off=18,r=18,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {23, "R5=pkt(id=2,off=14,r=18,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ /* Constant offset is added to R5 packet pointer,
+ * resulting in reg->off value of 14.
+ */
+ {26, "R5_w=pkt(id=0,off=14,r=8"},
+ /* Variable offset is added to R5, resulting in a
+ * variable offset of (4n).
+ */
+ {27, "R5_w=pkt(id=3,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ /* Constant is added to R5 again, setting reg->off to 18. */
+ {28, "R5_w=pkt(id=3,off=18,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ /* And once more we add a variable; resulting var_off
+ * is still (4n), fixed offset is not changed.
+ * Also, we create a new reg->id.
+ */
+ {29, "R5_w=pkt(id=4,off=18,r=0,umax_value=2040,var_off=(0x0; 0x7fc))"},
+ /* At the time the word size load is performed from R5,
+ * its total fixed offset is NET_IP_ALIGN + reg->off (18)
+ * which is 20. Then the variable offset is (4n), so
+ * the total offset is 4-byte aligned and meets the
+ * load's requirements.
+ */
+ {33, "R4=pkt(id=4,off=22,r=22,umax_value=2040,var_off=(0x0; 0x7fc))"},
+ {33, "R5=pkt(id=4,off=18,r=22,umax_value=2040,var_off=(0x0; 0x7fc))"},
+ },
+ },
+ {
+ .descr = "packet variable offset 2",
+ .insns = {
+ /* Create an unknown offset, (4n+2)-aligned */
+ LOAD_UNKNOWN(BPF_REG_6),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 14),
+ /* Add it to the packet pointer */
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
+ /* Check bounds and perform a read */
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_5, 0),
+ /* Make a (4n) offset from the value we just read */
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 0xff),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 2),
+ /* Add it to the packet pointer */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
+ /* Check bounds and perform a read */
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_5, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .matches = {
+ /* Calculated offset in R6 has unknown value, but known
+ * alignment of 4.
+ */
+ {8, "R2=pkt(id=0,off=0,r=8,imm=0)"},
+ {8, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ /* Adding 14 makes R6 be (4n+2) */
+ {9, "R6_w=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
+ /* Packet pointer has (4n+2) offset */
+ {11, "R5_w=pkt(id=1,off=0,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
+ {13, "R4=pkt(id=1,off=4,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
+ /* At the time the word size load is performed from R5,
+ * its total fixed offset is NET_IP_ALIGN + reg->off (0)
+ * which is 2. Then the variable offset is (4n+2), so
+ * the total offset is 4-byte aligned and meets the
+ * load's requirements.
+ */
+ {15, "R5=pkt(id=1,off=0,r=4,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
+ /* Newly read value in R6 was shifted left by 2, so has
+ * known alignment of 4.
+ */
+ {18, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ /* Added (4n) to packet pointer's (4n+2) var_off, giving
+ * another (4n+2).
+ */
+ {19, "R5_w=pkt(id=2,off=0,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"},
+ {21, "R4=pkt(id=2,off=4,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"},
+ /* At the time the word size load is performed from R5,
+ * its total fixed offset is NET_IP_ALIGN + reg->off (0)
+ * which is 2. Then the variable offset is (4n+2), so
+ * the total offset is 4-byte aligned and meets the
+ * load's requirements.
+ */
+ {23, "R5=pkt(id=2,off=0,r=4,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"},
+ },
+ },
+ {
+ .descr = "dubious pointer arithmetic",
+ .insns = {
+ PREP_PKT_POINTERS,
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ /* (ptr - ptr) << 2 */
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_3),
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_5, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_5, 2),
+ /* We have a (4n) value. Let's make a packet offset
+ * out of it. First add 14, to make it a (4n+2)
+ */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14),
+ /* Then make sure it's nonnegative */
+ BPF_JMP_IMM(BPF_JSGE, BPF_REG_5, 0, 1),
+ BPF_EXIT_INSN(),
+ /* Add it to packet pointer */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_5),
+ /* Check bounds and perform a read */
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_6, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = REJECT,
+ .matches = {
+ {4, "R5_w=pkt_end(id=0,off=0,imm=0)"},
+ /* (ptr - ptr) << 2 == unknown, (4n) */
+ {6, "R5_w=inv(id=0,smax_value=9223372036854775804,umax_value=18446744073709551612,var_off=(0x0; 0xfffffffffffffffc))"},
+ /* (4n) + 14 == (4n+2). We blow our bounds, because
+ * the add could overflow.
+ */
+ {7, "R5=inv(id=0,var_off=(0x2; 0xfffffffffffffffc))"},
+ /* Checked s>=0 */
+ {9, "R5=inv(id=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
+ /* packet pointer + nonnegative (4n+2) */
+ {11, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
+ {13, "R4=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
+ /* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine.
+ * We checked the bounds, but it might have been able
+ * to overflow if the packet pointer started in the
+ * upper half of the address space.
+ * So we did not get a 'range' on R6, and the access
+ * attempt will fail.
+ */
+ {15, "R6=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
+ }
+ },
+ {
+ .descr = "variable subtraction",
+ .insns = {
+ /* Create an unknown offset, (4n+2)-aligned */
+ LOAD_UNKNOWN(BPF_REG_6),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 14),
+ /* Create another unknown, (4n)-aligned, and subtract
+ * it from the first one
+ */
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_7, 2),
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_6, BPF_REG_7),
+ /* Bounds-check the result */
+ BPF_JMP_IMM(BPF_JSGE, BPF_REG_6, 0, 1),
+ BPF_EXIT_INSN(),
+ /* Add it to the packet pointer */
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
+ /* Check bounds and perform a read */
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_5, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .matches = {
+ /* Calculated offset in R6 has unknown value, but known
+ * alignment of 4.
+ */
+ {7, "R2=pkt(id=0,off=0,r=8,imm=0)"},
+ {9, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ /* Adding 14 makes R6 be (4n+2) */
+ {10, "R6_w=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
+ /* New unknown value in R7 is (4n) */
+ {11, "R7_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ /* Subtracting it from R6 blows our unsigned bounds */
+ {12, "R6=inv(id=0,smin_value=-1006,smax_value=1034,var_off=(0x2; 0xfffffffffffffffc))"},
+ /* Checked s>= 0 */
+ {14, "R6=inv(id=0,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc))"},
+ /* At the time the word size load is performed from R5,
+ * its total fixed offset is NET_IP_ALIGN + reg->off (0)
+ * which is 2. Then the variable offset is (4n+2), so
+ * the total offset is 4-byte aligned and meets the
+ * load's requirements.
+ */
+ {20, "R5=pkt(id=1,off=0,r=4,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc))"},
+ },
+ },
+ {
+ .descr = "pointer variable subtraction",
+ .insns = {
+ /* Create an unknown offset, (4n+2)-aligned and bounded
+ * to [14,74]
+ */
+ LOAD_UNKNOWN(BPF_REG_6),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 0xf),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 14),
+ /* Subtract it from the packet pointer */
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_5, BPF_REG_6),
+ /* Create another unknown, (4n)-aligned and >= 74.
+ * That in fact means >= 76, since 74 % 4 == 2
+ */
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_7, 2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 76),
+ /* Add it to the packet pointer */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_7),
+ /* Check bounds and perform a read */
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_5, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .matches = {
+ /* Calculated offset in R6 has unknown value, but known
+ * alignment of 4.
+ */
+ {7, "R2=pkt(id=0,off=0,r=8,imm=0)"},
+ {10, "R6_w=inv(id=0,umax_value=60,var_off=(0x0; 0x3c))"},
+ /* Adding 14 makes R6 be (4n+2) */
+ {11, "R6_w=inv(id=0,umin_value=14,umax_value=74,var_off=(0x2; 0x7c))"},
+ /* Subtracting from packet pointer overflows ubounds */
+ {13, "R5_w=pkt(id=1,off=0,r=8,umin_value=18446744073709551542,umax_value=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c))"},
+ /* New unknown value in R7 is (4n), >= 76 */
+ {15, "R7_w=inv(id=0,umin_value=76,umax_value=1096,var_off=(0x0; 0x7fc))"},
+ /* Adding it to packet pointer gives nice bounds again */
+ {16, "R5_w=pkt(id=2,off=0,r=0,umin_value=2,umax_value=1082,var_off=(0x2; 0x7fc))"},
+ /* At the time the word size load is performed from R5,
+ * its total fixed offset is NET_IP_ALIGN + reg->off (0)
+ * which is 2. Then the variable offset is (4n+2), so
+ * the total offset is 4-byte aligned and meets the
+ * load's requirements.
+ */
+ {20, "R5=pkt(id=2,off=0,r=4,umin_value=2,umax_value=1082,var_off=(0x2; 0x7fc))"},
+ },
+ },
+};
+
+static int probe_filter_length(const struct bpf_insn *fp)
+{
+ int len;
+
+ for (len = MAX_INSNS - 1; len > 0; --len)
+ if (fp[len].code != 0 || fp[len].imm != 0)
+ break;
+ return len + 1;
+}
+
+static char bpf_vlog[32768];
+
+static int do_test_single(struct bpf_align_test *test)
+{
+ struct bpf_insn *prog = test->insns;
+ int prog_type = test->prog_type;
+ char bpf_vlog_copy[32768];
+ const char *line_ptr;
+ int cur_line = -1;
+ int prog_len, i;
+ int fd_prog;
+ int ret;
+
+ prog_len = probe_filter_length(prog);
+ fd_prog = bpf_verify_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER,
+ prog, prog_len, BPF_F_STRICT_ALIGNMENT,
+ "GPL", 0, bpf_vlog, sizeof(bpf_vlog), 2);
+ if (fd_prog < 0 && test->result != REJECT) {
+ printf("Failed to load program.\n");
+ printf("%s", bpf_vlog);
+ ret = 1;
+ } else if (fd_prog >= 0 && test->result == REJECT) {
+ printf("Unexpected success to load!\n");
+ printf("%s", bpf_vlog);
+ ret = 1;
+ close(fd_prog);
+ } else {
+ ret = 0;
+ /* We make a local copy so that we can strtok() it */
+ strncpy(bpf_vlog_copy, bpf_vlog, sizeof(bpf_vlog_copy));
+ line_ptr = strtok(bpf_vlog_copy, "\n");
+ for (i = 0; i < MAX_MATCHES; i++) {
+ struct bpf_reg_match m = test->matches[i];
+
+ if (!m.match)
+ break;
+ while (line_ptr) {
+ cur_line = -1;
+ sscanf(line_ptr, "%u: ", &cur_line);
+ if (cur_line == m.line)
+ break;
+ line_ptr = strtok(NULL, "\n");
+ }
+ if (!line_ptr) {
+ printf("Failed to find line %u for match: %s\n",
+ m.line, m.match);
+ ret = 1;
+ printf("%s", bpf_vlog);
+ break;
+ }
+ if (!strstr(line_ptr, m.match)) {
+ printf("Failed to find match %u: %s\n",
+ m.line, m.match);
+ ret = 1;
+ printf("%s", bpf_vlog);
+ break;
+ }
+ }
+ if (fd_prog >= 0)
+ close(fd_prog);
+ }
+ return ret;
+}
+
+static int do_test(unsigned int from, unsigned int to)
+{
+ int all_pass = 0;
+ int all_fail = 0;
+ unsigned int i;
+
+ for (i = from; i < to; i++) {
+ struct bpf_align_test *test = &tests[i];
+ int fail;
+
+ printf("Test %3d: %s ... ",
+ i, test->descr);
+ fail = do_test_single(test);
+ if (fail) {
+ all_fail++;
+ printf("FAIL\n");
+ } else {
+ all_pass++;
+ printf("PASS\n");
+ }
+ }
+ printf("Results: %d pass %d fail\n",
+ all_pass, all_fail);
+ return all_fail ? EXIT_FAILURE : EXIT_SUCCESS;
+}
+
+int main(int argc, char **argv)
+{
+ unsigned int from = 0, to = ARRAY_SIZE(tests);
+
+ if (argc == 3) {
+ unsigned int l = atoi(argv[argc - 2]);
+ unsigned int u = atoi(argv[argc - 1]);
+
+ if (l < to && u < to) {
+ from = l;
+ to = u + 1;
+ }
+ } else if (argc == 2) {
+ unsigned int t = atoi(argv[argc - 1]);
+
+ if (t < to) {
+ from = t;
+ to = t + 1;
+ }
+ }
+ return do_test(from, to);
+}
diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c
new file mode 100644
index 000000000..29116366a
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_btf.c
@@ -0,0 +1,2840 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018 Facebook */
+
+#include <linux/bpf.h>
+#include <linux/btf.h>
+#include <linux/err.h>
+#include <bpf/bpf.h>
+#include <sys/resource.h>
+#include <libelf.h>
+#include <gelf.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <bpf/libbpf.h>
+#include <bpf/btf.h>
+
+#include "bpf_rlimit.h"
+#include "bpf_util.h"
+
+static uint32_t pass_cnt;
+static uint32_t error_cnt;
+static uint32_t skip_cnt;
+
+#define CHECK(condition, format...) ({ \
+ int __ret = !!(condition); \
+ if (__ret) { \
+ fprintf(stderr, "%s:%d:FAIL ", __func__, __LINE__); \
+ fprintf(stderr, format); \
+ } \
+ __ret; \
+})
+
+static int count_result(int err)
+{
+ if (err)
+ error_cnt++;
+ else
+ pass_cnt++;
+
+ fprintf(stderr, "\n");
+ return err;
+}
+
+#define min(a, b) ((a) < (b) ? (a) : (b))
+#define __printf(a, b) __attribute__((format(printf, a, b)))
+
+__printf(1, 2)
+static int __base_pr(const char *format, ...)
+{
+ va_list args;
+ int err;
+
+ va_start(args, format);
+ err = vfprintf(stderr, format, args);
+ va_end(args);
+ return err;
+}
+
+#define BTF_INFO_ENC(kind, root, vlen) \
+ ((!!(root) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN))
+
+#define BTF_TYPE_ENC(name, info, size_or_type) \
+ (name), (info), (size_or_type)
+
+#define BTF_INT_ENC(encoding, bits_offset, nr_bits) \
+ ((encoding) << 24 | (bits_offset) << 16 | (nr_bits))
+#define BTF_TYPE_INT_ENC(name, encoding, bits_offset, bits, sz) \
+ BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_INT, 0, 0), sz), \
+ BTF_INT_ENC(encoding, bits_offset, bits)
+
+#define BTF_ARRAY_ENC(type, index_type, nr_elems) \
+ (type), (index_type), (nr_elems)
+#define BTF_TYPE_ARRAY_ENC(type, index_type, nr_elems) \
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ARRAY, 0, 0), 0), \
+ BTF_ARRAY_ENC(type, index_type, nr_elems)
+
+#define BTF_MEMBER_ENC(name, type, bits_offset) \
+ (name), (type), (bits_offset)
+#define BTF_ENUM_ENC(name, val) (name), (val)
+
+#define BTF_TYPEDEF_ENC(name, type) \
+ BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0), type)
+
+#define BTF_PTR_ENC(name, type) \
+ BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), type)
+
+#define BTF_END_RAW 0xdeadbeef
+#define NAME_TBD 0xdeadb33f
+
+#define MAX_NR_RAW_TYPES 1024
+#define BTF_LOG_BUF_SIZE 65535
+
+static struct args {
+ unsigned int raw_test_num;
+ unsigned int file_test_num;
+ unsigned int get_info_test_num;
+ bool raw_test;
+ bool file_test;
+ bool get_info_test;
+ bool pprint_test;
+ bool always_log;
+} args;
+
+static char btf_log_buf[BTF_LOG_BUF_SIZE];
+
+static struct btf_header hdr_tmpl = {
+ .magic = BTF_MAGIC,
+ .version = BTF_VERSION,
+ .hdr_len = sizeof(struct btf_header),
+};
+
+struct btf_raw_test {
+ const char *descr;
+ const char *str_sec;
+ const char *map_name;
+ const char *err_str;
+ __u32 raw_types[MAX_NR_RAW_TYPES];
+ __u32 str_sec_size;
+ enum bpf_map_type map_type;
+ __u32 key_size;
+ __u32 value_size;
+ __u32 key_type_id;
+ __u32 value_type_id;
+ __u32 max_entries;
+ bool btf_load_err;
+ bool map_create_err;
+ bool ordered_map;
+ bool lossless_map;
+ int hdr_len_delta;
+ int type_off_delta;
+ int str_off_delta;
+ int str_len_delta;
+};
+
+static struct btf_raw_test raw_tests[] = {
+/* enum E {
+ * E0,
+ * E1,
+ * };
+ *
+ * struct A {
+ * unsigned long long m;
+ * int n;
+ * char o;
+ * [3 bytes hole]
+ * int p[8];
+ * int q[4][8];
+ * enum E r;
+ * };
+ */
+{
+ .descr = "struct test #1",
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* unsigned long long */
+ BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */
+ /* char */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1), /* [3] */
+ /* int[8] */
+ BTF_TYPE_ARRAY_ENC(1, 1, 8), /* [4] */
+ /* struct A { */ /* [5] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 6), 180),
+ BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* unsigned long long m;*/
+ BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n; */
+ BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o; */
+ BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8] */
+ BTF_MEMBER_ENC(NAME_TBD, 6, 384),/* int q[4][8] */
+ BTF_MEMBER_ENC(NAME_TBD, 7, 1408), /* enum E r */
+ /* } */
+ /* int[4][8] */
+ BTF_TYPE_ARRAY_ENC(4, 1, 4), /* [6] */
+ /* enum E */ /* [7] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 2), sizeof(int)),
+ BTF_ENUM_ENC(NAME_TBD, 0),
+ BTF_ENUM_ENC(NAME_TBD, 1),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0m\0n\0o\0p\0q\0r\0E\0E0\0E1",
+ .str_sec_size = sizeof("\0A\0m\0n\0o\0p\0q\0r\0E\0E0\0E1"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "struct_test1_map",
+ .key_size = sizeof(int),
+ .value_size = 180,
+ .key_type_id = 1,
+ .value_type_id = 5,
+ .max_entries = 4,
+},
+
+/* typedef struct b Struct_B;
+ *
+ * struct A {
+ * int m;
+ * struct b n[4];
+ * const Struct_B o[4];
+ * };
+ *
+ * struct B {
+ * int m;
+ * int n;
+ * };
+ */
+{
+ .descr = "struct test #2",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* struct b [4] */ /* [2] */
+ BTF_TYPE_ARRAY_ENC(4, 1, 4),
+
+ /* struct A { */ /* [3] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 3), 68),
+ BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */
+ BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* struct B n[4] */
+ BTF_MEMBER_ENC(NAME_TBD, 8, 288),/* const Struct_B o[4];*/
+ /* } */
+
+ /* struct B { */ /* [4] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8),
+ BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */
+ BTF_MEMBER_ENC(NAME_TBD, 1, 32),/* int n; */
+ /* } */
+
+ /* const int */ /* [5] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 1),
+ /* typedef struct b Struct_B */ /* [6] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0), 4),
+ /* const Struct_B */ /* [7] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 6),
+ /* const Struct_B [4] */ /* [8] */
+ BTF_TYPE_ARRAY_ENC(7, 1, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0m\0n\0o\0B\0m\0n\0Struct_B",
+ .str_sec_size = sizeof("\0A\0m\0n\0o\0B\0m\0n\0Struct_B"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "struct_test2_map",
+ .key_size = sizeof(int),
+ .value_size = 68,
+ .key_type_id = 1,
+ .value_type_id = 3,
+ .max_entries = 4,
+},
+
+{
+ .descr = "struct test #3 Invalid member offset",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* int64 */ /* [2] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 64, 8),
+
+ /* struct A { */ /* [3] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 16),
+ BTF_MEMBER_ENC(NAME_TBD, 1, 64), /* int m; */
+ BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* int64 n; */
+ /* } */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0m\0n\0",
+ .str_sec_size = sizeof("\0A\0m\0n\0"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "struct_test3_map",
+ .key_size = sizeof(int),
+ .value_size = 16,
+ .key_type_id = 1,
+ .value_type_id = 3,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid member bits_offset",
+},
+
+/* Test member exceeds the size of struct.
+ *
+ * struct A {
+ * int m;
+ * int n;
+ * };
+ */
+{
+ .descr = "size check test #1",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* struct A { */ /* [2] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), sizeof(int) * 2 - 1),
+ BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */
+ BTF_MEMBER_ENC(NAME_TBD, 1, 32),/* int n; */
+ /* } */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0m\0n",
+ .str_sec_size = sizeof("\0A\0m\0n"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "size_check1_map",
+ .key_size = sizeof(int),
+ .value_size = 1,
+ .key_type_id = 1,
+ .value_type_id = 2,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Member exceeds struct_size",
+},
+
+/* Test member exeeds the size of struct
+ *
+ * struct A {
+ * int m;
+ * int n[2];
+ * };
+ */
+{
+ .descr = "size check test #2",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, sizeof(int)),
+ /* int[2] */ /* [2] */
+ BTF_TYPE_ARRAY_ENC(1, 1, 2),
+ /* struct A { */ /* [3] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), sizeof(int) * 3 - 1),
+ BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */
+ BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* int n[2]; */
+ /* } */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0m\0n",
+ .str_sec_size = sizeof("\0A\0m\0n"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "size_check2_map",
+ .key_size = sizeof(int),
+ .value_size = 1,
+ .key_type_id = 1,
+ .value_type_id = 3,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Member exceeds struct_size",
+},
+
+/* Test member exeeds the size of struct
+ *
+ * struct A {
+ * int m;
+ * void *n;
+ * };
+ */
+{
+ .descr = "size check test #3",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, sizeof(int)),
+ /* void* */ /* [2] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0),
+ /* struct A { */ /* [3] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), sizeof(int) + sizeof(void *) - 1),
+ BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */
+ BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* void *n; */
+ /* } */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0m\0n",
+ .str_sec_size = sizeof("\0A\0m\0n"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "size_check3_map",
+ .key_size = sizeof(int),
+ .value_size = 1,
+ .key_type_id = 1,
+ .value_type_id = 3,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Member exceeds struct_size",
+},
+
+/* Test member exceeds the size of struct
+ *
+ * enum E {
+ * E0,
+ * E1,
+ * };
+ *
+ * struct A {
+ * int m;
+ * enum E n;
+ * };
+ */
+{
+ .descr = "size check test #4",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, sizeof(int)),
+ /* enum E { */ /* [2] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 2), sizeof(int)),
+ BTF_ENUM_ENC(NAME_TBD, 0),
+ BTF_ENUM_ENC(NAME_TBD, 1),
+ /* } */
+ /* struct A { */ /* [3] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), sizeof(int) * 2 - 1),
+ BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */
+ BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* enum E n; */
+ /* } */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0E\0E0\0E1\0A\0m\0n",
+ .str_sec_size = sizeof("\0E\0E0\0E1\0A\0m\0n"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "size_check4_map",
+ .key_size = sizeof(int),
+ .value_size = 1,
+ .key_type_id = 1,
+ .value_type_id = 3,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Member exceeds struct_size",
+},
+
+/* typedef const void * const_void_ptr;
+ * struct A {
+ * const_void_ptr m;
+ * };
+ */
+{
+ .descr = "void test #1",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* const void */ /* [2] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0),
+ /* const void* */ /* [3] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2),
+ /* typedef const void * const_void_ptr */
+ BTF_TYPEDEF_ENC(NAME_TBD, 3), /* [4] */
+ /* struct A { */ /* [5] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), sizeof(void *)),
+ /* const_void_ptr m; */
+ BTF_MEMBER_ENC(NAME_TBD, 4, 0),
+ /* } */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0const_void_ptr\0A\0m",
+ .str_sec_size = sizeof("\0const_void_ptr\0A\0m"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "void_test1_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(void *),
+ .key_type_id = 1,
+ .value_type_id = 4,
+ .max_entries = 4,
+},
+
+/* struct A {
+ * const void m;
+ * };
+ */
+{
+ .descr = "void test #2",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* const void */ /* [2] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0),
+ /* struct A { */ /* [3] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 8),
+ /* const void m; */
+ BTF_MEMBER_ENC(NAME_TBD, 2, 0),
+ /* } */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0m",
+ .str_sec_size = sizeof("\0A\0m"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "void_test2_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(void *),
+ .key_type_id = 1,
+ .value_type_id = 3,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid member",
+},
+
+/* typedef const void * const_void_ptr;
+ * const_void_ptr[4]
+ */
+{
+ .descr = "void test #3",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* const void */ /* [2] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0),
+ /* const void* */ /* [3] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2),
+ /* typedef const void * const_void_ptr */
+ BTF_TYPEDEF_ENC(NAME_TBD, 3), /* [4] */
+ /* const_void_ptr[4] */
+ BTF_TYPE_ARRAY_ENC(4, 1, 4), /* [5] */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0const_void_ptr",
+ .str_sec_size = sizeof("\0const_void_ptr"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "void_test3_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(void *) * 4,
+ .key_type_id = 1,
+ .value_type_id = 5,
+ .max_entries = 4,
+},
+
+/* const void[4] */
+{
+ .descr = "void test #4",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* const void */ /* [2] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0),
+ /* const void[4] */ /* [3] */
+ BTF_TYPE_ARRAY_ENC(2, 1, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0m",
+ .str_sec_size = sizeof("\0A\0m"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "void_test4_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(void *) * 4,
+ .key_type_id = 1,
+ .value_type_id = 3,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid elem",
+},
+
+/* Array_A <------------------+
+ * elem_type == Array_B |
+ * | |
+ * | |
+ * Array_B <-------- + |
+ * elem_type == Array A --+
+ */
+{
+ .descr = "loop test #1",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* Array_A */ /* [2] */
+ BTF_TYPE_ARRAY_ENC(3, 1, 8),
+ /* Array_B */ /* [3] */
+ BTF_TYPE_ARRAY_ENC(2, 1, 8),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "loop_test1_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(sizeof(int) * 8),
+ .key_type_id = 1,
+ .value_type_id = 2,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Loop detected",
+},
+
+/* typedef is _before_ the BTF type of Array_A and Array_B
+ *
+ * typedef Array_B int_array;
+ *
+ * Array_A <------------------+
+ * elem_type == int_array |
+ * | |
+ * | |
+ * Array_B <-------- + |
+ * elem_type == Array_A --+
+ */
+{
+ .descr = "loop test #2",
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* typedef Array_B int_array */
+ BTF_TYPEDEF_ENC(1, 4), /* [2] */
+ /* Array_A */
+ BTF_TYPE_ARRAY_ENC(2, 1, 8), /* [3] */
+ /* Array_B */
+ BTF_TYPE_ARRAY_ENC(3, 1, 8), /* [4] */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0int_array\0",
+ .str_sec_size = sizeof("\0int_array"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "loop_test2_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(sizeof(int) * 8),
+ .key_type_id = 1,
+ .value_type_id = 2,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Loop detected",
+},
+
+/* Array_A <------------------+
+ * elem_type == Array_B |
+ * | |
+ * | |
+ * Array_B <-------- + |
+ * elem_type == Array_A --+
+ */
+{
+ .descr = "loop test #3",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* Array_A */ /* [2] */
+ BTF_TYPE_ARRAY_ENC(3, 1, 8),
+ /* Array_B */ /* [3] */
+ BTF_TYPE_ARRAY_ENC(2, 1, 8),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "loop_test3_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(sizeof(int) * 8),
+ .key_type_id = 1,
+ .value_type_id = 2,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Loop detected",
+},
+
+/* typedef is _between_ the BTF type of Array_A and Array_B
+ *
+ * typedef Array_B int_array;
+ *
+ * Array_A <------------------+
+ * elem_type == int_array |
+ * | |
+ * | |
+ * Array_B <-------- + |
+ * elem_type == Array_A --+
+ */
+{
+ .descr = "loop test #4",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* Array_A */ /* [2] */
+ BTF_TYPE_ARRAY_ENC(3, 1, 8),
+ /* typedef Array_B int_array */ /* [3] */
+ BTF_TYPEDEF_ENC(NAME_TBD, 4),
+ /* Array_B */ /* [4] */
+ BTF_TYPE_ARRAY_ENC(2, 1, 8),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0int_array\0",
+ .str_sec_size = sizeof("\0int_array"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "loop_test4_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(sizeof(int) * 8),
+ .key_type_id = 1,
+ .value_type_id = 2,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Loop detected",
+},
+
+/* typedef struct B Struct_B
+ *
+ * struct A {
+ * int x;
+ * Struct_B y;
+ * };
+ *
+ * struct B {
+ * int x;
+ * struct A y;
+ * };
+ */
+{
+ .descr = "loop test #5",
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* struct A */ /* [2] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8),
+ BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int x; */
+ BTF_MEMBER_ENC(NAME_TBD, 3, 32),/* Struct_B y; */
+ /* typedef struct B Struct_B */
+ BTF_TYPEDEF_ENC(NAME_TBD, 4), /* [3] */
+ /* struct B */ /* [4] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8),
+ BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int x; */
+ BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* struct A y; */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0x\0y\0Struct_B\0B\0x\0y",
+ .str_sec_size = sizeof("\0A\0x\0y\0Struct_B\0B\0x\0y"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "loop_test5_map",
+ .key_size = sizeof(int),
+ .value_size = 8,
+ .key_type_id = 1,
+ .value_type_id = 2,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Loop detected",
+},
+
+/* struct A {
+ * int x;
+ * struct A array_a[4];
+ * };
+ */
+{
+ .descr = "loop test #6",
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_ARRAY_ENC(3, 1, 4), /* [2] */
+ /* struct A */ /* [3] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8),
+ BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int x; */
+ BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* struct A array_a[4]; */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0x\0y",
+ .str_sec_size = sizeof("\0A\0x\0y"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "loop_test6_map",
+ .key_size = sizeof(int),
+ .value_size = 8,
+ .key_type_id = 1,
+ .value_type_id = 2,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Loop detected",
+},
+
+{
+ .descr = "loop test #7",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* struct A { */ /* [2] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), sizeof(void *)),
+ /* const void *m; */
+ BTF_MEMBER_ENC(NAME_TBD, 3, 0),
+ /* CONST type_id=3 */ /* [3] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 4),
+ /* PTR type_id=2 */ /* [4] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 3),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0m",
+ .str_sec_size = sizeof("\0A\0m"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "loop_test7_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(void *),
+ .key_type_id = 1,
+ .value_type_id = 2,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Loop detected",
+},
+
+{
+ .descr = "loop test #8",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* struct A { */ /* [2] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), sizeof(void *)),
+ /* const void *m; */
+ BTF_MEMBER_ENC(NAME_TBD, 4, 0),
+ /* struct B { */ /* [3] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), sizeof(void *)),
+ /* const void *n; */
+ BTF_MEMBER_ENC(NAME_TBD, 6, 0),
+ /* CONST type_id=5 */ /* [4] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 5),
+ /* PTR type_id=6 */ /* [5] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 6),
+ /* CONST type_id=7 */ /* [6] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 7),
+ /* PTR type_id=4 */ /* [7] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0m\0B\0n",
+ .str_sec_size = sizeof("\0A\0m\0B\0n"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "loop_test8_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(void *),
+ .key_type_id = 1,
+ .value_type_id = 2,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Loop detected",
+},
+
+{
+ .descr = "string section does not end with null",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0int",
+ .str_sec_size = sizeof("\0int") - 1,
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "hdr_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid string section",
+},
+
+{
+ .descr = "empty string section",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = 0,
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "hdr_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid string section",
+},
+
+{
+ .descr = "empty type section",
+ .raw_types = {
+ BTF_END_RAW,
+ },
+ .str_sec = "\0int",
+ .str_sec_size = sizeof("\0int"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "hdr_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "No type found",
+},
+
+{
+ .descr = "btf_header test. Longer hdr_len",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0int",
+ .str_sec_size = sizeof("\0int"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "hdr_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .hdr_len_delta = 4,
+ .err_str = "Unsupported btf_header",
+},
+
+{
+ .descr = "btf_header test. Gap between hdr and type",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0int",
+ .str_sec_size = sizeof("\0int"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "hdr_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .type_off_delta = 4,
+ .err_str = "Unsupported section found",
+},
+
+{
+ .descr = "btf_header test. Gap between type and str",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0int",
+ .str_sec_size = sizeof("\0int"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "hdr_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .str_off_delta = 4,
+ .err_str = "Unsupported section found",
+},
+
+{
+ .descr = "btf_header test. Overlap between type and str",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0int",
+ .str_sec_size = sizeof("\0int"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "hdr_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .str_off_delta = -4,
+ .err_str = "Section overlap found",
+},
+
+{
+ .descr = "btf_header test. Larger BTF size",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0int",
+ .str_sec_size = sizeof("\0int"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "hdr_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .str_len_delta = -4,
+ .err_str = "Unsupported section found",
+},
+
+{
+ .descr = "btf_header test. Smaller BTF size",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0int",
+ .str_sec_size = sizeof("\0int"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "hdr_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .str_len_delta = 4,
+ .err_str = "Total section length too long",
+},
+
+{
+ .descr = "array test. index_type/elem_type \"int\"",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* int[16] */ /* [2] */
+ BTF_TYPE_ARRAY_ENC(1, 1, 16),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "array_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+},
+
+{
+ .descr = "array test. index_type/elem_type \"const int\"",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* int[16] */ /* [2] */
+ BTF_TYPE_ARRAY_ENC(3, 3, 16),
+ /* CONST type_id=1 */ /* [3] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 1),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "array_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+},
+
+{
+ .descr = "array test. index_type \"const int:31\"",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* int:31 */ /* [2] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 31, 4),
+ /* int[16] */ /* [3] */
+ BTF_TYPE_ARRAY_ENC(1, 4, 16),
+ /* CONST type_id=2 */ /* [4] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 2),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "array_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid index",
+},
+
+{
+ .descr = "array test. elem_type \"const int:31\"",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* int:31 */ /* [2] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 31, 4),
+ /* int[16] */ /* [3] */
+ BTF_TYPE_ARRAY_ENC(4, 1, 16),
+ /* CONST type_id=2 */ /* [4] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 2),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "array_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid array of int",
+},
+
+{
+ .descr = "array test. index_type \"void\"",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* int[16] */ /* [2] */
+ BTF_TYPE_ARRAY_ENC(1, 0, 16),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "array_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid index",
+},
+
+{
+ .descr = "array test. index_type \"const void\"",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* int[16] */ /* [2] */
+ BTF_TYPE_ARRAY_ENC(1, 3, 16),
+ /* CONST type_id=0 (void) */ /* [3] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "array_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid index",
+},
+
+{
+ .descr = "array test. elem_type \"const void\"",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* int[16] */ /* [2] */
+ BTF_TYPE_ARRAY_ENC(3, 1, 16),
+ /* CONST type_id=0 (void) */ /* [3] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "array_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid elem",
+},
+
+{
+ .descr = "array test. elem_type \"const void *\"",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* const void *[16] */ /* [2] */
+ BTF_TYPE_ARRAY_ENC(3, 1, 16),
+ /* CONST type_id=4 */ /* [3] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 4),
+ /* void* */ /* [4] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "array_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+},
+
+{
+ .descr = "array test. index_type \"const void *\"",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* const void *[16] */ /* [2] */
+ BTF_TYPE_ARRAY_ENC(3, 3, 16),
+ /* CONST type_id=4 */ /* [3] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 4),
+ /* void* */ /* [4] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "array_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid index",
+},
+
+{
+ .descr = "array test. t->size != 0\"",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* int[16] */ /* [2] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ARRAY, 0, 0), 1),
+ BTF_ARRAY_ENC(1, 1, 16),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "array_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "size != 0",
+},
+
+{
+ .descr = "int test. invalid int_data",
+ .raw_types = {
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_INT, 0, 0), 4),
+ 0x10000000,
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "array_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid int_data",
+},
+
+{
+ .descr = "invalid BTF_INFO",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ BTF_TYPE_ENC(0, 0x10000000, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "array_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid btf_info",
+},
+
+{
+ .descr = "fwd test. t->type != 0\"",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* fwd type */ /* [2] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FWD, 0, 0), 1),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "fwd_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "type != 0",
+},
+
+{
+ .descr = "typedef (invalid name, name_off = 0)",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPEDEF_ENC(0, 1), /* [2] */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0__int",
+ .str_sec_size = sizeof("\0__int"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "typedef_check_btf",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid name",
+},
+
+{
+ .descr = "typedef (invalid name, invalid identifier)",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPEDEF_ENC(NAME_TBD, 1), /* [2] */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0__!int",
+ .str_sec_size = sizeof("\0__!int"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "typedef_check_btf",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid name",
+},
+
+{
+ .descr = "ptr type (invalid name, name_off <> 0)",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_ENC(NAME_TBD,
+ BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 1), /* [2] */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0__int",
+ .str_sec_size = sizeof("\0__int"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "ptr_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid name",
+},
+
+{
+ .descr = "volatile type (invalid name, name_off <> 0)",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_ENC(NAME_TBD,
+ BTF_INFO_ENC(BTF_KIND_VOLATILE, 0, 0), 1), /* [2] */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0__int",
+ .str_sec_size = sizeof("\0__int"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "volatile_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid name",
+},
+
+{
+ .descr = "const type (invalid name, name_off <> 0)",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_ENC(NAME_TBD,
+ BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 1), /* [2] */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0__int",
+ .str_sec_size = sizeof("\0__int"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "const_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid name",
+},
+
+{
+ .descr = "restrict type (invalid name, name_off <> 0)",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 1), /* [2] */
+ BTF_TYPE_ENC(NAME_TBD,
+ BTF_INFO_ENC(BTF_KIND_RESTRICT, 0, 0), 2), /* [3] */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0__int",
+ .str_sec_size = sizeof("\0__int"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "restrict_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid name",
+},
+
+{
+ .descr = "fwd type (invalid name, name_off = 0)",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FWD, 0, 0), 0), /* [2] */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0__skb",
+ .str_sec_size = sizeof("\0__skb"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "fwd_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid name",
+},
+
+{
+ .descr = "fwd type (invalid name, invalid identifier)",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_ENC(NAME_TBD,
+ BTF_INFO_ENC(BTF_KIND_FWD, 0, 0), 0), /* [2] */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0__!skb",
+ .str_sec_size = sizeof("\0__!skb"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "fwd_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid name",
+},
+
+{
+ .descr = "array type (invalid name, name_off <> 0)",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_ENC(NAME_TBD,
+ BTF_INFO_ENC(BTF_KIND_ARRAY, 0, 0), 0), /* [2] */
+ BTF_ARRAY_ENC(1, 1, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0__skb",
+ .str_sec_size = sizeof("\0__skb"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "array_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid name",
+},
+
+{
+ .descr = "struct type (name_off = 0)",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_ENC(0,
+ BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 4), /* [2] */
+ BTF_MEMBER_ENC(NAME_TBD, 1, 0),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A",
+ .str_sec_size = sizeof("\0A"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "struct_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+},
+
+{
+ .descr = "struct type (invalid name, invalid identifier)",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_ENC(NAME_TBD,
+ BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 4), /* [2] */
+ BTF_MEMBER_ENC(NAME_TBD, 1, 0),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A!\0B",
+ .str_sec_size = sizeof("\0A!\0B"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "struct_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid name",
+},
+
+{
+ .descr = "struct member (name_off = 0)",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_ENC(0,
+ BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 4), /* [2] */
+ BTF_MEMBER_ENC(NAME_TBD, 1, 0),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A",
+ .str_sec_size = sizeof("\0A"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "struct_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+},
+
+{
+ .descr = "struct member (invalid name, invalid identifier)",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_ENC(NAME_TBD,
+ BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 4), /* [2] */
+ BTF_MEMBER_ENC(NAME_TBD, 1, 0),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0B*",
+ .str_sec_size = sizeof("\0A\0B*"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "struct_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid name",
+},
+
+{
+ .descr = "enum type (name_off = 0)",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_ENC(0,
+ BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1),
+ sizeof(int)), /* [2] */
+ BTF_ENUM_ENC(NAME_TBD, 0),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0B",
+ .str_sec_size = sizeof("\0A\0B"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "enum_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+},
+
+{
+ .descr = "enum type (invalid name, invalid identifier)",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_ENC(NAME_TBD,
+ BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1),
+ sizeof(int)), /* [2] */
+ BTF_ENUM_ENC(NAME_TBD, 0),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A!\0B",
+ .str_sec_size = sizeof("\0A!\0B"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "enum_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid name",
+},
+
+{
+ .descr = "enum member (invalid name, name_off = 0)",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_ENC(0,
+ BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1),
+ sizeof(int)), /* [2] */
+ BTF_ENUM_ENC(0, 0),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "enum_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid name",
+},
+
+{
+ .descr = "enum member (invalid name, invalid identifier)",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_ENC(0,
+ BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1),
+ sizeof(int)), /* [2] */
+ BTF_ENUM_ENC(NAME_TBD, 0),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A!",
+ .str_sec_size = sizeof("\0A!"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "enum_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid name",
+},
+{
+ .descr = "arraymap invalid btf key (a bit field)",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* 32 bit int with 32 bit offset */ /* [2] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 32, 32, 8),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "array_map_check_btf",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 2,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .map_create_err = true,
+},
+
+{
+ .descr = "arraymap invalid btf key (!= 32 bits)",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* 16 bit int with 0 bit offset */ /* [2] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 16, 2),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "array_map_check_btf",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 2,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .map_create_err = true,
+},
+
+{
+ .descr = "arraymap invalid btf value (too small)",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "array_map_check_btf",
+ .key_size = sizeof(int),
+ /* btf_value_size < map->value_size */
+ .value_size = sizeof(__u64),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .map_create_err = true,
+},
+
+{
+ .descr = "arraymap invalid btf value (too big)",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "array_map_check_btf",
+ .key_size = sizeof(int),
+ /* btf_value_size > map->value_size */
+ .value_size = sizeof(__u16),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .map_create_err = true,
+},
+
+}; /* struct btf_raw_test raw_tests[] */
+
+static const char *get_next_str(const char *start, const char *end)
+{
+ return start < end - 1 ? start + 1 : NULL;
+}
+
+static int get_type_sec_size(const __u32 *raw_types)
+{
+ int i;
+
+ for (i = MAX_NR_RAW_TYPES - 1;
+ i >= 0 && raw_types[i] != BTF_END_RAW;
+ i--)
+ ;
+
+ return i < 0 ? i : i * sizeof(raw_types[0]);
+}
+
+static void *btf_raw_create(const struct btf_header *hdr,
+ const __u32 *raw_types,
+ const char *str,
+ unsigned int str_sec_size,
+ unsigned int *btf_size)
+{
+ const char *next_str = str, *end_str = str + str_sec_size;
+ unsigned int size_needed, offset;
+ struct btf_header *ret_hdr;
+ int i, type_sec_size;
+ uint32_t *ret_types;
+ void *raw_btf;
+
+ type_sec_size = get_type_sec_size(raw_types);
+ if (CHECK(type_sec_size < 0, "Cannot get nr_raw_types"))
+ return NULL;
+
+ size_needed = sizeof(*hdr) + type_sec_size + str_sec_size;
+ raw_btf = malloc(size_needed);
+ if (CHECK(!raw_btf, "Cannot allocate memory for raw_btf"))
+ return NULL;
+
+ /* Copy header */
+ memcpy(raw_btf, hdr, sizeof(*hdr));
+ offset = sizeof(*hdr);
+
+ /* Copy type section */
+ ret_types = raw_btf + offset;
+ for (i = 0; i < type_sec_size / sizeof(raw_types[0]); i++) {
+ if (raw_types[i] == NAME_TBD) {
+ next_str = get_next_str(next_str, end_str);
+ if (CHECK(!next_str, "Error in getting next_str")) {
+ free(raw_btf);
+ return NULL;
+ }
+ ret_types[i] = next_str - str;
+ next_str += strlen(next_str);
+ } else {
+ ret_types[i] = raw_types[i];
+ }
+ }
+ offset += type_sec_size;
+
+ /* Copy string section */
+ memcpy(raw_btf + offset, str, str_sec_size);
+
+ ret_hdr = (struct btf_header *)raw_btf;
+ ret_hdr->type_len = type_sec_size;
+ ret_hdr->str_off = type_sec_size;
+ ret_hdr->str_len = str_sec_size;
+
+ *btf_size = size_needed;
+
+ return raw_btf;
+}
+
+static int do_test_raw(unsigned int test_num)
+{
+ struct btf_raw_test *test = &raw_tests[test_num - 1];
+ struct bpf_create_map_attr create_attr = {};
+ int map_fd = -1, btf_fd = -1;
+ unsigned int raw_btf_size;
+ struct btf_header *hdr;
+ void *raw_btf;
+ int err;
+
+ fprintf(stderr, "BTF raw test[%u] (%s): ", test_num, test->descr);
+ raw_btf = btf_raw_create(&hdr_tmpl,
+ test->raw_types,
+ test->str_sec,
+ test->str_sec_size,
+ &raw_btf_size);
+
+ if (!raw_btf)
+ return -1;
+
+ hdr = raw_btf;
+
+ hdr->hdr_len = (int)hdr->hdr_len + test->hdr_len_delta;
+ hdr->type_off = (int)hdr->type_off + test->type_off_delta;
+ hdr->str_off = (int)hdr->str_off + test->str_off_delta;
+ hdr->str_len = (int)hdr->str_len + test->str_len_delta;
+
+ *btf_log_buf = '\0';
+ btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
+ btf_log_buf, BTF_LOG_BUF_SIZE,
+ args.always_log);
+ free(raw_btf);
+
+ err = ((btf_fd == -1) != test->btf_load_err);
+ if (CHECK(err, "btf_fd:%d test->btf_load_err:%u",
+ btf_fd, test->btf_load_err) ||
+ CHECK(test->err_str && !strstr(btf_log_buf, test->err_str),
+ "expected err_str:%s", test->err_str)) {
+ err = -1;
+ goto done;
+ }
+
+ if (err || btf_fd == -1)
+ goto done;
+
+ create_attr.name = test->map_name;
+ create_attr.map_type = test->map_type;
+ create_attr.key_size = test->key_size;
+ create_attr.value_size = test->value_size;
+ create_attr.max_entries = test->max_entries;
+ create_attr.btf_fd = btf_fd;
+ create_attr.btf_key_type_id = test->key_type_id;
+ create_attr.btf_value_type_id = test->value_type_id;
+
+ map_fd = bpf_create_map_xattr(&create_attr);
+
+ err = ((map_fd == -1) != test->map_create_err);
+ CHECK(err, "map_fd:%d test->map_create_err:%u",
+ map_fd, test->map_create_err);
+
+done:
+ if (!err)
+ fprintf(stderr, "OK");
+
+ if (*btf_log_buf && (err || args.always_log))
+ fprintf(stderr, "\n%s", btf_log_buf);
+
+ if (btf_fd != -1)
+ close(btf_fd);
+ if (map_fd != -1)
+ close(map_fd);
+
+ return err;
+}
+
+static int test_raw(void)
+{
+ unsigned int i;
+ int err = 0;
+
+ if (args.raw_test_num)
+ return count_result(do_test_raw(args.raw_test_num));
+
+ for (i = 1; i <= ARRAY_SIZE(raw_tests); i++)
+ err |= count_result(do_test_raw(i));
+
+ return err;
+}
+
+struct btf_get_info_test {
+ const char *descr;
+ const char *str_sec;
+ __u32 raw_types[MAX_NR_RAW_TYPES];
+ __u32 str_sec_size;
+ int btf_size_delta;
+ int (*special_test)(unsigned int test_num);
+};
+
+static int test_big_btf_info(unsigned int test_num);
+static int test_btf_id(unsigned int test_num);
+
+const struct btf_get_info_test get_info_tests[] = {
+{
+ .descr = "== raw_btf_size+1",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .btf_size_delta = 1,
+},
+{
+ .descr = "== raw_btf_size-3",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .btf_size_delta = -3,
+},
+{
+ .descr = "Large bpf_btf_info",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .special_test = test_big_btf_info,
+},
+{
+ .descr = "BTF ID",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* unsigned int */ /* [2] */
+ BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .special_test = test_btf_id,
+},
+};
+
+static inline __u64 ptr_to_u64(const void *ptr)
+{
+ return (__u64)(unsigned long)ptr;
+}
+
+static int test_big_btf_info(unsigned int test_num)
+{
+ const struct btf_get_info_test *test = &get_info_tests[test_num - 1];
+ uint8_t *raw_btf = NULL, *user_btf = NULL;
+ unsigned int raw_btf_size;
+ struct {
+ struct bpf_btf_info info;
+ uint64_t garbage;
+ } info_garbage;
+ struct bpf_btf_info *info;
+ int btf_fd = -1, err;
+ uint32_t info_len;
+
+ raw_btf = btf_raw_create(&hdr_tmpl,
+ test->raw_types,
+ test->str_sec,
+ test->str_sec_size,
+ &raw_btf_size);
+
+ if (!raw_btf)
+ return -1;
+
+ *btf_log_buf = '\0';
+
+ user_btf = malloc(raw_btf_size);
+ if (CHECK(!user_btf, "!user_btf")) {
+ err = -1;
+ goto done;
+ }
+
+ btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
+ btf_log_buf, BTF_LOG_BUF_SIZE,
+ args.always_log);
+ if (CHECK(btf_fd == -1, "errno:%d", errno)) {
+ err = -1;
+ goto done;
+ }
+
+ /*
+ * GET_INFO should error out if the userspace info
+ * has non zero tailing bytes.
+ */
+ info = &info_garbage.info;
+ memset(info, 0, sizeof(*info));
+ info_garbage.garbage = 0xdeadbeef;
+ info_len = sizeof(info_garbage);
+ info->btf = ptr_to_u64(user_btf);
+ info->btf_size = raw_btf_size;
+
+ err = bpf_obj_get_info_by_fd(btf_fd, info, &info_len);
+ if (CHECK(!err, "!err")) {
+ err = -1;
+ goto done;
+ }
+
+ /*
+ * GET_INFO should succeed even info_len is larger than
+ * the kernel supported as long as tailing bytes are zero.
+ * The kernel supported info len should also be returned
+ * to userspace.
+ */
+ info_garbage.garbage = 0;
+ err = bpf_obj_get_info_by_fd(btf_fd, info, &info_len);
+ if (CHECK(err || info_len != sizeof(*info),
+ "err:%d errno:%d info_len:%u sizeof(*info):%lu",
+ err, errno, info_len, sizeof(*info))) {
+ err = -1;
+ goto done;
+ }
+
+ fprintf(stderr, "OK");
+
+done:
+ if (*btf_log_buf && (err || args.always_log))
+ fprintf(stderr, "\n%s", btf_log_buf);
+
+ free(raw_btf);
+ free(user_btf);
+
+ if (btf_fd != -1)
+ close(btf_fd);
+
+ return err;
+}
+
+static int test_btf_id(unsigned int test_num)
+{
+ const struct btf_get_info_test *test = &get_info_tests[test_num - 1];
+ struct bpf_create_map_attr create_attr = {};
+ uint8_t *raw_btf = NULL, *user_btf[2] = {};
+ int btf_fd[2] = {-1, -1}, map_fd = -1;
+ struct bpf_map_info map_info = {};
+ struct bpf_btf_info info[2] = {};
+ unsigned int raw_btf_size;
+ uint32_t info_len;
+ int err, i, ret;
+
+ raw_btf = btf_raw_create(&hdr_tmpl,
+ test->raw_types,
+ test->str_sec,
+ test->str_sec_size,
+ &raw_btf_size);
+
+ if (!raw_btf)
+ return -1;
+
+ *btf_log_buf = '\0';
+
+ for (i = 0; i < 2; i++) {
+ user_btf[i] = malloc(raw_btf_size);
+ if (CHECK(!user_btf[i], "!user_btf[%d]", i)) {
+ err = -1;
+ goto done;
+ }
+ info[i].btf = ptr_to_u64(user_btf[i]);
+ info[i].btf_size = raw_btf_size;
+ }
+
+ btf_fd[0] = bpf_load_btf(raw_btf, raw_btf_size,
+ btf_log_buf, BTF_LOG_BUF_SIZE,
+ args.always_log);
+ if (CHECK(btf_fd[0] == -1, "errno:%d", errno)) {
+ err = -1;
+ goto done;
+ }
+
+ /* Test BPF_OBJ_GET_INFO_BY_ID on btf_id */
+ info_len = sizeof(info[0]);
+ err = bpf_obj_get_info_by_fd(btf_fd[0], &info[0], &info_len);
+ if (CHECK(err, "errno:%d", errno)) {
+ err = -1;
+ goto done;
+ }
+
+ btf_fd[1] = bpf_btf_get_fd_by_id(info[0].id);
+ if (CHECK(btf_fd[1] == -1, "errno:%d", errno)) {
+ err = -1;
+ goto done;
+ }
+
+ ret = 0;
+ err = bpf_obj_get_info_by_fd(btf_fd[1], &info[1], &info_len);
+ if (CHECK(err || info[0].id != info[1].id ||
+ info[0].btf_size != info[1].btf_size ||
+ (ret = memcmp(user_btf[0], user_btf[1], info[0].btf_size)),
+ "err:%d errno:%d id0:%u id1:%u btf_size0:%u btf_size1:%u memcmp:%d",
+ err, errno, info[0].id, info[1].id,
+ info[0].btf_size, info[1].btf_size, ret)) {
+ err = -1;
+ goto done;
+ }
+
+ /* Test btf members in struct bpf_map_info */
+ create_attr.name = "test_btf_id";
+ create_attr.map_type = BPF_MAP_TYPE_ARRAY;
+ create_attr.key_size = sizeof(int);
+ create_attr.value_size = sizeof(unsigned int);
+ create_attr.max_entries = 4;
+ create_attr.btf_fd = btf_fd[0];
+ create_attr.btf_key_type_id = 1;
+ create_attr.btf_value_type_id = 2;
+
+ map_fd = bpf_create_map_xattr(&create_attr);
+ if (CHECK(map_fd == -1, "errno:%d", errno)) {
+ err = -1;
+ goto done;
+ }
+
+ info_len = sizeof(map_info);
+ err = bpf_obj_get_info_by_fd(map_fd, &map_info, &info_len);
+ if (CHECK(err || map_info.btf_id != info[0].id ||
+ map_info.btf_key_type_id != 1 || map_info.btf_value_type_id != 2,
+ "err:%d errno:%d info.id:%u btf_id:%u btf_key_type_id:%u btf_value_type_id:%u",
+ err, errno, info[0].id, map_info.btf_id, map_info.btf_key_type_id,
+ map_info.btf_value_type_id)) {
+ err = -1;
+ goto done;
+ }
+
+ for (i = 0; i < 2; i++) {
+ close(btf_fd[i]);
+ btf_fd[i] = -1;
+ }
+
+ /* Test BTF ID is removed from the kernel */
+ btf_fd[0] = bpf_btf_get_fd_by_id(map_info.btf_id);
+ if (CHECK(btf_fd[0] == -1, "errno:%d", errno)) {
+ err = -1;
+ goto done;
+ }
+ close(btf_fd[0]);
+ btf_fd[0] = -1;
+
+ /* The map holds the last ref to BTF and its btf_id */
+ close(map_fd);
+ map_fd = -1;
+ btf_fd[0] = bpf_btf_get_fd_by_id(map_info.btf_id);
+ if (CHECK(btf_fd[0] != -1, "BTF lingers")) {
+ err = -1;
+ goto done;
+ }
+
+ fprintf(stderr, "OK");
+
+done:
+ if (*btf_log_buf && (err || args.always_log))
+ fprintf(stderr, "\n%s", btf_log_buf);
+
+ free(raw_btf);
+ if (map_fd != -1)
+ close(map_fd);
+ for (i = 0; i < 2; i++) {
+ free(user_btf[i]);
+ if (btf_fd[i] != -1)
+ close(btf_fd[i]);
+ }
+
+ return err;
+}
+
+static int do_test_get_info(unsigned int test_num)
+{
+ const struct btf_get_info_test *test = &get_info_tests[test_num - 1];
+ unsigned int raw_btf_size, user_btf_size, expected_nbytes;
+ uint8_t *raw_btf = NULL, *user_btf = NULL;
+ struct bpf_btf_info info = {};
+ int btf_fd = -1, err, ret;
+ uint32_t info_len;
+
+ fprintf(stderr, "BTF GET_INFO test[%u] (%s): ",
+ test_num, test->descr);
+
+ if (test->special_test)
+ return test->special_test(test_num);
+
+ raw_btf = btf_raw_create(&hdr_tmpl,
+ test->raw_types,
+ test->str_sec,
+ test->str_sec_size,
+ &raw_btf_size);
+
+ if (!raw_btf)
+ return -1;
+
+ *btf_log_buf = '\0';
+
+ user_btf = malloc(raw_btf_size);
+ if (CHECK(!user_btf, "!user_btf")) {
+ err = -1;
+ goto done;
+ }
+
+ btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
+ btf_log_buf, BTF_LOG_BUF_SIZE,
+ args.always_log);
+ if (CHECK(btf_fd == -1, "errno:%d", errno)) {
+ err = -1;
+ goto done;
+ }
+
+ user_btf_size = (int)raw_btf_size + test->btf_size_delta;
+ expected_nbytes = min(raw_btf_size, user_btf_size);
+ if (raw_btf_size > expected_nbytes)
+ memset(user_btf + expected_nbytes, 0xff,
+ raw_btf_size - expected_nbytes);
+
+ info_len = sizeof(info);
+ info.btf = ptr_to_u64(user_btf);
+ info.btf_size = user_btf_size;
+
+ ret = 0;
+ err = bpf_obj_get_info_by_fd(btf_fd, &info, &info_len);
+ if (CHECK(err || !info.id || info_len != sizeof(info) ||
+ info.btf_size != raw_btf_size ||
+ (ret = memcmp(raw_btf, user_btf, expected_nbytes)),
+ "err:%d errno:%d info.id:%u info_len:%u sizeof(info):%lu raw_btf_size:%u info.btf_size:%u expected_nbytes:%u memcmp:%d",
+ err, errno, info.id, info_len, sizeof(info),
+ raw_btf_size, info.btf_size, expected_nbytes, ret)) {
+ err = -1;
+ goto done;
+ }
+
+ while (expected_nbytes < raw_btf_size) {
+ fprintf(stderr, "%u...", expected_nbytes);
+ if (CHECK(user_btf[expected_nbytes++] != 0xff,
+ "user_btf[%u]:%x != 0xff", expected_nbytes - 1,
+ user_btf[expected_nbytes - 1])) {
+ err = -1;
+ goto done;
+ }
+ }
+
+ fprintf(stderr, "OK");
+
+done:
+ if (*btf_log_buf && (err || args.always_log))
+ fprintf(stderr, "\n%s", btf_log_buf);
+
+ free(raw_btf);
+ free(user_btf);
+
+ if (btf_fd != -1)
+ close(btf_fd);
+
+ return err;
+}
+
+static int test_get_info(void)
+{
+ unsigned int i;
+ int err = 0;
+
+ if (args.get_info_test_num)
+ return count_result(do_test_get_info(args.get_info_test_num));
+
+ for (i = 1; i <= ARRAY_SIZE(get_info_tests); i++)
+ err |= count_result(do_test_get_info(i));
+
+ return err;
+}
+
+struct btf_file_test {
+ const char *file;
+ bool btf_kv_notfound;
+};
+
+static struct btf_file_test file_tests[] = {
+{
+ .file = "test_btf_haskv.o",
+},
+{
+ .file = "test_btf_nokv.o",
+ .btf_kv_notfound = true,
+},
+};
+
+static int file_has_btf_elf(const char *fn)
+{
+ Elf_Scn *scn = NULL;
+ GElf_Ehdr ehdr;
+ int elf_fd;
+ Elf *elf;
+ int ret;
+
+ if (CHECK(elf_version(EV_CURRENT) == EV_NONE,
+ "elf_version(EV_CURRENT) == EV_NONE"))
+ return -1;
+
+ elf_fd = open(fn, O_RDONLY);
+ if (CHECK(elf_fd == -1, "open(%s): errno:%d", fn, errno))
+ return -1;
+
+ elf = elf_begin(elf_fd, ELF_C_READ, NULL);
+ if (CHECK(!elf, "elf_begin(%s): %s", fn, elf_errmsg(elf_errno()))) {
+ ret = -1;
+ goto done;
+ }
+
+ if (CHECK(!gelf_getehdr(elf, &ehdr), "!gelf_getehdr(%s)", fn)) {
+ ret = -1;
+ goto done;
+ }
+
+ while ((scn = elf_nextscn(elf, scn))) {
+ const char *sh_name;
+ GElf_Shdr sh;
+
+ if (CHECK(gelf_getshdr(scn, &sh) != &sh,
+ "file:%s gelf_getshdr != &sh", fn)) {
+ ret = -1;
+ goto done;
+ }
+
+ sh_name = elf_strptr(elf, ehdr.e_shstrndx, sh.sh_name);
+ if (!strcmp(sh_name, BTF_ELF_SEC)) {
+ ret = 1;
+ goto done;
+ }
+ }
+
+ ret = 0;
+
+done:
+ close(elf_fd);
+ elf_end(elf);
+ return ret;
+}
+
+static int do_test_file(unsigned int test_num)
+{
+ const struct btf_file_test *test = &file_tests[test_num - 1];
+ struct bpf_object *obj = NULL;
+ struct bpf_program *prog;
+ struct bpf_map *map;
+ int err;
+
+ fprintf(stderr, "BTF libbpf test[%u] (%s): ", test_num,
+ test->file);
+
+ err = file_has_btf_elf(test->file);
+ if (err == -1)
+ return err;
+
+ if (err == 0) {
+ fprintf(stderr, "SKIP. No ELF %s found", BTF_ELF_SEC);
+ skip_cnt++;
+ return 0;
+ }
+
+ obj = bpf_object__open(test->file);
+ if (CHECK(IS_ERR(obj), "obj: %ld", PTR_ERR(obj)))
+ return PTR_ERR(obj);
+
+ err = bpf_object__btf_fd(obj);
+ if (CHECK(err == -1, "bpf_object__btf_fd: -1"))
+ goto done;
+
+ prog = bpf_program__next(NULL, obj);
+ if (CHECK(!prog, "Cannot find bpf_prog")) {
+ err = -1;
+ goto done;
+ }
+
+ bpf_program__set_type(prog, BPF_PROG_TYPE_TRACEPOINT);
+ err = bpf_object__load(obj);
+ if (CHECK(err < 0, "bpf_object__load: %d", err))
+ goto done;
+
+ map = bpf_object__find_map_by_name(obj, "btf_map");
+ if (CHECK(!map, "btf_map not found")) {
+ err = -1;
+ goto done;
+ }
+
+ err = (bpf_map__btf_key_type_id(map) == 0 || bpf_map__btf_value_type_id(map) == 0)
+ != test->btf_kv_notfound;
+ if (CHECK(err, "btf_key_type_id:%u btf_value_type_id:%u test->btf_kv_notfound:%u",
+ bpf_map__btf_key_type_id(map), bpf_map__btf_value_type_id(map),
+ test->btf_kv_notfound))
+ goto done;
+
+ fprintf(stderr, "OK");
+
+done:
+ bpf_object__close(obj);
+ return err;
+}
+
+static int test_file(void)
+{
+ unsigned int i;
+ int err = 0;
+
+ if (args.file_test_num)
+ return count_result(do_test_file(args.file_test_num));
+
+ for (i = 1; i <= ARRAY_SIZE(file_tests); i++)
+ err |= count_result(do_test_file(i));
+
+ return err;
+}
+
+const char *pprint_enum_str[] = {
+ "ENUM_ZERO",
+ "ENUM_ONE",
+ "ENUM_TWO",
+ "ENUM_THREE",
+};
+
+struct pprint_mapv {
+ uint32_t ui32;
+ uint16_t ui16;
+ /* 2 bytes hole */
+ int32_t si32;
+ uint32_t unused_bits2a:2,
+ bits28:28,
+ unused_bits2b:2;
+ union {
+ uint64_t ui64;
+ uint8_t ui8a[8];
+ };
+ enum {
+ ENUM_ZERO,
+ ENUM_ONE,
+ ENUM_TWO,
+ ENUM_THREE,
+ } aenum;
+};
+
+static struct btf_raw_test pprint_test_template = {
+ .raw_types = {
+ /* unsighed char */ /* [1] */
+ BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 8, 1),
+ /* unsigned short */ /* [2] */
+ BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 16, 2),
+ /* unsigned int */ /* [3] */
+ BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 32, 4),
+ /* int */ /* [4] */
+ BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+ /* unsigned long long */ /* [5] */
+ BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 64, 8),
+ /* 2 bits */ /* [6] */
+ BTF_TYPE_INT_ENC(0, 0, 0, 2, 2),
+ /* 28 bits */ /* [7] */
+ BTF_TYPE_INT_ENC(0, 0, 0, 28, 4),
+ /* uint8_t[8] */ /* [8] */
+ BTF_TYPE_ARRAY_ENC(9, 1, 8),
+ /* typedef unsigned char uint8_t */ /* [9] */
+ BTF_TYPEDEF_ENC(NAME_TBD, 1),
+ /* typedef unsigned short uint16_t */ /* [10] */
+ BTF_TYPEDEF_ENC(NAME_TBD, 2),
+ /* typedef unsigned int uint32_t */ /* [11] */
+ BTF_TYPEDEF_ENC(NAME_TBD, 3),
+ /* typedef int int32_t */ /* [12] */
+ BTF_TYPEDEF_ENC(NAME_TBD, 4),
+ /* typedef unsigned long long uint64_t *//* [13] */
+ BTF_TYPEDEF_ENC(NAME_TBD, 5),
+ /* union (anon) */ /* [14] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_UNION, 0, 2), 8),
+ BTF_MEMBER_ENC(NAME_TBD, 13, 0),/* uint64_t ui64; */
+ BTF_MEMBER_ENC(NAME_TBD, 8, 0), /* uint8_t ui8a[8]; */
+ /* enum (anon) */ /* [15] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 4), 4),
+ BTF_ENUM_ENC(NAME_TBD, 0),
+ BTF_ENUM_ENC(NAME_TBD, 1),
+ BTF_ENUM_ENC(NAME_TBD, 2),
+ BTF_ENUM_ENC(NAME_TBD, 3),
+ /* struct pprint_mapv */ /* [16] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 8), 32),
+ BTF_MEMBER_ENC(NAME_TBD, 11, 0), /* uint32_t ui32 */
+ BTF_MEMBER_ENC(NAME_TBD, 10, 32), /* uint16_t ui16 */
+ BTF_MEMBER_ENC(NAME_TBD, 12, 64), /* int32_t si32 */
+ BTF_MEMBER_ENC(NAME_TBD, 6, 96), /* unused_bits2a */
+ BTF_MEMBER_ENC(NAME_TBD, 7, 98), /* bits28 */
+ BTF_MEMBER_ENC(NAME_TBD, 6, 126), /* unused_bits2b */
+ BTF_MEMBER_ENC(0, 14, 128), /* union (anon) */
+ BTF_MEMBER_ENC(NAME_TBD, 15, 192), /* aenum */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum",
+ .str_sec_size = sizeof("\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum"),
+ .key_size = sizeof(unsigned int),
+ .value_size = sizeof(struct pprint_mapv),
+ .key_type_id = 3, /* unsigned int */
+ .value_type_id = 16, /* struct pprint_mapv */
+ .max_entries = 128 * 1024,
+};
+
+static struct btf_pprint_test_meta {
+ const char *descr;
+ enum bpf_map_type map_type;
+ const char *map_name;
+ bool ordered_map;
+ bool lossless_map;
+} pprint_tests_meta[] = {
+{
+ .descr = "BTF pretty print array",
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "pprint_test_array",
+ .ordered_map = true,
+ .lossless_map = true,
+},
+
+{
+ .descr = "BTF pretty print hash",
+ .map_type = BPF_MAP_TYPE_HASH,
+ .map_name = "pprint_test_hash",
+ .ordered_map = false,
+ .lossless_map = true,
+},
+
+{
+ .descr = "BTF pretty print lru hash",
+ .map_type = BPF_MAP_TYPE_LRU_HASH,
+ .map_name = "pprint_test_lru_hash",
+ .ordered_map = false,
+ .lossless_map = false,
+},
+
+};
+
+
+static void set_pprint_mapv(struct pprint_mapv *v, uint32_t i)
+{
+ v->ui32 = i;
+ v->si32 = -i;
+ v->unused_bits2a = 3;
+ v->bits28 = i;
+ v->unused_bits2b = 3;
+ v->ui64 = i;
+ v->aenum = i & 0x03;
+}
+
+static int do_test_pprint(void)
+{
+ const struct btf_raw_test *test = &pprint_test_template;
+ struct bpf_create_map_attr create_attr = {};
+ unsigned int key, nr_read_elems;
+ bool ordered_map, lossless_map;
+ int map_fd = -1, btf_fd = -1;
+ struct pprint_mapv mapv = {};
+ unsigned int raw_btf_size;
+ char expected_line[255];
+ FILE *pin_file = NULL;
+ char pin_path[255];
+ size_t line_len = 0;
+ char *line = NULL;
+ uint8_t *raw_btf;
+ ssize_t nread;
+ int err, ret;
+
+ fprintf(stderr, "%s......", test->descr);
+ raw_btf = btf_raw_create(&hdr_tmpl, test->raw_types,
+ test->str_sec, test->str_sec_size,
+ &raw_btf_size);
+
+ if (!raw_btf)
+ return -1;
+
+ *btf_log_buf = '\0';
+ btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
+ btf_log_buf, BTF_LOG_BUF_SIZE,
+ args.always_log);
+ free(raw_btf);
+
+ if (CHECK(btf_fd == -1, "errno:%d", errno)) {
+ err = -1;
+ goto done;
+ }
+
+ create_attr.name = test->map_name;
+ create_attr.map_type = test->map_type;
+ create_attr.key_size = test->key_size;
+ create_attr.value_size = test->value_size;
+ create_attr.max_entries = test->max_entries;
+ create_attr.btf_fd = btf_fd;
+ create_attr.btf_key_type_id = test->key_type_id;
+ create_attr.btf_value_type_id = test->value_type_id;
+
+ map_fd = bpf_create_map_xattr(&create_attr);
+ if (CHECK(map_fd == -1, "errno:%d", errno)) {
+ err = -1;
+ goto done;
+ }
+
+ ret = snprintf(pin_path, sizeof(pin_path), "%s/%s",
+ "/sys/fs/bpf", test->map_name);
+
+ if (CHECK(ret == sizeof(pin_path), "pin_path %s/%s is too long",
+ "/sys/fs/bpf", test->map_name)) {
+ err = -1;
+ goto done;
+ }
+
+ err = bpf_obj_pin(map_fd, pin_path);
+ if (CHECK(err, "bpf_obj_pin(%s): errno:%d.", pin_path, errno))
+ goto done;
+
+ for (key = 0; key < test->max_entries; key++) {
+ set_pprint_mapv(&mapv, key);
+ bpf_map_update_elem(map_fd, &key, &mapv, 0);
+ }
+
+ pin_file = fopen(pin_path, "r");
+ if (CHECK(!pin_file, "fopen(%s): errno:%d", pin_path, errno)) {
+ err = -1;
+ goto done;
+ }
+
+ /* Skip lines start with '#' */
+ while ((nread = getline(&line, &line_len, pin_file)) > 0 &&
+ *line == '#')
+ ;
+
+ if (CHECK(nread <= 0, "Unexpected EOF")) {
+ err = -1;
+ goto done;
+ }
+
+ nr_read_elems = 0;
+ ordered_map = test->ordered_map;
+ lossless_map = test->lossless_map;
+ do {
+ ssize_t nexpected_line;
+ unsigned int next_key;
+
+ next_key = ordered_map ? nr_read_elems : atoi(line);
+ set_pprint_mapv(&mapv, next_key);
+ nexpected_line = snprintf(expected_line, sizeof(expected_line),
+ "%u: {%u,0,%d,0x%x,0x%x,0x%x,{%lu|[%u,%u,%u,%u,%u,%u,%u,%u]},%s}\n",
+ next_key,
+ mapv.ui32, mapv.si32,
+ mapv.unused_bits2a, mapv.bits28, mapv.unused_bits2b,
+ mapv.ui64,
+ mapv.ui8a[0], mapv.ui8a[1], mapv.ui8a[2], mapv.ui8a[3],
+ mapv.ui8a[4], mapv.ui8a[5], mapv.ui8a[6], mapv.ui8a[7],
+ pprint_enum_str[mapv.aenum]);
+
+ if (CHECK(nexpected_line == sizeof(expected_line),
+ "expected_line is too long")) {
+ err = -1;
+ goto done;
+ }
+
+ if (strcmp(expected_line, line)) {
+ err = -1;
+ fprintf(stderr, "unexpected pprint output\n");
+ fprintf(stderr, "expected: %s", expected_line);
+ fprintf(stderr, " read: %s", line);
+ goto done;
+ }
+
+ nread = getline(&line, &line_len, pin_file);
+ } while (++nr_read_elems < test->max_entries && nread > 0);
+
+ if (lossless_map &&
+ CHECK(nr_read_elems < test->max_entries,
+ "Unexpected EOF. nr_read_elems:%u test->max_entries:%u",
+ nr_read_elems, test->max_entries)) {
+ err = -1;
+ goto done;
+ }
+
+ if (CHECK(nread > 0, "Unexpected extra pprint output: %s", line)) {
+ err = -1;
+ goto done;
+ }
+
+ err = 0;
+
+done:
+ if (!err)
+ fprintf(stderr, "OK");
+ if (*btf_log_buf && (err || args.always_log))
+ fprintf(stderr, "\n%s", btf_log_buf);
+ if (btf_fd != -1)
+ close(btf_fd);
+ if (map_fd != -1)
+ close(map_fd);
+ if (pin_file)
+ fclose(pin_file);
+ unlink(pin_path);
+ free(line);
+
+ return err;
+}
+
+static int test_pprint(void)
+{
+ unsigned int i;
+ int err = 0;
+
+ for (i = 0; i < ARRAY_SIZE(pprint_tests_meta); i++) {
+ pprint_test_template.descr = pprint_tests_meta[i].descr;
+ pprint_test_template.map_type = pprint_tests_meta[i].map_type;
+ pprint_test_template.map_name = pprint_tests_meta[i].map_name;
+ pprint_test_template.ordered_map = pprint_tests_meta[i].ordered_map;
+ pprint_test_template.lossless_map = pprint_tests_meta[i].lossless_map;
+
+ err |= count_result(do_test_pprint());
+ }
+
+ return err;
+}
+
+static void usage(const char *cmd)
+{
+ fprintf(stderr, "Usage: %s [-l] [[-r test_num (1 - %zu)] | [-g test_num (1 - %zu)] | [-f test_num (1 - %zu)] | [-p]]\n",
+ cmd, ARRAY_SIZE(raw_tests), ARRAY_SIZE(get_info_tests),
+ ARRAY_SIZE(file_tests));
+}
+
+static int parse_args(int argc, char **argv)
+{
+ const char *optstr = "lpf:r:g:";
+ int opt;
+
+ while ((opt = getopt(argc, argv, optstr)) != -1) {
+ switch (opt) {
+ case 'l':
+ args.always_log = true;
+ break;
+ case 'f':
+ args.file_test_num = atoi(optarg);
+ args.file_test = true;
+ break;
+ case 'r':
+ args.raw_test_num = atoi(optarg);
+ args.raw_test = true;
+ break;
+ case 'g':
+ args.get_info_test_num = atoi(optarg);
+ args.get_info_test = true;
+ break;
+ case 'p':
+ args.pprint_test = true;
+ break;
+ case 'h':
+ usage(argv[0]);
+ exit(0);
+ default:
+ usage(argv[0]);
+ return -1;
+ }
+ }
+
+ if (args.raw_test_num &&
+ (args.raw_test_num < 1 ||
+ args.raw_test_num > ARRAY_SIZE(raw_tests))) {
+ fprintf(stderr, "BTF raw test number must be [1 - %zu]\n",
+ ARRAY_SIZE(raw_tests));
+ return -1;
+ }
+
+ if (args.file_test_num &&
+ (args.file_test_num < 1 ||
+ args.file_test_num > ARRAY_SIZE(file_tests))) {
+ fprintf(stderr, "BTF file test number must be [1 - %zu]\n",
+ ARRAY_SIZE(file_tests));
+ return -1;
+ }
+
+ if (args.get_info_test_num &&
+ (args.get_info_test_num < 1 ||
+ args.get_info_test_num > ARRAY_SIZE(get_info_tests))) {
+ fprintf(stderr, "BTF get info test number must be [1 - %zu]\n",
+ ARRAY_SIZE(get_info_tests));
+ return -1;
+ }
+
+ return 0;
+}
+
+static void print_summary(void)
+{
+ fprintf(stderr, "PASS:%u SKIP:%u FAIL:%u\n",
+ pass_cnt - skip_cnt, skip_cnt, error_cnt);
+}
+
+int main(int argc, char **argv)
+{
+ int err = 0;
+
+ err = parse_args(argc, argv);
+ if (err)
+ return err;
+
+ if (args.always_log)
+ libbpf_set_print(__base_pr, __base_pr, __base_pr);
+
+ if (args.raw_test)
+ err |= test_raw();
+
+ if (args.get_info_test)
+ err |= test_get_info();
+
+ if (args.file_test)
+ err |= test_file();
+
+ if (args.pprint_test)
+ err |= test_pprint();
+
+ if (args.raw_test || args.get_info_test || args.file_test ||
+ args.pprint_test)
+ goto done;
+
+ err |= test_raw();
+ err |= test_get_info();
+ err |= test_file();
+
+done:
+ print_summary();
+ return err;
+}
diff --git a/tools/testing/selftests/bpf/test_btf_haskv.c b/tools/testing/selftests/bpf/test_btf_haskv.c
new file mode 100644
index 000000000..b21b876f4
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_btf_haskv.c
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018 Facebook */
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+int _version SEC("version") = 1;
+
+struct ipv_counts {
+ unsigned int v4;
+ unsigned int v6;
+};
+
+struct bpf_map_def SEC("maps") btf_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(struct ipv_counts),
+ .max_entries = 4,
+};
+
+BPF_ANNOTATE_KV_PAIR(btf_map, int, struct ipv_counts);
+
+struct dummy_tracepoint_args {
+ unsigned long long pad;
+ struct sock *sock;
+};
+
+SEC("dummy_tracepoint")
+int _dummy_tracepoint(struct dummy_tracepoint_args *arg)
+{
+ struct ipv_counts *counts;
+ int key = 0;
+
+ if (!arg->sock)
+ return 0;
+
+ counts = bpf_map_lookup_elem(&btf_map, &key);
+ if (!counts)
+ return 0;
+
+ counts->v6++;
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_btf_nokv.c b/tools/testing/selftests/bpf/test_btf_nokv.c
new file mode 100644
index 000000000..0ed8e088e
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_btf_nokv.c
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018 Facebook */
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+int _version SEC("version") = 1;
+
+struct ipv_counts {
+ unsigned int v4;
+ unsigned int v6;
+};
+
+struct bpf_map_def SEC("maps") btf_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(struct ipv_counts),
+ .max_entries = 4,
+};
+
+struct dummy_tracepoint_args {
+ unsigned long long pad;
+ struct sock *sock;
+};
+
+SEC("dummy_tracepoint")
+int _dummy_tracepoint(struct dummy_tracepoint_args *arg)
+{
+ struct ipv_counts *counts;
+ int key = 0;
+
+ if (!arg->sock)
+ return 0;
+
+ counts = bpf_map_lookup_elem(&btf_map, &key);
+ if (!counts)
+ return 0;
+
+ counts->v6++;
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_cgroup_storage.c b/tools/testing/selftests/bpf/test_cgroup_storage.c
new file mode 100644
index 000000000..4e196e3bf
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_cgroup_storage.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <assert.h>
+#include <bpf/bpf.h>
+#include <linux/filter.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "bpf_rlimit.h"
+#include "cgroup_helpers.h"
+
+char bpf_log_buf[BPF_LOG_BUF_SIZE];
+
+#define TEST_CGROUP "/test-bpf-cgroup-storage-buf/"
+
+int main(int argc, char **argv)
+{
+ struct bpf_insn prog[] = {
+ BPF_LD_MAP_FD(BPF_REG_1, 0), /* map fd */
+ BPF_MOV64_IMM(BPF_REG_2, 0), /* flags, not used */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_local_storage),
+ BPF_MOV64_IMM(BPF_REG_1, 1),
+ BPF_STX_XADD(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x1),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ };
+ size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
+ int error = EXIT_FAILURE;
+ int map_fd, prog_fd, cgroup_fd;
+ struct bpf_cgroup_storage_key key;
+ unsigned long long value;
+
+ map_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE, sizeof(key),
+ sizeof(value), 0, 0);
+ if (map_fd < 0) {
+ printf("Failed to create map: %s\n", strerror(errno));
+ goto out;
+ }
+
+ prog[0].imm = map_fd;
+ prog_fd = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB,
+ prog, insns_cnt, "GPL", 0,
+ bpf_log_buf, BPF_LOG_BUF_SIZE);
+ if (prog_fd < 0) {
+ printf("Failed to load bpf program: %s\n", bpf_log_buf);
+ goto out;
+ }
+
+ if (setup_cgroup_environment()) {
+ printf("Failed to setup cgroup environment\n");
+ goto err;
+ }
+
+ /* Create a cgroup, get fd, and join it */
+ cgroup_fd = create_and_get_cgroup(TEST_CGROUP);
+ if (!cgroup_fd) {
+ printf("Failed to create test cgroup\n");
+ goto err;
+ }
+
+ if (join_cgroup(TEST_CGROUP)) {
+ printf("Failed to join cgroup\n");
+ goto err;
+ }
+
+ /* Attach the bpf program */
+ if (bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0)) {
+ printf("Failed to attach bpf program\n");
+ goto err;
+ }
+
+ if (bpf_map_get_next_key(map_fd, NULL, &key)) {
+ printf("Failed to get the first key in cgroup storage\n");
+ goto err;
+ }
+
+ if (bpf_map_lookup_elem(map_fd, &key, &value)) {
+ printf("Failed to lookup cgroup storage\n");
+ goto err;
+ }
+
+ /* Every second packet should be dropped */
+ assert(system("ping localhost -c 1 -W 1 -q > /dev/null") == 0);
+ assert(system("ping localhost -c 1 -W 1 -q > /dev/null"));
+ assert(system("ping localhost -c 1 -W 1 -q > /dev/null") == 0);
+
+ /* Check the counter in the cgroup local storage */
+ if (bpf_map_lookup_elem(map_fd, &key, &value)) {
+ printf("Failed to lookup cgroup storage\n");
+ goto err;
+ }
+
+ if (value != 3) {
+ printf("Unexpected data in the cgroup storage: %llu\n", value);
+ goto err;
+ }
+
+ /* Bump the counter in the cgroup local storage */
+ value++;
+ if (bpf_map_update_elem(map_fd, &key, &value, 0)) {
+ printf("Failed to update the data in the cgroup storage\n");
+ goto err;
+ }
+
+ /* Every second packet should be dropped */
+ assert(system("ping localhost -c 1 -W 1 -q > /dev/null") == 0);
+ assert(system("ping localhost -c 1 -W 1 -q > /dev/null"));
+ assert(system("ping localhost -c 1 -W 1 -q > /dev/null") == 0);
+
+ /* Check the final value of the counter in the cgroup local storage */
+ if (bpf_map_lookup_elem(map_fd, &key, &value)) {
+ printf("Failed to lookup the cgroup storage\n");
+ goto err;
+ }
+
+ if (value != 7) {
+ printf("Unexpected data in the cgroup storage: %llu\n", value);
+ goto err;
+ }
+
+ error = 0;
+ printf("test_cgroup_storage:PASS\n");
+
+err:
+ cleanup_cgroup_environment();
+
+out:
+ return error;
+}
diff --git a/tools/testing/selftests/bpf/test_dev_cgroup.c b/tools/testing/selftests/bpf/test_dev_cgroup.c
new file mode 100644
index 000000000..9c8b50bac
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_dev_cgroup.c
@@ -0,0 +1,96 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <assert.h>
+#include <sys/time.h>
+
+#include <linux/bpf.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "cgroup_helpers.h"
+#include "bpf_rlimit.h"
+
+#define DEV_CGROUP_PROG "./dev_cgroup.o"
+
+#define TEST_CGROUP "/test-bpf-based-device-cgroup/"
+
+int main(int argc, char **argv)
+{
+ struct bpf_object *obj;
+ int error = EXIT_FAILURE;
+ int prog_fd, cgroup_fd;
+ __u32 prog_cnt;
+
+ if (bpf_prog_load(DEV_CGROUP_PROG, BPF_PROG_TYPE_CGROUP_DEVICE,
+ &obj, &prog_fd)) {
+ printf("Failed to load DEV_CGROUP program\n");
+ goto out;
+ }
+
+ if (setup_cgroup_environment()) {
+ printf("Failed to load DEV_CGROUP program\n");
+ goto err;
+ }
+
+ /* Create a cgroup, get fd, and join it */
+ cgroup_fd = create_and_get_cgroup(TEST_CGROUP);
+ if (!cgroup_fd) {
+ printf("Failed to create test cgroup\n");
+ goto err;
+ }
+
+ if (join_cgroup(TEST_CGROUP)) {
+ printf("Failed to join cgroup\n");
+ goto err;
+ }
+
+ /* Attach bpf program */
+ if (bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_DEVICE, 0)) {
+ printf("Failed to attach DEV_CGROUP program");
+ goto err;
+ }
+
+ if (bpf_prog_query(cgroup_fd, BPF_CGROUP_DEVICE, 0, NULL, NULL,
+ &prog_cnt)) {
+ printf("Failed to query attached programs");
+ goto err;
+ }
+
+ /* All operations with /dev/zero and and /dev/urandom are allowed,
+ * everything else is forbidden.
+ */
+ assert(system("rm -f /tmp/test_dev_cgroup_null") == 0);
+ assert(system("mknod /tmp/test_dev_cgroup_null c 1 3"));
+ assert(system("rm -f /tmp/test_dev_cgroup_null") == 0);
+
+ /* /dev/zero is whitelisted */
+ assert(system("rm -f /tmp/test_dev_cgroup_zero") == 0);
+ assert(system("mknod /tmp/test_dev_cgroup_zero c 1 5") == 0);
+ assert(system("rm -f /tmp/test_dev_cgroup_zero") == 0);
+
+ assert(system("dd if=/dev/urandom of=/dev/zero count=64") == 0);
+
+ /* src is allowed, target is forbidden */
+ assert(system("dd if=/dev/urandom of=/dev/full count=64"));
+
+ /* src is forbidden, target is allowed */
+ assert(system("dd if=/dev/random of=/dev/zero count=64"));
+
+ error = 0;
+ printf("test_dev_cgroup:PASS\n");
+
+err:
+ cleanup_cgroup_environment();
+
+out:
+ return error;
+}
diff --git a/tools/testing/selftests/bpf/test_get_stack_rawtp.c b/tools/testing/selftests/bpf/test_get_stack_rawtp.c
new file mode 100644
index 000000000..f6d9f238e
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_get_stack_rawtp.c
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+/* Permit pretty deep stack traces */
+#define MAX_STACK_RAWTP 100
+struct stack_trace_t {
+ int pid;
+ int kern_stack_size;
+ int user_stack_size;
+ int user_stack_buildid_size;
+ __u64 kern_stack[MAX_STACK_RAWTP];
+ __u64 user_stack[MAX_STACK_RAWTP];
+ struct bpf_stack_build_id user_stack_buildid[MAX_STACK_RAWTP];
+};
+
+struct bpf_map_def SEC("maps") perfmap = {
+ .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(__u32),
+ .max_entries = 2,
+};
+
+struct bpf_map_def SEC("maps") stackdata_map = {
+ .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct stack_trace_t),
+ .max_entries = 1,
+};
+
+/* Allocate per-cpu space twice the needed. For the code below
+ * usize = bpf_get_stack(ctx, raw_data, max_len, BPF_F_USER_STACK);
+ * if (usize < 0)
+ * return 0;
+ * ksize = bpf_get_stack(ctx, raw_data + usize, max_len - usize, 0);
+ *
+ * If we have value_size = MAX_STACK_RAWTP * sizeof(__u64),
+ * verifier will complain that access "raw_data + usize"
+ * with size "max_len - usize" may be out of bound.
+ * The maximum "raw_data + usize" is "raw_data + max_len"
+ * and the maximum "max_len - usize" is "max_len", verifier
+ * concludes that the maximum buffer access range is
+ * "raw_data[0...max_len * 2 - 1]" and hence reject the program.
+ *
+ * Doubling the to-be-used max buffer size can fix this verifier
+ * issue and avoid complicated C programming massaging.
+ * This is an acceptable workaround since there is one entry here.
+ */
+struct bpf_map_def SEC("maps") rawdata_map = {
+ .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = MAX_STACK_RAWTP * sizeof(__u64) * 2,
+ .max_entries = 1,
+};
+
+SEC("tracepoint/raw_syscalls/sys_enter")
+int bpf_prog1(void *ctx)
+{
+ int max_len, max_buildid_len, usize, ksize, total_size;
+ struct stack_trace_t *data;
+ void *raw_data;
+ __u32 key = 0;
+
+ data = bpf_map_lookup_elem(&stackdata_map, &key);
+ if (!data)
+ return 0;
+
+ max_len = MAX_STACK_RAWTP * sizeof(__u64);
+ max_buildid_len = MAX_STACK_RAWTP * sizeof(struct bpf_stack_build_id);
+ data->pid = bpf_get_current_pid_tgid();
+ data->kern_stack_size = bpf_get_stack(ctx, data->kern_stack,
+ max_len, 0);
+ data->user_stack_size = bpf_get_stack(ctx, data->user_stack, max_len,
+ BPF_F_USER_STACK);
+ data->user_stack_buildid_size = bpf_get_stack(
+ ctx, data->user_stack_buildid, max_buildid_len,
+ BPF_F_USER_STACK | BPF_F_USER_BUILD_ID);
+ bpf_perf_event_output(ctx, &perfmap, 0, data, sizeof(*data));
+
+ /* write both kernel and user stacks to the same buffer */
+ raw_data = bpf_map_lookup_elem(&rawdata_map, &key);
+ if (!raw_data)
+ return 0;
+
+ usize = bpf_get_stack(ctx, raw_data, max_len, BPF_F_USER_STACK);
+ if (usize < 0)
+ return 0;
+
+ ksize = bpf_get_stack(ctx, raw_data + usize, max_len - usize, 0);
+ if (ksize < 0)
+ return 0;
+
+ total_size = usize + ksize;
+ if (total_size > 0 && total_size <= max_len)
+ bpf_perf_event_output(ctx, &perfmap, 0, raw_data, total_size);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
diff --git a/tools/testing/selftests/bpf/test_iptunnel_common.h b/tools/testing/selftests/bpf/test_iptunnel_common.h
new file mode 100644
index 000000000..e4cd252a1
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_iptunnel_common.h
@@ -0,0 +1,37 @@
+/* Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#ifndef _TEST_IPTNL_COMMON_H
+#define _TEST_IPTNL_COMMON_H
+
+#include <linux/types.h>
+
+#define MAX_IPTNL_ENTRIES 256U
+
+struct vip {
+ union {
+ __u32 v6[4];
+ __u32 v4;
+ } daddr;
+ __u16 dport;
+ __u16 family;
+ __u8 protocol;
+};
+
+struct iptnl_info {
+ union {
+ __u32 v6[4];
+ __u32 v4;
+ } saddr;
+ union {
+ __u32 v6[4];
+ __u32 v4;
+ } daddr;
+ __u16 family;
+ __u8 dmac[6];
+};
+
+#endif
diff --git a/tools/testing/selftests/bpf/test_kmod.sh b/tools/testing/selftests/bpf/test_kmod.sh
new file mode 100755
index 000000000..9df0d2ac4
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_kmod.sh
@@ -0,0 +1,61 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+msg="skip all tests:"
+if [ "$(id -u)" != "0" ]; then
+ echo $msg please run this as root >&2
+ exit $ksft_skip
+fi
+
+SRC_TREE=../../../../
+
+test_run()
+{
+ sysctl -w net.core.bpf_jit_enable=$1 2>&1 > /dev/null
+ sysctl -w net.core.bpf_jit_harden=$2 2>&1 > /dev/null
+
+ echo "[ JIT enabled:$1 hardened:$2 ]"
+ dmesg -C
+ if [ -f ${SRC_TREE}/lib/test_bpf.ko ]; then
+ insmod ${SRC_TREE}/lib/test_bpf.ko 2> /dev/null
+ if [ $? -ne 0 ]; then
+ rc=1
+ fi
+ else
+ # Use modprobe dry run to check for missing test_bpf module
+ if ! /sbin/modprobe -q -n test_bpf; then
+ echo "test_bpf: [SKIP]"
+ elif /sbin/modprobe -q test_bpf; then
+ echo "test_bpf: ok"
+ else
+ echo "test_bpf: [FAIL]"
+ rc=1
+ fi
+ fi
+ rmmod test_bpf 2> /dev/null
+ dmesg | grep FAIL
+}
+
+test_save()
+{
+ JE=`sysctl -n net.core.bpf_jit_enable`
+ JH=`sysctl -n net.core.bpf_jit_harden`
+}
+
+test_restore()
+{
+ sysctl -w net.core.bpf_jit_enable=$JE 2>&1 > /dev/null
+ sysctl -w net.core.bpf_jit_harden=$JH 2>&1 > /dev/null
+}
+
+rc=0
+test_save
+test_run 0 0
+test_run 1 0
+test_run 1 1
+test_run 1 2
+test_restore
+exit $rc
diff --git a/tools/testing/selftests/bpf/test_l4lb.c b/tools/testing/selftests/bpf/test_l4lb.c
new file mode 100644
index 000000000..1e10c9590
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_l4lb.c
@@ -0,0 +1,473 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stddef.h>
+#include <stdbool.h>
+#include <string.h>
+#include <linux/pkt_cls.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include "bpf_helpers.h"
+#include "test_iptunnel_common.h"
+#include "bpf_endian.h"
+
+int _version SEC("version") = 1;
+
+static inline __u32 rol32(__u32 word, unsigned int shift)
+{
+ return (word << shift) | (word >> ((-shift) & 31));
+}
+
+/* copy paste of jhash from kernel sources to make sure llvm
+ * can compile it into valid sequence of bpf instructions
+ */
+#define __jhash_mix(a, b, c) \
+{ \
+ a -= c; a ^= rol32(c, 4); c += b; \
+ b -= a; b ^= rol32(a, 6); a += c; \
+ c -= b; c ^= rol32(b, 8); b += a; \
+ a -= c; a ^= rol32(c, 16); c += b; \
+ b -= a; b ^= rol32(a, 19); a += c; \
+ c -= b; c ^= rol32(b, 4); b += a; \
+}
+
+#define __jhash_final(a, b, c) \
+{ \
+ c ^= b; c -= rol32(b, 14); \
+ a ^= c; a -= rol32(c, 11); \
+ b ^= a; b -= rol32(a, 25); \
+ c ^= b; c -= rol32(b, 16); \
+ a ^= c; a -= rol32(c, 4); \
+ b ^= a; b -= rol32(a, 14); \
+ c ^= b; c -= rol32(b, 24); \
+}
+
+#define JHASH_INITVAL 0xdeadbeef
+
+typedef unsigned int u32;
+
+static inline u32 jhash(const void *key, u32 length, u32 initval)
+{
+ u32 a, b, c;
+ const unsigned char *k = key;
+
+ a = b = c = JHASH_INITVAL + length + initval;
+
+ while (length > 12) {
+ a += *(u32 *)(k);
+ b += *(u32 *)(k + 4);
+ c += *(u32 *)(k + 8);
+ __jhash_mix(a, b, c);
+ length -= 12;
+ k += 12;
+ }
+ switch (length) {
+ case 12: c += (u32)k[11]<<24;
+ case 11: c += (u32)k[10]<<16;
+ case 10: c += (u32)k[9]<<8;
+ case 9: c += k[8];
+ case 8: b += (u32)k[7]<<24;
+ case 7: b += (u32)k[6]<<16;
+ case 6: b += (u32)k[5]<<8;
+ case 5: b += k[4];
+ case 4: a += (u32)k[3]<<24;
+ case 3: a += (u32)k[2]<<16;
+ case 2: a += (u32)k[1]<<8;
+ case 1: a += k[0];
+ __jhash_final(a, b, c);
+ case 0: /* Nothing left to add */
+ break;
+ }
+
+ return c;
+}
+
+static inline u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
+{
+ a += initval;
+ b += initval;
+ c += initval;
+ __jhash_final(a, b, c);
+ return c;
+}
+
+static inline u32 jhash_2words(u32 a, u32 b, u32 initval)
+{
+ return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2));
+}
+
+#define PCKT_FRAGMENTED 65343
+#define IPV4_HDR_LEN_NO_OPT 20
+#define IPV4_PLUS_ICMP_HDR 28
+#define IPV6_PLUS_ICMP_HDR 48
+#define RING_SIZE 2
+#define MAX_VIPS 12
+#define MAX_REALS 5
+#define CTL_MAP_SIZE 16
+#define CH_RINGS_SIZE (MAX_VIPS * RING_SIZE)
+#define F_IPV6 (1 << 0)
+#define F_HASH_NO_SRC_PORT (1 << 0)
+#define F_ICMP (1 << 0)
+#define F_SYN_SET (1 << 1)
+
+struct packet_description {
+ union {
+ __be32 src;
+ __be32 srcv6[4];
+ };
+ union {
+ __be32 dst;
+ __be32 dstv6[4];
+ };
+ union {
+ __u32 ports;
+ __u16 port16[2];
+ };
+ __u8 proto;
+ __u8 flags;
+};
+
+struct ctl_value {
+ union {
+ __u64 value;
+ __u32 ifindex;
+ __u8 mac[6];
+ };
+};
+
+struct vip_meta {
+ __u32 flags;
+ __u32 vip_num;
+};
+
+struct real_definition {
+ union {
+ __be32 dst;
+ __be32 dstv6[4];
+ };
+ __u8 flags;
+};
+
+struct vip_stats {
+ __u64 bytes;
+ __u64 pkts;
+};
+
+struct eth_hdr {
+ unsigned char eth_dest[ETH_ALEN];
+ unsigned char eth_source[ETH_ALEN];
+ unsigned short eth_proto;
+};
+
+struct bpf_map_def SEC("maps") vip_map = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(struct vip),
+ .value_size = sizeof(struct vip_meta),
+ .max_entries = MAX_VIPS,
+};
+
+struct bpf_map_def SEC("maps") ch_rings = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u32),
+ .max_entries = CH_RINGS_SIZE,
+};
+
+struct bpf_map_def SEC("maps") reals = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct real_definition),
+ .max_entries = MAX_REALS,
+};
+
+struct bpf_map_def SEC("maps") stats = {
+ .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct vip_stats),
+ .max_entries = MAX_VIPS,
+};
+
+struct bpf_map_def SEC("maps") ctl_array = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct ctl_value),
+ .max_entries = CTL_MAP_SIZE,
+};
+
+static __always_inline __u32 get_packet_hash(struct packet_description *pckt,
+ bool ipv6)
+{
+ if (ipv6)
+ return jhash_2words(jhash(pckt->srcv6, 16, MAX_VIPS),
+ pckt->ports, CH_RINGS_SIZE);
+ else
+ return jhash_2words(pckt->src, pckt->ports, CH_RINGS_SIZE);
+}
+
+static __always_inline bool get_packet_dst(struct real_definition **real,
+ struct packet_description *pckt,
+ struct vip_meta *vip_info,
+ bool is_ipv6)
+{
+ __u32 hash = get_packet_hash(pckt, is_ipv6) % RING_SIZE;
+ __u32 key = RING_SIZE * vip_info->vip_num + hash;
+ __u32 *real_pos;
+
+ real_pos = bpf_map_lookup_elem(&ch_rings, &key);
+ if (!real_pos)
+ return false;
+ key = *real_pos;
+ *real = bpf_map_lookup_elem(&reals, &key);
+ if (!(*real))
+ return false;
+ return true;
+}
+
+static __always_inline int parse_icmpv6(void *data, void *data_end, __u64 off,
+ struct packet_description *pckt)
+{
+ struct icmp6hdr *icmp_hdr;
+ struct ipv6hdr *ip6h;
+
+ icmp_hdr = data + off;
+ if (icmp_hdr + 1 > data_end)
+ return TC_ACT_SHOT;
+ if (icmp_hdr->icmp6_type != ICMPV6_PKT_TOOBIG)
+ return TC_ACT_OK;
+ off += sizeof(struct icmp6hdr);
+ ip6h = data + off;
+ if (ip6h + 1 > data_end)
+ return TC_ACT_SHOT;
+ pckt->proto = ip6h->nexthdr;
+ pckt->flags |= F_ICMP;
+ memcpy(pckt->srcv6, ip6h->daddr.s6_addr32, 16);
+ memcpy(pckt->dstv6, ip6h->saddr.s6_addr32, 16);
+ return TC_ACT_UNSPEC;
+}
+
+static __always_inline int parse_icmp(void *data, void *data_end, __u64 off,
+ struct packet_description *pckt)
+{
+ struct icmphdr *icmp_hdr;
+ struct iphdr *iph;
+
+ icmp_hdr = data + off;
+ if (icmp_hdr + 1 > data_end)
+ return TC_ACT_SHOT;
+ if (icmp_hdr->type != ICMP_DEST_UNREACH ||
+ icmp_hdr->code != ICMP_FRAG_NEEDED)
+ return TC_ACT_OK;
+ off += sizeof(struct icmphdr);
+ iph = data + off;
+ if (iph + 1 > data_end)
+ return TC_ACT_SHOT;
+ if (iph->ihl != 5)
+ return TC_ACT_SHOT;
+ pckt->proto = iph->protocol;
+ pckt->flags |= F_ICMP;
+ pckt->src = iph->daddr;
+ pckt->dst = iph->saddr;
+ return TC_ACT_UNSPEC;
+}
+
+static __always_inline bool parse_udp(void *data, __u64 off, void *data_end,
+ struct packet_description *pckt)
+{
+ struct udphdr *udp;
+ udp = data + off;
+
+ if (udp + 1 > data_end)
+ return false;
+
+ if (!(pckt->flags & F_ICMP)) {
+ pckt->port16[0] = udp->source;
+ pckt->port16[1] = udp->dest;
+ } else {
+ pckt->port16[0] = udp->dest;
+ pckt->port16[1] = udp->source;
+ }
+ return true;
+}
+
+static __always_inline bool parse_tcp(void *data, __u64 off, void *data_end,
+ struct packet_description *pckt)
+{
+ struct tcphdr *tcp;
+
+ tcp = data + off;
+ if (tcp + 1 > data_end)
+ return false;
+
+ if (tcp->syn)
+ pckt->flags |= F_SYN_SET;
+
+ if (!(pckt->flags & F_ICMP)) {
+ pckt->port16[0] = tcp->source;
+ pckt->port16[1] = tcp->dest;
+ } else {
+ pckt->port16[0] = tcp->dest;
+ pckt->port16[1] = tcp->source;
+ }
+ return true;
+}
+
+static __always_inline int process_packet(void *data, __u64 off, void *data_end,
+ bool is_ipv6, struct __sk_buff *skb)
+{
+ void *pkt_start = (void *)(long)skb->data;
+ struct packet_description pckt = {};
+ struct eth_hdr *eth = pkt_start;
+ struct bpf_tunnel_key tkey = {};
+ struct vip_stats *data_stats;
+ struct real_definition *dst;
+ struct vip_meta *vip_info;
+ struct ctl_value *cval;
+ __u32 v4_intf_pos = 1;
+ __u32 v6_intf_pos = 2;
+ struct ipv6hdr *ip6h;
+ struct vip vip = {};
+ struct iphdr *iph;
+ int tun_flag = 0;
+ __u16 pkt_bytes;
+ __u64 iph_len;
+ __u32 ifindex;
+ __u8 protocol;
+ __u32 vip_num;
+ int action;
+
+ tkey.tunnel_ttl = 64;
+ if (is_ipv6) {
+ ip6h = data + off;
+ if (ip6h + 1 > data_end)
+ return TC_ACT_SHOT;
+
+ iph_len = sizeof(struct ipv6hdr);
+ protocol = ip6h->nexthdr;
+ pckt.proto = protocol;
+ pkt_bytes = bpf_ntohs(ip6h->payload_len);
+ off += iph_len;
+ if (protocol == IPPROTO_FRAGMENT) {
+ return TC_ACT_SHOT;
+ } else if (protocol == IPPROTO_ICMPV6) {
+ action = parse_icmpv6(data, data_end, off, &pckt);
+ if (action >= 0)
+ return action;
+ off += IPV6_PLUS_ICMP_HDR;
+ } else {
+ memcpy(pckt.srcv6, ip6h->saddr.s6_addr32, 16);
+ memcpy(pckt.dstv6, ip6h->daddr.s6_addr32, 16);
+ }
+ } else {
+ iph = data + off;
+ if (iph + 1 > data_end)
+ return TC_ACT_SHOT;
+ if (iph->ihl != 5)
+ return TC_ACT_SHOT;
+
+ protocol = iph->protocol;
+ pckt.proto = protocol;
+ pkt_bytes = bpf_ntohs(iph->tot_len);
+ off += IPV4_HDR_LEN_NO_OPT;
+
+ if (iph->frag_off & PCKT_FRAGMENTED)
+ return TC_ACT_SHOT;
+ if (protocol == IPPROTO_ICMP) {
+ action = parse_icmp(data, data_end, off, &pckt);
+ if (action >= 0)
+ return action;
+ off += IPV4_PLUS_ICMP_HDR;
+ } else {
+ pckt.src = iph->saddr;
+ pckt.dst = iph->daddr;
+ }
+ }
+ protocol = pckt.proto;
+
+ if (protocol == IPPROTO_TCP) {
+ if (!parse_tcp(data, off, data_end, &pckt))
+ return TC_ACT_SHOT;
+ } else if (protocol == IPPROTO_UDP) {
+ if (!parse_udp(data, off, data_end, &pckt))
+ return TC_ACT_SHOT;
+ } else {
+ return TC_ACT_SHOT;
+ }
+
+ if (is_ipv6)
+ memcpy(vip.daddr.v6, pckt.dstv6, 16);
+ else
+ vip.daddr.v4 = pckt.dst;
+
+ vip.dport = pckt.port16[1];
+ vip.protocol = pckt.proto;
+ vip_info = bpf_map_lookup_elem(&vip_map, &vip);
+ if (!vip_info) {
+ vip.dport = 0;
+ vip_info = bpf_map_lookup_elem(&vip_map, &vip);
+ if (!vip_info)
+ return TC_ACT_SHOT;
+ pckt.port16[1] = 0;
+ }
+
+ if (vip_info->flags & F_HASH_NO_SRC_PORT)
+ pckt.port16[0] = 0;
+
+ if (!get_packet_dst(&dst, &pckt, vip_info, is_ipv6))
+ return TC_ACT_SHOT;
+
+ if (dst->flags & F_IPV6) {
+ cval = bpf_map_lookup_elem(&ctl_array, &v6_intf_pos);
+ if (!cval)
+ return TC_ACT_SHOT;
+ ifindex = cval->ifindex;
+ memcpy(tkey.remote_ipv6, dst->dstv6, 16);
+ tun_flag = BPF_F_TUNINFO_IPV6;
+ } else {
+ cval = bpf_map_lookup_elem(&ctl_array, &v4_intf_pos);
+ if (!cval)
+ return TC_ACT_SHOT;
+ ifindex = cval->ifindex;
+ tkey.remote_ipv4 = dst->dst;
+ }
+ vip_num = vip_info->vip_num;
+ data_stats = bpf_map_lookup_elem(&stats, &vip_num);
+ if (!data_stats)
+ return TC_ACT_SHOT;
+ data_stats->pkts++;
+ data_stats->bytes += pkt_bytes;
+ bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey), tun_flag);
+ *(u32 *)eth->eth_dest = tkey.remote_ipv4;
+ return bpf_redirect(ifindex, 0);
+}
+
+SEC("l4lb-demo")
+int balancer_ingress(struct __sk_buff *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct eth_hdr *eth = data;
+ __u32 eth_proto;
+ __u32 nh_off;
+
+ nh_off = sizeof(struct eth_hdr);
+ if (data + nh_off > data_end)
+ return TC_ACT_SHOT;
+ eth_proto = eth->eth_proto;
+ if (eth_proto == bpf_htons(ETH_P_IP))
+ return process_packet(data, nh_off, data_end, false, ctx);
+ else if (eth_proto == bpf_htons(ETH_P_IPV6))
+ return process_packet(data, nh_off, data_end, true, ctx);
+ else
+ return TC_ACT_SHOT;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_l4lb_noinline.c b/tools/testing/selftests/bpf/test_l4lb_noinline.c
new file mode 100644
index 000000000..ba44a14e6
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_l4lb_noinline.c
@@ -0,0 +1,473 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017 Facebook
+#include <stddef.h>
+#include <stdbool.h>
+#include <string.h>
+#include <linux/pkt_cls.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include "bpf_helpers.h"
+#include "test_iptunnel_common.h"
+#include "bpf_endian.h"
+
+int _version SEC("version") = 1;
+
+static __u32 rol32(__u32 word, unsigned int shift)
+{
+ return (word << shift) | (word >> ((-shift) & 31));
+}
+
+/* copy paste of jhash from kernel sources to make sure llvm
+ * can compile it into valid sequence of bpf instructions
+ */
+#define __jhash_mix(a, b, c) \
+{ \
+ a -= c; a ^= rol32(c, 4); c += b; \
+ b -= a; b ^= rol32(a, 6); a += c; \
+ c -= b; c ^= rol32(b, 8); b += a; \
+ a -= c; a ^= rol32(c, 16); c += b; \
+ b -= a; b ^= rol32(a, 19); a += c; \
+ c -= b; c ^= rol32(b, 4); b += a; \
+}
+
+#define __jhash_final(a, b, c) \
+{ \
+ c ^= b; c -= rol32(b, 14); \
+ a ^= c; a -= rol32(c, 11); \
+ b ^= a; b -= rol32(a, 25); \
+ c ^= b; c -= rol32(b, 16); \
+ a ^= c; a -= rol32(c, 4); \
+ b ^= a; b -= rol32(a, 14); \
+ c ^= b; c -= rol32(b, 24); \
+}
+
+#define JHASH_INITVAL 0xdeadbeef
+
+typedef unsigned int u32;
+
+static u32 jhash(const void *key, u32 length, u32 initval)
+{
+ u32 a, b, c;
+ const unsigned char *k = key;
+
+ a = b = c = JHASH_INITVAL + length + initval;
+
+ while (length > 12) {
+ a += *(u32 *)(k);
+ b += *(u32 *)(k + 4);
+ c += *(u32 *)(k + 8);
+ __jhash_mix(a, b, c);
+ length -= 12;
+ k += 12;
+ }
+ switch (length) {
+ case 12: c += (u32)k[11]<<24;
+ case 11: c += (u32)k[10]<<16;
+ case 10: c += (u32)k[9]<<8;
+ case 9: c += k[8];
+ case 8: b += (u32)k[7]<<24;
+ case 7: b += (u32)k[6]<<16;
+ case 6: b += (u32)k[5]<<8;
+ case 5: b += k[4];
+ case 4: a += (u32)k[3]<<24;
+ case 3: a += (u32)k[2]<<16;
+ case 2: a += (u32)k[1]<<8;
+ case 1: a += k[0];
+ __jhash_final(a, b, c);
+ case 0: /* Nothing left to add */
+ break;
+ }
+
+ return c;
+}
+
+static u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
+{
+ a += initval;
+ b += initval;
+ c += initval;
+ __jhash_final(a, b, c);
+ return c;
+}
+
+static u32 jhash_2words(u32 a, u32 b, u32 initval)
+{
+ return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2));
+}
+
+#define PCKT_FRAGMENTED 65343
+#define IPV4_HDR_LEN_NO_OPT 20
+#define IPV4_PLUS_ICMP_HDR 28
+#define IPV6_PLUS_ICMP_HDR 48
+#define RING_SIZE 2
+#define MAX_VIPS 12
+#define MAX_REALS 5
+#define CTL_MAP_SIZE 16
+#define CH_RINGS_SIZE (MAX_VIPS * RING_SIZE)
+#define F_IPV6 (1 << 0)
+#define F_HASH_NO_SRC_PORT (1 << 0)
+#define F_ICMP (1 << 0)
+#define F_SYN_SET (1 << 1)
+
+struct packet_description {
+ union {
+ __be32 src;
+ __be32 srcv6[4];
+ };
+ union {
+ __be32 dst;
+ __be32 dstv6[4];
+ };
+ union {
+ __u32 ports;
+ __u16 port16[2];
+ };
+ __u8 proto;
+ __u8 flags;
+};
+
+struct ctl_value {
+ union {
+ __u64 value;
+ __u32 ifindex;
+ __u8 mac[6];
+ };
+};
+
+struct vip_meta {
+ __u32 flags;
+ __u32 vip_num;
+};
+
+struct real_definition {
+ union {
+ __be32 dst;
+ __be32 dstv6[4];
+ };
+ __u8 flags;
+};
+
+struct vip_stats {
+ __u64 bytes;
+ __u64 pkts;
+};
+
+struct eth_hdr {
+ unsigned char eth_dest[ETH_ALEN];
+ unsigned char eth_source[ETH_ALEN];
+ unsigned short eth_proto;
+};
+
+struct bpf_map_def SEC("maps") vip_map = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(struct vip),
+ .value_size = sizeof(struct vip_meta),
+ .max_entries = MAX_VIPS,
+};
+
+struct bpf_map_def SEC("maps") ch_rings = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u32),
+ .max_entries = CH_RINGS_SIZE,
+};
+
+struct bpf_map_def SEC("maps") reals = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct real_definition),
+ .max_entries = MAX_REALS,
+};
+
+struct bpf_map_def SEC("maps") stats = {
+ .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct vip_stats),
+ .max_entries = MAX_VIPS,
+};
+
+struct bpf_map_def SEC("maps") ctl_array = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct ctl_value),
+ .max_entries = CTL_MAP_SIZE,
+};
+
+static __u32 get_packet_hash(struct packet_description *pckt,
+ bool ipv6)
+{
+ if (ipv6)
+ return jhash_2words(jhash(pckt->srcv6, 16, MAX_VIPS),
+ pckt->ports, CH_RINGS_SIZE);
+ else
+ return jhash_2words(pckt->src, pckt->ports, CH_RINGS_SIZE);
+}
+
+static bool get_packet_dst(struct real_definition **real,
+ struct packet_description *pckt,
+ struct vip_meta *vip_info,
+ bool is_ipv6)
+{
+ __u32 hash = get_packet_hash(pckt, is_ipv6);
+ __u32 key = RING_SIZE * vip_info->vip_num + hash % RING_SIZE;
+ __u32 *real_pos;
+
+ if (hash != 0x358459b7 /* jhash of ipv4 packet */ &&
+ hash != 0x2f4bc6bb /* jhash of ipv6 packet */)
+ return 0;
+
+ real_pos = bpf_map_lookup_elem(&ch_rings, &key);
+ if (!real_pos)
+ return false;
+ key = *real_pos;
+ *real = bpf_map_lookup_elem(&reals, &key);
+ if (!(*real))
+ return false;
+ return true;
+}
+
+static int parse_icmpv6(void *data, void *data_end, __u64 off,
+ struct packet_description *pckt)
+{
+ struct icmp6hdr *icmp_hdr;
+ struct ipv6hdr *ip6h;
+
+ icmp_hdr = data + off;
+ if (icmp_hdr + 1 > data_end)
+ return TC_ACT_SHOT;
+ if (icmp_hdr->icmp6_type != ICMPV6_PKT_TOOBIG)
+ return TC_ACT_OK;
+ off += sizeof(struct icmp6hdr);
+ ip6h = data + off;
+ if (ip6h + 1 > data_end)
+ return TC_ACT_SHOT;
+ pckt->proto = ip6h->nexthdr;
+ pckt->flags |= F_ICMP;
+ memcpy(pckt->srcv6, ip6h->daddr.s6_addr32, 16);
+ memcpy(pckt->dstv6, ip6h->saddr.s6_addr32, 16);
+ return TC_ACT_UNSPEC;
+}
+
+static int parse_icmp(void *data, void *data_end, __u64 off,
+ struct packet_description *pckt)
+{
+ struct icmphdr *icmp_hdr;
+ struct iphdr *iph;
+
+ icmp_hdr = data + off;
+ if (icmp_hdr + 1 > data_end)
+ return TC_ACT_SHOT;
+ if (icmp_hdr->type != ICMP_DEST_UNREACH ||
+ icmp_hdr->code != ICMP_FRAG_NEEDED)
+ return TC_ACT_OK;
+ off += sizeof(struct icmphdr);
+ iph = data + off;
+ if (iph + 1 > data_end)
+ return TC_ACT_SHOT;
+ if (iph->ihl != 5)
+ return TC_ACT_SHOT;
+ pckt->proto = iph->protocol;
+ pckt->flags |= F_ICMP;
+ pckt->src = iph->daddr;
+ pckt->dst = iph->saddr;
+ return TC_ACT_UNSPEC;
+}
+
+static bool parse_udp(void *data, __u64 off, void *data_end,
+ struct packet_description *pckt)
+{
+ struct udphdr *udp;
+ udp = data + off;
+
+ if (udp + 1 > data_end)
+ return false;
+
+ if (!(pckt->flags & F_ICMP)) {
+ pckt->port16[0] = udp->source;
+ pckt->port16[1] = udp->dest;
+ } else {
+ pckt->port16[0] = udp->dest;
+ pckt->port16[1] = udp->source;
+ }
+ return true;
+}
+
+static bool parse_tcp(void *data, __u64 off, void *data_end,
+ struct packet_description *pckt)
+{
+ struct tcphdr *tcp;
+
+ tcp = data + off;
+ if (tcp + 1 > data_end)
+ return false;
+
+ if (tcp->syn)
+ pckt->flags |= F_SYN_SET;
+
+ if (!(pckt->flags & F_ICMP)) {
+ pckt->port16[0] = tcp->source;
+ pckt->port16[1] = tcp->dest;
+ } else {
+ pckt->port16[0] = tcp->dest;
+ pckt->port16[1] = tcp->source;
+ }
+ return true;
+}
+
+static int process_packet(void *data, __u64 off, void *data_end,
+ bool is_ipv6, struct __sk_buff *skb)
+{
+ void *pkt_start = (void *)(long)skb->data;
+ struct packet_description pckt = {};
+ struct eth_hdr *eth = pkt_start;
+ struct bpf_tunnel_key tkey = {};
+ struct vip_stats *data_stats;
+ struct real_definition *dst;
+ struct vip_meta *vip_info;
+ struct ctl_value *cval;
+ __u32 v4_intf_pos = 1;
+ __u32 v6_intf_pos = 2;
+ struct ipv6hdr *ip6h;
+ struct vip vip = {};
+ struct iphdr *iph;
+ int tun_flag = 0;
+ __u16 pkt_bytes;
+ __u64 iph_len;
+ __u32 ifindex;
+ __u8 protocol;
+ __u32 vip_num;
+ int action;
+
+ tkey.tunnel_ttl = 64;
+ if (is_ipv6) {
+ ip6h = data + off;
+ if (ip6h + 1 > data_end)
+ return TC_ACT_SHOT;
+
+ iph_len = sizeof(struct ipv6hdr);
+ protocol = ip6h->nexthdr;
+ pckt.proto = protocol;
+ pkt_bytes = bpf_ntohs(ip6h->payload_len);
+ off += iph_len;
+ if (protocol == IPPROTO_FRAGMENT) {
+ return TC_ACT_SHOT;
+ } else if (protocol == IPPROTO_ICMPV6) {
+ action = parse_icmpv6(data, data_end, off, &pckt);
+ if (action >= 0)
+ return action;
+ off += IPV6_PLUS_ICMP_HDR;
+ } else {
+ memcpy(pckt.srcv6, ip6h->saddr.s6_addr32, 16);
+ memcpy(pckt.dstv6, ip6h->daddr.s6_addr32, 16);
+ }
+ } else {
+ iph = data + off;
+ if (iph + 1 > data_end)
+ return TC_ACT_SHOT;
+ if (iph->ihl != 5)
+ return TC_ACT_SHOT;
+
+ protocol = iph->protocol;
+ pckt.proto = protocol;
+ pkt_bytes = bpf_ntohs(iph->tot_len);
+ off += IPV4_HDR_LEN_NO_OPT;
+
+ if (iph->frag_off & PCKT_FRAGMENTED)
+ return TC_ACT_SHOT;
+ if (protocol == IPPROTO_ICMP) {
+ action = parse_icmp(data, data_end, off, &pckt);
+ if (action >= 0)
+ return action;
+ off += IPV4_PLUS_ICMP_HDR;
+ } else {
+ pckt.src = iph->saddr;
+ pckt.dst = iph->daddr;
+ }
+ }
+ protocol = pckt.proto;
+
+ if (protocol == IPPROTO_TCP) {
+ if (!parse_tcp(data, off, data_end, &pckt))
+ return TC_ACT_SHOT;
+ } else if (protocol == IPPROTO_UDP) {
+ if (!parse_udp(data, off, data_end, &pckt))
+ return TC_ACT_SHOT;
+ } else {
+ return TC_ACT_SHOT;
+ }
+
+ if (is_ipv6)
+ memcpy(vip.daddr.v6, pckt.dstv6, 16);
+ else
+ vip.daddr.v4 = pckt.dst;
+
+ vip.dport = pckt.port16[1];
+ vip.protocol = pckt.proto;
+ vip_info = bpf_map_lookup_elem(&vip_map, &vip);
+ if (!vip_info) {
+ vip.dport = 0;
+ vip_info = bpf_map_lookup_elem(&vip_map, &vip);
+ if (!vip_info)
+ return TC_ACT_SHOT;
+ pckt.port16[1] = 0;
+ }
+
+ if (vip_info->flags & F_HASH_NO_SRC_PORT)
+ pckt.port16[0] = 0;
+
+ if (!get_packet_dst(&dst, &pckt, vip_info, is_ipv6))
+ return TC_ACT_SHOT;
+
+ if (dst->flags & F_IPV6) {
+ cval = bpf_map_lookup_elem(&ctl_array, &v6_intf_pos);
+ if (!cval)
+ return TC_ACT_SHOT;
+ ifindex = cval->ifindex;
+ memcpy(tkey.remote_ipv6, dst->dstv6, 16);
+ tun_flag = BPF_F_TUNINFO_IPV6;
+ } else {
+ cval = bpf_map_lookup_elem(&ctl_array, &v4_intf_pos);
+ if (!cval)
+ return TC_ACT_SHOT;
+ ifindex = cval->ifindex;
+ tkey.remote_ipv4 = dst->dst;
+ }
+ vip_num = vip_info->vip_num;
+ data_stats = bpf_map_lookup_elem(&stats, &vip_num);
+ if (!data_stats)
+ return TC_ACT_SHOT;
+ data_stats->pkts++;
+ data_stats->bytes += pkt_bytes;
+ bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey), tun_flag);
+ *(u32 *)eth->eth_dest = tkey.remote_ipv4;
+ return bpf_redirect(ifindex, 0);
+}
+
+SEC("l4lb-demo")
+int balancer_ingress(struct __sk_buff *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct eth_hdr *eth = data;
+ __u32 eth_proto;
+ __u32 nh_off;
+
+ nh_off = sizeof(struct eth_hdr);
+ if (data + nh_off > data_end)
+ return TC_ACT_SHOT;
+ eth_proto = eth->eth_proto;
+ if (eth_proto == bpf_htons(ETH_P_IP))
+ return process_packet(data, nh_off, data_end, false, ctx);
+ else if (eth_proto == bpf_htons(ETH_P_IPV6))
+ return process_packet(data, nh_off, data_end, true, ctx);
+ else
+ return TC_ACT_SHOT;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_libbpf.sh b/tools/testing/selftests/bpf/test_libbpf.sh
new file mode 100755
index 000000000..2989b2e2d
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_libbpf.sh
@@ -0,0 +1,43 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+export TESTNAME=test_libbpf
+
+# Determine selftest success via shell exit code
+exit_handler()
+{
+ if [ $? -eq 0 ]; then
+ echo "selftests: $TESTNAME [PASS]";
+ else
+ echo "$TESTNAME: failed at file $LAST_LOADED" 1>&2
+ echo "selftests: $TESTNAME [FAILED]";
+ fi
+}
+
+libbpf_open_file()
+{
+ LAST_LOADED=$1
+ if [ -n "$VERBOSE" ]; then
+ ./test_libbpf_open $1
+ else
+ ./test_libbpf_open --quiet $1
+ fi
+}
+
+# Exit script immediately (well catched by trap handler) if any
+# program/thing exits with a non-zero status.
+set -e
+
+# (Use 'trap -l' to list meaning of numbers)
+trap exit_handler 0 2 3 6 9
+
+libbpf_open_file test_l4lb.o
+
+# Load a program with BPF-to-BPF calls
+libbpf_open_file test_l4lb_noinline.o
+
+# Load a program compiled without the "-target bpf" flag
+libbpf_open_file test_xdp.o
+
+# Success
+exit 0
diff --git a/tools/testing/selftests/bpf/test_libbpf_open.c b/tools/testing/selftests/bpf/test_libbpf_open.c
new file mode 100644
index 000000000..cbd55f5f8
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_libbpf_open.c
@@ -0,0 +1,152 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Copyright (c) 2018 Jesper Dangaard Brouer, Red Hat Inc.
+ */
+static const char *__doc__ =
+ "Libbpf test program for loading BPF ELF object files";
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdarg.h>
+#include <bpf/libbpf.h>
+#include <getopt.h>
+
+#include "bpf_rlimit.h"
+
+static const struct option long_options[] = {
+ {"help", no_argument, NULL, 'h' },
+ {"debug", no_argument, NULL, 'D' },
+ {"quiet", no_argument, NULL, 'q' },
+ {0, 0, NULL, 0 }
+};
+
+static void usage(char *argv[])
+{
+ int i;
+
+ printf("\nDOCUMENTATION:\n%s\n\n", __doc__);
+ printf(" Usage: %s (options-see-below) BPF_FILE\n", argv[0]);
+ printf(" Listing options:\n");
+ for (i = 0; long_options[i].name != 0; i++) {
+ printf(" --%-12s", long_options[i].name);
+ printf(" short-option: -%c",
+ long_options[i].val);
+ printf("\n");
+ }
+ printf("\n");
+}
+
+#define DEFINE_PRINT_FN(name, enabled) \
+static int libbpf_##name(const char *fmt, ...) \
+{ \
+ va_list args; \
+ int ret; \
+ \
+ va_start(args, fmt); \
+ if (enabled) { \
+ fprintf(stderr, "[" #name "] "); \
+ ret = vfprintf(stderr, fmt, args); \
+ } \
+ va_end(args); \
+ return ret; \
+}
+DEFINE_PRINT_FN(warning, 1)
+DEFINE_PRINT_FN(info, 1)
+DEFINE_PRINT_FN(debug, 1)
+
+#define EXIT_FAIL_LIBBPF EXIT_FAILURE
+#define EXIT_FAIL_OPTION 2
+
+int test_walk_progs(struct bpf_object *obj, bool verbose)
+{
+ struct bpf_program *prog;
+ int cnt = 0;
+
+ bpf_object__for_each_program(prog, obj) {
+ cnt++;
+ if (verbose)
+ printf("Prog (count:%d) section_name: %s\n", cnt,
+ bpf_program__title(prog, false));
+ }
+ return 0;
+}
+
+int test_walk_maps(struct bpf_object *obj, bool verbose)
+{
+ struct bpf_map *map;
+ int cnt = 0;
+
+ bpf_map__for_each(map, obj) {
+ cnt++;
+ if (verbose)
+ printf("Map (count:%d) name: %s\n", cnt,
+ bpf_map__name(map));
+ }
+ return 0;
+}
+
+int test_open_file(char *filename, bool verbose)
+{
+ struct bpf_object *bpfobj = NULL;
+ long err;
+
+ if (verbose)
+ printf("Open BPF ELF-file with libbpf: %s\n", filename);
+
+ /* Load BPF ELF object file and check for errors */
+ bpfobj = bpf_object__open(filename);
+ err = libbpf_get_error(bpfobj);
+ if (err) {
+ char err_buf[128];
+ libbpf_strerror(err, err_buf, sizeof(err_buf));
+ if (verbose)
+ printf("Unable to load eBPF objects in file '%s': %s\n",
+ filename, err_buf);
+ return EXIT_FAIL_LIBBPF;
+ }
+ test_walk_progs(bpfobj, verbose);
+ test_walk_maps(bpfobj, verbose);
+
+ if (verbose)
+ printf("Close BPF ELF-file with libbpf: %s\n",
+ bpf_object__name(bpfobj));
+ bpf_object__close(bpfobj);
+
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ char filename[1024] = { 0 };
+ bool verbose = 1;
+ int longindex = 0;
+ int opt;
+
+ libbpf_set_print(libbpf_warning, libbpf_info, NULL);
+
+ /* Parse commands line args */
+ while ((opt = getopt_long(argc, argv, "hDq",
+ long_options, &longindex)) != -1) {
+ switch (opt) {
+ case 'D':
+ libbpf_set_print(libbpf_warning, libbpf_info,
+ libbpf_debug);
+ break;
+ case 'q': /* Use in scripting mode */
+ verbose = 0;
+ break;
+ case 'h':
+ default:
+ usage(argv);
+ return EXIT_FAIL_OPTION;
+ }
+ }
+ if (optind >= argc) {
+ usage(argv);
+ printf("ERROR: Expected BPF_FILE argument after options\n");
+ return EXIT_FAIL_OPTION;
+ }
+ snprintf(filename, sizeof(filename), "%s", argv[optind]);
+
+ return test_open_file(filename, verbose);
+}
diff --git a/tools/testing/selftests/bpf/test_lirc_mode2.sh b/tools/testing/selftests/bpf/test_lirc_mode2.sh
new file mode 100755
index 000000000..795e56e3e
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_lirc_mode2.sh
@@ -0,0 +1,40 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=$ksft_skip
+
+msg="skip all tests:"
+if [ $UID != 0 ]; then
+ echo $msg please run this as root >&2
+ exit $ksft_skip
+fi
+
+GREEN='\033[0;92m'
+RED='\033[0;31m'
+NC='\033[0m' # No Color
+
+modprobe rc-loopback
+
+for i in /sys/class/rc/rc*
+do
+ if grep -q DRV_NAME=rc-loopback $i/uevent
+ then
+ LIRCDEV=$(grep DEVNAME= $i/lirc*/uevent | sed sQDEVNAME=Q/dev/Q)
+ fi
+done
+
+if [ -n "$LIRCDEV" ];
+then
+ TYPE=lirc_mode2
+ ./test_lirc_mode2_user $LIRCDEV
+ ret=$?
+ if [ $ret -ne 0 ]; then
+ echo -e ${RED}"FAIL: $TYPE"${NC}
+ else
+ echo -e ${GREEN}"PASS: $TYPE"${NC}
+ fi
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/bpf/test_lirc_mode2_kern.c b/tools/testing/selftests/bpf/test_lirc_mode2_kern.c
new file mode 100644
index 000000000..ba2685556
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_lirc_mode2_kern.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+// test ir decoder
+//
+// Copyright (C) 2018 Sean Young <sean@mess.org>
+
+#include <linux/bpf.h>
+#include <linux/lirc.h>
+#include "bpf_helpers.h"
+
+SEC("lirc_mode2")
+int bpf_decoder(unsigned int *sample)
+{
+ if (LIRC_IS_PULSE(*sample)) {
+ unsigned int duration = LIRC_VALUE(*sample);
+
+ if (duration & 0x10000)
+ bpf_rc_keydown(sample, 0x40, duration & 0xffff, 0);
+ }
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_lirc_mode2_user.c b/tools/testing/selftests/bpf/test_lirc_mode2_user.c
new file mode 100644
index 000000000..d470d63c3
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_lirc_mode2_user.c
@@ -0,0 +1,149 @@
+// SPDX-License-Identifier: GPL-2.0
+// test ir decoder
+//
+// Copyright (C) 2018 Sean Young <sean@mess.org>
+
+// A lirc chardev is a device representing a consumer IR (cir) device which
+// can receive infrared signals from remote control and/or transmit IR.
+//
+// IR is sent as a series of pulses and space somewhat like morse code. The
+// BPF program can decode this into scancodes so that rc-core can translate
+// this into input key codes using the rc keymap.
+//
+// This test works by sending IR over rc-loopback, so the IR is processed by
+// BPF and then decoded into scancodes. The lirc chardev must be the one
+// associated with rc-loopback, see the output of ir-keytable(1).
+//
+// The following CONFIG options must be enabled for the test to succeed:
+// CONFIG_RC_CORE=y
+// CONFIG_BPF_RAWIR_EVENT=y
+// CONFIG_RC_LOOPBACK=y
+
+// Steps:
+// 1. Open the /dev/lircN device for rc-loopback (given on command line)
+// 2. Attach bpf_lirc_mode2 program which decodes some IR.
+// 3. Send some IR to the same IR device; since it is loopback, this will
+// end up in the bpf program
+// 4. bpf program should decode IR and report keycode
+// 5. We can read keycode from same /dev/lirc device
+
+#include <linux/bpf.h>
+#include <linux/lirc.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <poll.h>
+#include <sys/types.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include "bpf_util.h"
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+int main(int argc, char **argv)
+{
+ struct bpf_object *obj;
+ int ret, lircfd, progfd, mode;
+ int testir = 0x1dead;
+ u32 prog_ids[10], prog_flags[10], prog_cnt;
+
+ if (argc != 2) {
+ printf("Usage: %s /dev/lircN\n", argv[0]);
+ return 2;
+ }
+
+ ret = bpf_prog_load("test_lirc_mode2_kern.o",
+ BPF_PROG_TYPE_LIRC_MODE2, &obj, &progfd);
+ if (ret) {
+ printf("Failed to load bpf program\n");
+ return 1;
+ }
+
+ lircfd = open(argv[1], O_RDWR | O_NONBLOCK);
+ if (lircfd == -1) {
+ printf("failed to open lirc device %s: %m\n", argv[1]);
+ return 1;
+ }
+
+ /* Let's try detach it before it was ever attached */
+ ret = bpf_prog_detach2(progfd, lircfd, BPF_LIRC_MODE2);
+ if (ret != -1 || errno != ENOENT) {
+ printf("bpf_prog_detach2 not attached should fail: %m\n");
+ return 1;
+ }
+
+ mode = LIRC_MODE_SCANCODE;
+ if (ioctl(lircfd, LIRC_SET_REC_MODE, &mode)) {
+ printf("failed to set rec mode: %m\n");
+ return 1;
+ }
+
+ prog_cnt = 10;
+ ret = bpf_prog_query(lircfd, BPF_LIRC_MODE2, 0, prog_flags, prog_ids,
+ &prog_cnt);
+ if (ret) {
+ printf("Failed to query bpf programs on lirc device: %m\n");
+ return 1;
+ }
+
+ if (prog_cnt != 0) {
+ printf("Expected nothing to be attached\n");
+ return 1;
+ }
+
+ ret = bpf_prog_attach(progfd, lircfd, BPF_LIRC_MODE2, 0);
+ if (ret) {
+ printf("Failed to attach bpf to lirc device: %m\n");
+ return 1;
+ }
+
+ /* Write raw IR */
+ ret = write(lircfd, &testir, sizeof(testir));
+ if (ret != sizeof(testir)) {
+ printf("Failed to send test IR message: %m\n");
+ return 1;
+ }
+
+ struct pollfd pfd = { .fd = lircfd, .events = POLLIN };
+ struct lirc_scancode lsc;
+
+ poll(&pfd, 1, 100);
+
+ /* Read decoded IR */
+ ret = read(lircfd, &lsc, sizeof(lsc));
+ if (ret != sizeof(lsc)) {
+ printf("Failed to read decoded IR: %m\n");
+ return 1;
+ }
+
+ if (lsc.scancode != 0xdead || lsc.rc_proto != 64) {
+ printf("Incorrect scancode decoded\n");
+ return 1;
+ }
+
+ prog_cnt = 10;
+ ret = bpf_prog_query(lircfd, BPF_LIRC_MODE2, 0, prog_flags, prog_ids,
+ &prog_cnt);
+ if (ret) {
+ printf("Failed to query bpf programs on lirc device: %m\n");
+ return 1;
+ }
+
+ if (prog_cnt != 1) {
+ printf("Expected one program to be attached\n");
+ return 1;
+ }
+
+ /* Let's try detaching it now it is actually attached */
+ ret = bpf_prog_detach2(progfd, lircfd, BPF_LIRC_MODE2);
+ if (ret) {
+ printf("bpf_prog_detach2: returned %m\n");
+ return 1;
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/test_lpm_map.c
new file mode 100644
index 000000000..006be3963
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_lpm_map.c
@@ -0,0 +1,804 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Randomized tests for eBPF longest-prefix-match maps
+ *
+ * This program runs randomized tests against the lpm-bpf-map. It implements a
+ * "Trivial Longest Prefix Match" (tlpm) based on simple, linear, singly linked
+ * lists. The implementation should be pretty straightforward.
+ *
+ * Based on tlpm, this inserts randomized data into bpf-lpm-maps and verifies
+ * the trie-based bpf-map implementation behaves the same way as tlpm.
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <linux/bpf.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+#include <sys/time.h>
+
+#include <bpf/bpf.h>
+
+#include "bpf_util.h"
+#include "bpf_rlimit.h"
+
+struct tlpm_node {
+ struct tlpm_node *next;
+ size_t n_bits;
+ uint8_t key[];
+};
+
+static struct tlpm_node *tlpm_match(struct tlpm_node *list,
+ const uint8_t *key,
+ size_t n_bits);
+
+static struct tlpm_node *tlpm_add(struct tlpm_node *list,
+ const uint8_t *key,
+ size_t n_bits)
+{
+ struct tlpm_node *node;
+ size_t n;
+
+ n = (n_bits + 7) / 8;
+
+ /* 'overwrite' an equivalent entry if one already exists */
+ node = tlpm_match(list, key, n_bits);
+ if (node && node->n_bits == n_bits) {
+ memcpy(node->key, key, n);
+ return list;
+ }
+
+ /* add new entry with @key/@n_bits to @list and return new head */
+
+ node = malloc(sizeof(*node) + n);
+ assert(node);
+
+ node->next = list;
+ node->n_bits = n_bits;
+ memcpy(node->key, key, n);
+
+ return node;
+}
+
+static void tlpm_clear(struct tlpm_node *list)
+{
+ struct tlpm_node *node;
+
+ /* free all entries in @list */
+
+ while ((node = list)) {
+ list = list->next;
+ free(node);
+ }
+}
+
+static struct tlpm_node *tlpm_match(struct tlpm_node *list,
+ const uint8_t *key,
+ size_t n_bits)
+{
+ struct tlpm_node *best = NULL;
+ size_t i;
+
+ /* Perform longest prefix-match on @key/@n_bits. That is, iterate all
+ * entries and match each prefix against @key. Remember the "best"
+ * entry we find (i.e., the longest prefix that matches) and return it
+ * to the caller when done.
+ */
+
+ for ( ; list; list = list->next) {
+ for (i = 0; i < n_bits && i < list->n_bits; ++i) {
+ if ((key[i / 8] & (1 << (7 - i % 8))) !=
+ (list->key[i / 8] & (1 << (7 - i % 8))))
+ break;
+ }
+
+ if (i >= list->n_bits) {
+ if (!best || i > best->n_bits)
+ best = list;
+ }
+ }
+
+ return best;
+}
+
+static struct tlpm_node *tlpm_delete(struct tlpm_node *list,
+ const uint8_t *key,
+ size_t n_bits)
+{
+ struct tlpm_node *best = tlpm_match(list, key, n_bits);
+ struct tlpm_node *node;
+
+ if (!best || best->n_bits != n_bits)
+ return list;
+
+ if (best == list) {
+ node = best->next;
+ free(best);
+ return node;
+ }
+
+ for (node = list; node; node = node->next) {
+ if (node->next == best) {
+ node->next = best->next;
+ free(best);
+ return list;
+ }
+ }
+ /* should never get here */
+ assert(0);
+ return list;
+}
+
+static void test_lpm_basic(void)
+{
+ struct tlpm_node *list = NULL, *t1, *t2;
+
+ /* very basic, static tests to verify tlpm works as expected */
+
+ assert(!tlpm_match(list, (uint8_t[]){ 0xff }, 8));
+
+ t1 = list = tlpm_add(list, (uint8_t[]){ 0xff }, 8);
+ assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff }, 8));
+ assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff, 0xff }, 16));
+ assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff, 0x00 }, 16));
+ assert(!tlpm_match(list, (uint8_t[]){ 0x7f }, 8));
+ assert(!tlpm_match(list, (uint8_t[]){ 0xfe }, 8));
+ assert(!tlpm_match(list, (uint8_t[]){ 0xff }, 7));
+
+ t2 = list = tlpm_add(list, (uint8_t[]){ 0xff, 0xff }, 16);
+ assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff }, 8));
+ assert(t2 == tlpm_match(list, (uint8_t[]){ 0xff, 0xff }, 16));
+ assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff, 0xff }, 15));
+ assert(!tlpm_match(list, (uint8_t[]){ 0x7f, 0xff }, 16));
+
+ list = tlpm_delete(list, (uint8_t[]){ 0xff, 0xff }, 16);
+ assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff }, 8));
+ assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff, 0xff }, 16));
+
+ list = tlpm_delete(list, (uint8_t[]){ 0xff }, 8);
+ assert(!tlpm_match(list, (uint8_t[]){ 0xff }, 8));
+
+ tlpm_clear(list);
+}
+
+static void test_lpm_order(void)
+{
+ struct tlpm_node *t1, *t2, *l1 = NULL, *l2 = NULL;
+ size_t i, j;
+
+ /* Verify the tlpm implementation works correctly regardless of the
+ * order of entries. Insert a random set of entries into @l1, and copy
+ * the same data in reverse order into @l2. Then verify a lookup of
+ * random keys will yield the same result in both sets.
+ */
+
+ for (i = 0; i < (1 << 12); ++i)
+ l1 = tlpm_add(l1, (uint8_t[]){
+ rand() % 0xff,
+ rand() % 0xff,
+ }, rand() % 16 + 1);
+
+ for (t1 = l1; t1; t1 = t1->next)
+ l2 = tlpm_add(l2, t1->key, t1->n_bits);
+
+ for (i = 0; i < (1 << 8); ++i) {
+ uint8_t key[] = { rand() % 0xff, rand() % 0xff };
+
+ t1 = tlpm_match(l1, key, 16);
+ t2 = tlpm_match(l2, key, 16);
+
+ assert(!t1 == !t2);
+ if (t1) {
+ assert(t1->n_bits == t2->n_bits);
+ for (j = 0; j < t1->n_bits; ++j)
+ assert((t1->key[j / 8] & (1 << (7 - j % 8))) ==
+ (t2->key[j / 8] & (1 << (7 - j % 8))));
+ }
+ }
+
+ tlpm_clear(l1);
+ tlpm_clear(l2);
+}
+
+static void test_lpm_map(int keysize)
+{
+ size_t i, j, n_matches, n_matches_after_delete, n_nodes, n_lookups;
+ struct tlpm_node *t, *list = NULL;
+ struct bpf_lpm_trie_key *key;
+ uint8_t *data, *value;
+ int r, map;
+
+ /* Compare behavior of tlpm vs. bpf-lpm. Create a randomized set of
+ * prefixes and insert it into both tlpm and bpf-lpm. Then run some
+ * randomized lookups and verify both maps return the same result.
+ */
+
+ n_matches = 0;
+ n_matches_after_delete = 0;
+ n_nodes = 1 << 8;
+ n_lookups = 1 << 16;
+
+ data = alloca(keysize);
+ memset(data, 0, keysize);
+
+ value = alloca(keysize + 1);
+ memset(value, 0, keysize + 1);
+
+ key = alloca(sizeof(*key) + keysize);
+ memset(key, 0, sizeof(*key) + keysize);
+
+ map = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE,
+ sizeof(*key) + keysize,
+ keysize + 1,
+ 4096,
+ BPF_F_NO_PREALLOC);
+ assert(map >= 0);
+
+ for (i = 0; i < n_nodes; ++i) {
+ for (j = 0; j < keysize; ++j)
+ value[j] = rand() & 0xff;
+ value[keysize] = rand() % (8 * keysize + 1);
+
+ list = tlpm_add(list, value, value[keysize]);
+
+ key->prefixlen = value[keysize];
+ memcpy(key->data, value, keysize);
+ r = bpf_map_update_elem(map, key, value, 0);
+ assert(!r);
+ }
+
+ for (i = 0; i < n_lookups; ++i) {
+ for (j = 0; j < keysize; ++j)
+ data[j] = rand() & 0xff;
+
+ t = tlpm_match(list, data, 8 * keysize);
+
+ key->prefixlen = 8 * keysize;
+ memcpy(key->data, data, keysize);
+ r = bpf_map_lookup_elem(map, key, value);
+ assert(!r || errno == ENOENT);
+ assert(!t == !!r);
+
+ if (t) {
+ ++n_matches;
+ assert(t->n_bits == value[keysize]);
+ for (j = 0; j < t->n_bits; ++j)
+ assert((t->key[j / 8] & (1 << (7 - j % 8))) ==
+ (value[j / 8] & (1 << (7 - j % 8))));
+ }
+ }
+
+ /* Remove the first half of the elements in the tlpm and the
+ * corresponding nodes from the bpf-lpm. Then run the same
+ * large number of random lookups in both and make sure they match.
+ * Note: we need to count the number of nodes actually inserted
+ * since there may have been duplicates.
+ */
+ for (i = 0, t = list; t; i++, t = t->next)
+ ;
+ for (j = 0; j < i / 2; ++j) {
+ key->prefixlen = list->n_bits;
+ memcpy(key->data, list->key, keysize);
+ r = bpf_map_delete_elem(map, key);
+ assert(!r);
+ list = tlpm_delete(list, list->key, list->n_bits);
+ assert(list);
+ }
+ for (i = 0; i < n_lookups; ++i) {
+ for (j = 0; j < keysize; ++j)
+ data[j] = rand() & 0xff;
+
+ t = tlpm_match(list, data, 8 * keysize);
+
+ key->prefixlen = 8 * keysize;
+ memcpy(key->data, data, keysize);
+ r = bpf_map_lookup_elem(map, key, value);
+ assert(!r || errno == ENOENT);
+ assert(!t == !!r);
+
+ if (t) {
+ ++n_matches_after_delete;
+ assert(t->n_bits == value[keysize]);
+ for (j = 0; j < t->n_bits; ++j)
+ assert((t->key[j / 8] & (1 << (7 - j % 8))) ==
+ (value[j / 8] & (1 << (7 - j % 8))));
+ }
+ }
+
+ close(map);
+ tlpm_clear(list);
+
+ /* With 255 random nodes in the map, we are pretty likely to match
+ * something on every lookup. For statistics, use this:
+ *
+ * printf(" nodes: %zu\n"
+ * " lookups: %zu\n"
+ * " matches: %zu\n"
+ * "matches(delete): %zu\n",
+ * n_nodes, n_lookups, n_matches, n_matches_after_delete);
+ */
+}
+
+/* Test the implementation with some 'real world' examples */
+
+static void test_lpm_ipaddr(void)
+{
+ struct bpf_lpm_trie_key *key_ipv4;
+ struct bpf_lpm_trie_key *key_ipv6;
+ size_t key_size_ipv4;
+ size_t key_size_ipv6;
+ int map_fd_ipv4;
+ int map_fd_ipv6;
+ __u64 value;
+
+ key_size_ipv4 = sizeof(*key_ipv4) + sizeof(__u32);
+ key_size_ipv6 = sizeof(*key_ipv6) + sizeof(__u32) * 4;
+ key_ipv4 = alloca(key_size_ipv4);
+ key_ipv6 = alloca(key_size_ipv6);
+
+ map_fd_ipv4 = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE,
+ key_size_ipv4, sizeof(value),
+ 100, BPF_F_NO_PREALLOC);
+ assert(map_fd_ipv4 >= 0);
+
+ map_fd_ipv6 = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE,
+ key_size_ipv6, sizeof(value),
+ 100, BPF_F_NO_PREALLOC);
+ assert(map_fd_ipv6 >= 0);
+
+ /* Fill data some IPv4 and IPv6 address ranges */
+ value = 1;
+ key_ipv4->prefixlen = 16;
+ inet_pton(AF_INET, "192.168.0.0", key_ipv4->data);
+ assert(bpf_map_update_elem(map_fd_ipv4, key_ipv4, &value, 0) == 0);
+
+ value = 2;
+ key_ipv4->prefixlen = 24;
+ inet_pton(AF_INET, "192.168.0.0", key_ipv4->data);
+ assert(bpf_map_update_elem(map_fd_ipv4, key_ipv4, &value, 0) == 0);
+
+ value = 3;
+ key_ipv4->prefixlen = 24;
+ inet_pton(AF_INET, "192.168.128.0", key_ipv4->data);
+ assert(bpf_map_update_elem(map_fd_ipv4, key_ipv4, &value, 0) == 0);
+
+ value = 5;
+ key_ipv4->prefixlen = 24;
+ inet_pton(AF_INET, "192.168.1.0", key_ipv4->data);
+ assert(bpf_map_update_elem(map_fd_ipv4, key_ipv4, &value, 0) == 0);
+
+ value = 4;
+ key_ipv4->prefixlen = 23;
+ inet_pton(AF_INET, "192.168.0.0", key_ipv4->data);
+ assert(bpf_map_update_elem(map_fd_ipv4, key_ipv4, &value, 0) == 0);
+
+ value = 0xdeadbeef;
+ key_ipv6->prefixlen = 64;
+ inet_pton(AF_INET6, "2a00:1450:4001:814::200e", key_ipv6->data);
+ assert(bpf_map_update_elem(map_fd_ipv6, key_ipv6, &value, 0) == 0);
+
+ /* Set tprefixlen to maximum for lookups */
+ key_ipv4->prefixlen = 32;
+ key_ipv6->prefixlen = 128;
+
+ /* Test some lookups that should come back with a value */
+ inet_pton(AF_INET, "192.168.128.23", key_ipv4->data);
+ assert(bpf_map_lookup_elem(map_fd_ipv4, key_ipv4, &value) == 0);
+ assert(value == 3);
+
+ inet_pton(AF_INET, "192.168.0.1", key_ipv4->data);
+ assert(bpf_map_lookup_elem(map_fd_ipv4, key_ipv4, &value) == 0);
+ assert(value == 2);
+
+ inet_pton(AF_INET6, "2a00:1450:4001:814::", key_ipv6->data);
+ assert(bpf_map_lookup_elem(map_fd_ipv6, key_ipv6, &value) == 0);
+ assert(value == 0xdeadbeef);
+
+ inet_pton(AF_INET6, "2a00:1450:4001:814::1", key_ipv6->data);
+ assert(bpf_map_lookup_elem(map_fd_ipv6, key_ipv6, &value) == 0);
+ assert(value == 0xdeadbeef);
+
+ /* Test some lookups that should not match any entry */
+ inet_pton(AF_INET, "10.0.0.1", key_ipv4->data);
+ assert(bpf_map_lookup_elem(map_fd_ipv4, key_ipv4, &value) == -1 &&
+ errno == ENOENT);
+
+ inet_pton(AF_INET, "11.11.11.11", key_ipv4->data);
+ assert(bpf_map_lookup_elem(map_fd_ipv4, key_ipv4, &value) == -1 &&
+ errno == ENOENT);
+
+ inet_pton(AF_INET6, "2a00:ffff::", key_ipv6->data);
+ assert(bpf_map_lookup_elem(map_fd_ipv6, key_ipv6, &value) == -1 &&
+ errno == ENOENT);
+
+ close(map_fd_ipv4);
+ close(map_fd_ipv6);
+}
+
+static void test_lpm_delete(void)
+{
+ struct bpf_lpm_trie_key *key;
+ size_t key_size;
+ int map_fd;
+ __u64 value;
+
+ key_size = sizeof(*key) + sizeof(__u32);
+ key = alloca(key_size);
+
+ map_fd = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE,
+ key_size, sizeof(value),
+ 100, BPF_F_NO_PREALLOC);
+ assert(map_fd >= 0);
+
+ /* Add nodes:
+ * 192.168.0.0/16 (1)
+ * 192.168.0.0/24 (2)
+ * 192.168.128.0/24 (3)
+ * 192.168.1.0/24 (4)
+ *
+ * (1)
+ * / \
+ * (IM) (3)
+ * / \
+ * (2) (4)
+ */
+ value = 1;
+ key->prefixlen = 16;
+ inet_pton(AF_INET, "192.168.0.0", key->data);
+ assert(bpf_map_update_elem(map_fd, key, &value, 0) == 0);
+
+ value = 2;
+ key->prefixlen = 24;
+ inet_pton(AF_INET, "192.168.0.0", key->data);
+ assert(bpf_map_update_elem(map_fd, key, &value, 0) == 0);
+
+ value = 3;
+ key->prefixlen = 24;
+ inet_pton(AF_INET, "192.168.128.0", key->data);
+ assert(bpf_map_update_elem(map_fd, key, &value, 0) == 0);
+
+ value = 4;
+ key->prefixlen = 24;
+ inet_pton(AF_INET, "192.168.1.0", key->data);
+ assert(bpf_map_update_elem(map_fd, key, &value, 0) == 0);
+
+ /* remove non-existent node */
+ key->prefixlen = 32;
+ inet_pton(AF_INET, "10.0.0.1", key->data);
+ assert(bpf_map_lookup_elem(map_fd, key, &value) == -1 &&
+ errno == ENOENT);
+
+ key->prefixlen = 30; // unused prefix so far
+ inet_pton(AF_INET, "192.255.0.0", key->data);
+ assert(bpf_map_delete_elem(map_fd, key) == -1 &&
+ errno == ENOENT);
+
+ key->prefixlen = 16; // same prefix as the root node
+ inet_pton(AF_INET, "192.255.0.0", key->data);
+ assert(bpf_map_delete_elem(map_fd, key) == -1 &&
+ errno == ENOENT);
+
+ /* assert initial lookup */
+ key->prefixlen = 32;
+ inet_pton(AF_INET, "192.168.0.1", key->data);
+ assert(bpf_map_lookup_elem(map_fd, key, &value) == 0);
+ assert(value == 2);
+
+ /* remove leaf node */
+ key->prefixlen = 24;
+ inet_pton(AF_INET, "192.168.0.0", key->data);
+ assert(bpf_map_delete_elem(map_fd, key) == 0);
+
+ key->prefixlen = 32;
+ inet_pton(AF_INET, "192.168.0.1", key->data);
+ assert(bpf_map_lookup_elem(map_fd, key, &value) == 0);
+ assert(value == 1);
+
+ /* remove leaf (and intermediary) node */
+ key->prefixlen = 24;
+ inet_pton(AF_INET, "192.168.1.0", key->data);
+ assert(bpf_map_delete_elem(map_fd, key) == 0);
+
+ key->prefixlen = 32;
+ inet_pton(AF_INET, "192.168.1.1", key->data);
+ assert(bpf_map_lookup_elem(map_fd, key, &value) == 0);
+ assert(value == 1);
+
+ /* remove root node */
+ key->prefixlen = 16;
+ inet_pton(AF_INET, "192.168.0.0", key->data);
+ assert(bpf_map_delete_elem(map_fd, key) == 0);
+
+ key->prefixlen = 32;
+ inet_pton(AF_INET, "192.168.128.1", key->data);
+ assert(bpf_map_lookup_elem(map_fd, key, &value) == 0);
+ assert(value == 3);
+
+ /* remove last node */
+ key->prefixlen = 24;
+ inet_pton(AF_INET, "192.168.128.0", key->data);
+ assert(bpf_map_delete_elem(map_fd, key) == 0);
+
+ key->prefixlen = 32;
+ inet_pton(AF_INET, "192.168.128.1", key->data);
+ assert(bpf_map_lookup_elem(map_fd, key, &value) == -1 &&
+ errno == ENOENT);
+
+ close(map_fd);
+}
+
+static void test_lpm_get_next_key(void)
+{
+ struct bpf_lpm_trie_key *key_p, *next_key_p;
+ size_t key_size;
+ __u32 value = 0;
+ int map_fd;
+
+ key_size = sizeof(*key_p) + sizeof(__u32);
+ key_p = alloca(key_size);
+ next_key_p = alloca(key_size);
+
+ map_fd = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE, key_size, sizeof(value),
+ 100, BPF_F_NO_PREALLOC);
+ assert(map_fd >= 0);
+
+ /* empty tree. get_next_key should return ENOENT */
+ assert(bpf_map_get_next_key(map_fd, NULL, key_p) == -1 &&
+ errno == ENOENT);
+
+ /* get and verify the first key, get the second one should fail. */
+ key_p->prefixlen = 16;
+ inet_pton(AF_INET, "192.168.0.0", key_p->data);
+ assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0);
+
+ memset(key_p, 0, key_size);
+ assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0);
+ assert(key_p->prefixlen == 16 && key_p->data[0] == 192 &&
+ key_p->data[1] == 168);
+
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
+ errno == ENOENT);
+
+ /* no exact matching key should get the first one in post order. */
+ key_p->prefixlen = 8;
+ assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0);
+ assert(key_p->prefixlen == 16 && key_p->data[0] == 192 &&
+ key_p->data[1] == 168);
+
+ /* add one more element (total two) */
+ key_p->prefixlen = 24;
+ inet_pton(AF_INET, "192.168.128.0", key_p->data);
+ assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0);
+
+ memset(key_p, 0, key_size);
+ assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0);
+ assert(key_p->prefixlen == 24 && key_p->data[0] == 192 &&
+ key_p->data[1] == 168 && key_p->data[2] == 128);
+
+ memset(next_key_p, 0, key_size);
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+ assert(next_key_p->prefixlen == 16 && next_key_p->data[0] == 192 &&
+ next_key_p->data[1] == 168);
+
+ memcpy(key_p, next_key_p, key_size);
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
+ errno == ENOENT);
+
+ /* Add one more element (total three) */
+ key_p->prefixlen = 24;
+ inet_pton(AF_INET, "192.168.0.0", key_p->data);
+ assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0);
+
+ memset(key_p, 0, key_size);
+ assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0);
+ assert(key_p->prefixlen == 24 && key_p->data[0] == 192 &&
+ key_p->data[1] == 168 && key_p->data[2] == 0);
+
+ memset(next_key_p, 0, key_size);
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+ assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 &&
+ next_key_p->data[1] == 168 && next_key_p->data[2] == 128);
+
+ memcpy(key_p, next_key_p, key_size);
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+ assert(next_key_p->prefixlen == 16 && next_key_p->data[0] == 192 &&
+ next_key_p->data[1] == 168);
+
+ memcpy(key_p, next_key_p, key_size);
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
+ errno == ENOENT);
+
+ /* Add one more element (total four) */
+ key_p->prefixlen = 24;
+ inet_pton(AF_INET, "192.168.1.0", key_p->data);
+ assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0);
+
+ memset(key_p, 0, key_size);
+ assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0);
+ assert(key_p->prefixlen == 24 && key_p->data[0] == 192 &&
+ key_p->data[1] == 168 && key_p->data[2] == 0);
+
+ memset(next_key_p, 0, key_size);
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+ assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 &&
+ next_key_p->data[1] == 168 && next_key_p->data[2] == 1);
+
+ memcpy(key_p, next_key_p, key_size);
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+ assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 &&
+ next_key_p->data[1] == 168 && next_key_p->data[2] == 128);
+
+ memcpy(key_p, next_key_p, key_size);
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+ assert(next_key_p->prefixlen == 16 && next_key_p->data[0] == 192 &&
+ next_key_p->data[1] == 168);
+
+ memcpy(key_p, next_key_p, key_size);
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
+ errno == ENOENT);
+
+ /* Add one more element (total five) */
+ key_p->prefixlen = 28;
+ inet_pton(AF_INET, "192.168.1.128", key_p->data);
+ assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0);
+
+ memset(key_p, 0, key_size);
+ assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0);
+ assert(key_p->prefixlen == 24 && key_p->data[0] == 192 &&
+ key_p->data[1] == 168 && key_p->data[2] == 0);
+
+ memset(next_key_p, 0, key_size);
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+ assert(next_key_p->prefixlen == 28 && next_key_p->data[0] == 192 &&
+ next_key_p->data[1] == 168 && next_key_p->data[2] == 1 &&
+ next_key_p->data[3] == 128);
+
+ memcpy(key_p, next_key_p, key_size);
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+ assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 &&
+ next_key_p->data[1] == 168 && next_key_p->data[2] == 1);
+
+ memcpy(key_p, next_key_p, key_size);
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+ assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 &&
+ next_key_p->data[1] == 168 && next_key_p->data[2] == 128);
+
+ memcpy(key_p, next_key_p, key_size);
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+ assert(next_key_p->prefixlen == 16 && next_key_p->data[0] == 192 &&
+ next_key_p->data[1] == 168);
+
+ memcpy(key_p, next_key_p, key_size);
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
+ errno == ENOENT);
+
+ /* no exact matching key should return the first one in post order */
+ key_p->prefixlen = 22;
+ inet_pton(AF_INET, "192.168.1.0", key_p->data);
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+ assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 &&
+ next_key_p->data[1] == 168 && next_key_p->data[2] == 0);
+
+ close(map_fd);
+}
+
+#define MAX_TEST_KEYS 4
+struct lpm_mt_test_info {
+ int cmd; /* 0: update, 1: delete, 2: lookup, 3: get_next_key */
+ int iter;
+ int map_fd;
+ struct {
+ __u32 prefixlen;
+ __u32 data;
+ } key[MAX_TEST_KEYS];
+};
+
+static void *lpm_test_command(void *arg)
+{
+ int i, j, ret, iter, key_size;
+ struct lpm_mt_test_info *info = arg;
+ struct bpf_lpm_trie_key *key_p;
+
+ key_size = sizeof(struct bpf_lpm_trie_key) + sizeof(__u32);
+ key_p = alloca(key_size);
+ for (iter = 0; iter < info->iter; iter++)
+ for (i = 0; i < MAX_TEST_KEYS; i++) {
+ /* first half of iterations in forward order,
+ * and second half in backward order.
+ */
+ j = (iter < (info->iter / 2)) ? i : MAX_TEST_KEYS - i - 1;
+ key_p->prefixlen = info->key[j].prefixlen;
+ memcpy(key_p->data, &info->key[j].data, sizeof(__u32));
+ if (info->cmd == 0) {
+ __u32 value = j;
+ /* update must succeed */
+ assert(bpf_map_update_elem(info->map_fd, key_p, &value, 0) == 0);
+ } else if (info->cmd == 1) {
+ ret = bpf_map_delete_elem(info->map_fd, key_p);
+ assert(ret == 0 || errno == ENOENT);
+ } else if (info->cmd == 2) {
+ __u32 value;
+ ret = bpf_map_lookup_elem(info->map_fd, key_p, &value);
+ assert(ret == 0 || errno == ENOENT);
+ } else {
+ struct bpf_lpm_trie_key *next_key_p = alloca(key_size);
+ ret = bpf_map_get_next_key(info->map_fd, key_p, next_key_p);
+ assert(ret == 0 || errno == ENOENT || errno == ENOMEM);
+ }
+ }
+
+ // Pass successful exit info back to the main thread
+ pthread_exit((void *)info);
+}
+
+static void setup_lpm_mt_test_info(struct lpm_mt_test_info *info, int map_fd)
+{
+ info->iter = 2000;
+ info->map_fd = map_fd;
+ info->key[0].prefixlen = 16;
+ inet_pton(AF_INET, "192.168.0.0", &info->key[0].data);
+ info->key[1].prefixlen = 24;
+ inet_pton(AF_INET, "192.168.0.0", &info->key[1].data);
+ info->key[2].prefixlen = 24;
+ inet_pton(AF_INET, "192.168.128.0", &info->key[2].data);
+ info->key[3].prefixlen = 24;
+ inet_pton(AF_INET, "192.168.1.0", &info->key[3].data);
+}
+
+static void test_lpm_multi_thread(void)
+{
+ struct lpm_mt_test_info info[4];
+ size_t key_size, value_size;
+ pthread_t thread_id[4];
+ int i, map_fd;
+ void *ret;
+
+ /* create a trie */
+ value_size = sizeof(__u32);
+ key_size = sizeof(struct bpf_lpm_trie_key) + value_size;
+ map_fd = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE, key_size, value_size,
+ 100, BPF_F_NO_PREALLOC);
+
+ /* create 4 threads to test update, delete, lookup and get_next_key */
+ setup_lpm_mt_test_info(&info[0], map_fd);
+ for (i = 0; i < 4; i++) {
+ if (i != 0)
+ memcpy(&info[i], &info[0], sizeof(info[i]));
+ info[i].cmd = i;
+ assert(pthread_create(&thread_id[i], NULL, &lpm_test_command, &info[i]) == 0);
+ }
+
+ for (i = 0; i < 4; i++)
+ assert(pthread_join(thread_id[i], &ret) == 0 && ret == (void *)&info[i]);
+
+ close(map_fd);
+}
+
+int main(void)
+{
+ int i;
+
+ /* we want predictable, pseudo random tests */
+ srand(0xf00ba1);
+
+ test_lpm_basic();
+ test_lpm_order();
+
+ /* Test with 8, 16, 24, 32, ... 128 bit prefix length */
+ for (i = 1; i <= 16; ++i)
+ test_lpm_map(i);
+
+ test_lpm_ipaddr();
+ test_lpm_delete();
+ test_lpm_get_next_key();
+ test_lpm_multi_thread();
+
+ printf("test_lpm: OK\n");
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c
new file mode 100644
index 000000000..781c7de34
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_lru_map.c
@@ -0,0 +1,646 @@
+/*
+ * Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+#include <assert.h>
+#include <sched.h>
+#include <stdlib.h>
+#include <time.h>
+
+#include <sys/wait.h>
+
+#include <bpf/bpf.h>
+
+#include "bpf_util.h"
+#include "bpf_rlimit.h"
+
+#define LOCAL_FREE_TARGET (128)
+#define PERCPU_FREE_TARGET (4)
+
+static int nr_cpus;
+
+static int create_map(int map_type, int map_flags, unsigned int size)
+{
+ int map_fd;
+
+ map_fd = bpf_create_map(map_type, sizeof(unsigned long long),
+ sizeof(unsigned long long), size, map_flags);
+
+ if (map_fd == -1)
+ perror("bpf_create_map");
+
+ return map_fd;
+}
+
+static int map_subset(int map0, int map1)
+{
+ unsigned long long next_key = 0;
+ unsigned long long value0[nr_cpus], value1[nr_cpus];
+ int ret;
+
+ while (!bpf_map_get_next_key(map1, &next_key, &next_key)) {
+ assert(!bpf_map_lookup_elem(map1, &next_key, value1));
+ ret = bpf_map_lookup_elem(map0, &next_key, value0);
+ if (ret) {
+ printf("key:%llu not found from map. %s(%d)\n",
+ next_key, strerror(errno), errno);
+ return 0;
+ }
+ if (value0[0] != value1[0]) {
+ printf("key:%llu value0:%llu != value1:%llu\n",
+ next_key, value0[0], value1[0]);
+ return 0;
+ }
+ }
+ return 1;
+}
+
+static int map_equal(int lru_map, int expected)
+{
+ return map_subset(lru_map, expected) && map_subset(expected, lru_map);
+}
+
+static int sched_next_online(int pid, int *next_to_try)
+{
+ cpu_set_t cpuset;
+ int next = *next_to_try;
+ int ret = -1;
+
+ while (next < nr_cpus) {
+ CPU_ZERO(&cpuset);
+ CPU_SET(next++, &cpuset);
+ if (!sched_setaffinity(pid, sizeof(cpuset), &cpuset)) {
+ ret = 0;
+ break;
+ }
+ }
+
+ *next_to_try = next;
+ return ret;
+}
+
+/* Size of the LRU amp is 2
+ * Add key=1 (+1 key)
+ * Add key=2 (+1 key)
+ * Lookup Key=1
+ * Add Key=3
+ * => Key=2 will be removed by LRU
+ * Iterate map. Only found key=1 and key=3
+ */
+static void test_lru_sanity0(int map_type, int map_flags)
+{
+ unsigned long long key, value[nr_cpus];
+ int lru_map_fd, expected_map_fd;
+ int next_cpu = 0;
+
+ printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
+ map_flags);
+
+ assert(sched_next_online(0, &next_cpu) != -1);
+
+ if (map_flags & BPF_F_NO_COMMON_LRU)
+ lru_map_fd = create_map(map_type, map_flags, 2 * nr_cpus);
+ else
+ lru_map_fd = create_map(map_type, map_flags, 2);
+ assert(lru_map_fd != -1);
+
+ expected_map_fd = create_map(BPF_MAP_TYPE_HASH, 0, 2);
+ assert(expected_map_fd != -1);
+
+ value[0] = 1234;
+
+ /* insert key=1 element */
+
+ key = 1;
+ assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST));
+ assert(!bpf_map_update_elem(expected_map_fd, &key, value,
+ BPF_NOEXIST));
+
+ /* BPF_NOEXIST means: add new element if it doesn't exist */
+ assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST) == -1
+ /* key=1 already exists */
+ && errno == EEXIST);
+
+ assert(bpf_map_update_elem(lru_map_fd, &key, value, -1) == -1 &&
+ errno == EINVAL);
+
+ /* insert key=2 element */
+
+ /* check that key=2 is not found */
+ key = 2;
+ assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
+ errno == ENOENT);
+
+ /* BPF_EXIST means: update existing element */
+ assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_EXIST) == -1 &&
+ /* key=2 is not there */
+ errno == ENOENT);
+
+ assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST));
+
+ /* insert key=3 element */
+
+ /* check that key=3 is not found */
+ key = 3;
+ assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
+ errno == ENOENT);
+
+ /* check that key=1 can be found and mark the ref bit to
+ * stop LRU from removing key=1
+ */
+ key = 1;
+ assert(!bpf_map_lookup_elem(lru_map_fd, &key, value));
+ assert(value[0] == 1234);
+
+ key = 3;
+ assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST));
+ assert(!bpf_map_update_elem(expected_map_fd, &key, value,
+ BPF_NOEXIST));
+
+ /* key=2 has been removed from the LRU */
+ key = 2;
+ assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1);
+
+ assert(map_equal(lru_map_fd, expected_map_fd));
+
+ close(expected_map_fd);
+ close(lru_map_fd);
+
+ printf("Pass\n");
+}
+
+/* Size of the LRU map is 1.5*tgt_free
+ * Insert 1 to tgt_free (+tgt_free keys)
+ * Lookup 1 to tgt_free/2
+ * Insert 1+tgt_free to 2*tgt_free (+tgt_free keys)
+ * => 1+tgt_free/2 to LOCALFREE_TARGET will be removed by LRU
+ */
+static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free)
+{
+ unsigned long long key, end_key, value[nr_cpus];
+ int lru_map_fd, expected_map_fd;
+ unsigned int batch_size;
+ unsigned int map_size;
+ int next_cpu = 0;
+
+ if (map_flags & BPF_F_NO_COMMON_LRU)
+ /* This test is only applicable to common LRU list */
+ return;
+
+ printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
+ map_flags);
+
+ assert(sched_next_online(0, &next_cpu) != -1);
+
+ batch_size = tgt_free / 2;
+ assert(batch_size * 2 == tgt_free);
+
+ map_size = tgt_free + batch_size;
+ lru_map_fd = create_map(map_type, map_flags, map_size);
+ assert(lru_map_fd != -1);
+
+ expected_map_fd = create_map(BPF_MAP_TYPE_HASH, 0, map_size);
+ assert(expected_map_fd != -1);
+
+ value[0] = 1234;
+
+ /* Insert 1 to tgt_free (+tgt_free keys) */
+ end_key = 1 + tgt_free;
+ for (key = 1; key < end_key; key++)
+ assert(!bpf_map_update_elem(lru_map_fd, &key, value,
+ BPF_NOEXIST));
+
+ /* Lookup 1 to tgt_free/2 */
+ end_key = 1 + batch_size;
+ for (key = 1; key < end_key; key++) {
+ assert(!bpf_map_lookup_elem(lru_map_fd, &key, value));
+ assert(!bpf_map_update_elem(expected_map_fd, &key, value,
+ BPF_NOEXIST));
+ }
+
+ /* Insert 1+tgt_free to 2*tgt_free
+ * => 1+tgt_free/2 to LOCALFREE_TARGET will be
+ * removed by LRU
+ */
+ key = 1 + tgt_free;
+ end_key = key + tgt_free;
+ for (; key < end_key; key++) {
+ assert(!bpf_map_update_elem(lru_map_fd, &key, value,
+ BPF_NOEXIST));
+ assert(!bpf_map_update_elem(expected_map_fd, &key, value,
+ BPF_NOEXIST));
+ }
+
+ assert(map_equal(lru_map_fd, expected_map_fd));
+
+ close(expected_map_fd);
+ close(lru_map_fd);
+
+ printf("Pass\n");
+}
+
+/* Size of the LRU map 1.5 * tgt_free
+ * Insert 1 to tgt_free (+tgt_free keys)
+ * Update 1 to tgt_free/2
+ * => The original 1 to tgt_free/2 will be removed due to
+ * the LRU shrink process
+ * Re-insert 1 to tgt_free/2 again and do a lookup immeidately
+ * Insert 1+tgt_free to tgt_free*3/2
+ * Insert 1+tgt_free*3/2 to tgt_free*5/2
+ * => Key 1+tgt_free to tgt_free*3/2
+ * will be removed from LRU because it has never
+ * been lookup and ref bit is not set
+ */
+static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
+{
+ unsigned long long key, value[nr_cpus];
+ unsigned long long end_key;
+ int lru_map_fd, expected_map_fd;
+ unsigned int batch_size;
+ unsigned int map_size;
+ int next_cpu = 0;
+
+ if (map_flags & BPF_F_NO_COMMON_LRU)
+ /* This test is only applicable to common LRU list */
+ return;
+
+ printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
+ map_flags);
+
+ assert(sched_next_online(0, &next_cpu) != -1);
+
+ batch_size = tgt_free / 2;
+ assert(batch_size * 2 == tgt_free);
+
+ map_size = tgt_free + batch_size;
+ lru_map_fd = create_map(map_type, map_flags, map_size);
+ assert(lru_map_fd != -1);
+
+ expected_map_fd = create_map(BPF_MAP_TYPE_HASH, 0, map_size);
+ assert(expected_map_fd != -1);
+
+ value[0] = 1234;
+
+ /* Insert 1 to tgt_free (+tgt_free keys) */
+ end_key = 1 + tgt_free;
+ for (key = 1; key < end_key; key++)
+ assert(!bpf_map_update_elem(lru_map_fd, &key, value,
+ BPF_NOEXIST));
+
+ /* Any bpf_map_update_elem will require to acquire a new node
+ * from LRU first.
+ *
+ * The local list is running out of free nodes.
+ * It gets from the global LRU list which tries to
+ * shrink the inactive list to get tgt_free
+ * number of free nodes.
+ *
+ * Hence, the oldest key 1 to tgt_free/2
+ * are removed from the LRU list.
+ */
+ key = 1;
+ if (map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
+ assert(!bpf_map_update_elem(lru_map_fd, &key, value,
+ BPF_NOEXIST));
+ assert(!bpf_map_delete_elem(lru_map_fd, &key));
+ } else {
+ assert(bpf_map_update_elem(lru_map_fd, &key, value,
+ BPF_EXIST));
+ }
+
+ /* Re-insert 1 to tgt_free/2 again and do a lookup
+ * immeidately.
+ */
+ end_key = 1 + batch_size;
+ value[0] = 4321;
+ for (key = 1; key < end_key; key++) {
+ assert(bpf_map_lookup_elem(lru_map_fd, &key, value));
+ assert(!bpf_map_update_elem(lru_map_fd, &key, value,
+ BPF_NOEXIST));
+ assert(!bpf_map_lookup_elem(lru_map_fd, &key, value));
+ assert(value[0] == 4321);
+ assert(!bpf_map_update_elem(expected_map_fd, &key, value,
+ BPF_NOEXIST));
+ }
+
+ value[0] = 1234;
+
+ /* Insert 1+tgt_free to tgt_free*3/2 */
+ end_key = 1 + tgt_free + batch_size;
+ for (key = 1 + tgt_free; key < end_key; key++)
+ /* These newly added but not referenced keys will be
+ * gone during the next LRU shrink.
+ */
+ assert(!bpf_map_update_elem(lru_map_fd, &key, value,
+ BPF_NOEXIST));
+
+ /* Insert 1+tgt_free*3/2 to tgt_free*5/2 */
+ end_key = key + tgt_free;
+ for (; key < end_key; key++) {
+ assert(!bpf_map_update_elem(lru_map_fd, &key, value,
+ BPF_NOEXIST));
+ assert(!bpf_map_update_elem(expected_map_fd, &key, value,
+ BPF_NOEXIST));
+ }
+
+ assert(map_equal(lru_map_fd, expected_map_fd));
+
+ close(expected_map_fd);
+ close(lru_map_fd);
+
+ printf("Pass\n");
+}
+
+/* Size of the LRU map is 2*tgt_free
+ * It is to test the active/inactive list rotation
+ * Insert 1 to 2*tgt_free (+2*tgt_free keys)
+ * Lookup key 1 to tgt_free*3/2
+ * Add 1+2*tgt_free to tgt_free*5/2 (+tgt_free/2 keys)
+ * => key 1+tgt_free*3/2 to 2*tgt_free are removed from LRU
+ */
+static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free)
+{
+ unsigned long long key, end_key, value[nr_cpus];
+ int lru_map_fd, expected_map_fd;
+ unsigned int batch_size;
+ unsigned int map_size;
+ int next_cpu = 0;
+
+ if (map_flags & BPF_F_NO_COMMON_LRU)
+ /* This test is only applicable to common LRU list */
+ return;
+
+ printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
+ map_flags);
+
+ assert(sched_next_online(0, &next_cpu) != -1);
+
+ batch_size = tgt_free / 2;
+ assert(batch_size * 2 == tgt_free);
+
+ map_size = tgt_free * 2;
+ lru_map_fd = create_map(map_type, map_flags, map_size);
+ assert(lru_map_fd != -1);
+
+ expected_map_fd = create_map(BPF_MAP_TYPE_HASH, 0, map_size);
+ assert(expected_map_fd != -1);
+
+ value[0] = 1234;
+
+ /* Insert 1 to 2*tgt_free (+2*tgt_free keys) */
+ end_key = 1 + (2 * tgt_free);
+ for (key = 1; key < end_key; key++)
+ assert(!bpf_map_update_elem(lru_map_fd, &key, value,
+ BPF_NOEXIST));
+
+ /* Lookup key 1 to tgt_free*3/2 */
+ end_key = tgt_free + batch_size;
+ for (key = 1; key < end_key; key++) {
+ assert(!bpf_map_lookup_elem(lru_map_fd, &key, value));
+ assert(!bpf_map_update_elem(expected_map_fd, &key, value,
+ BPF_NOEXIST));
+ }
+
+ /* Add 1+2*tgt_free to tgt_free*5/2
+ * (+tgt_free/2 keys)
+ */
+ key = 2 * tgt_free + 1;
+ end_key = key + batch_size;
+ for (; key < end_key; key++) {
+ assert(!bpf_map_update_elem(lru_map_fd, &key, value,
+ BPF_NOEXIST));
+ assert(!bpf_map_update_elem(expected_map_fd, &key, value,
+ BPF_NOEXIST));
+ }
+
+ assert(map_equal(lru_map_fd, expected_map_fd));
+
+ close(expected_map_fd);
+ close(lru_map_fd);
+
+ printf("Pass\n");
+}
+
+/* Test deletion */
+static void test_lru_sanity4(int map_type, int map_flags, unsigned int tgt_free)
+{
+ int lru_map_fd, expected_map_fd;
+ unsigned long long key, value[nr_cpus];
+ unsigned long long end_key;
+ int next_cpu = 0;
+
+ printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
+ map_flags);
+
+ assert(sched_next_online(0, &next_cpu) != -1);
+
+ if (map_flags & BPF_F_NO_COMMON_LRU)
+ lru_map_fd = create_map(map_type, map_flags,
+ 3 * tgt_free * nr_cpus);
+ else
+ lru_map_fd = create_map(map_type, map_flags, 3 * tgt_free);
+ assert(lru_map_fd != -1);
+
+ expected_map_fd = create_map(BPF_MAP_TYPE_HASH, 0,
+ 3 * tgt_free);
+ assert(expected_map_fd != -1);
+
+ value[0] = 1234;
+
+ for (key = 1; key <= 2 * tgt_free; key++)
+ assert(!bpf_map_update_elem(lru_map_fd, &key, value,
+ BPF_NOEXIST));
+
+ key = 1;
+ assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST));
+
+ for (key = 1; key <= tgt_free; key++) {
+ assert(!bpf_map_lookup_elem(lru_map_fd, &key, value));
+ assert(!bpf_map_update_elem(expected_map_fd, &key, value,
+ BPF_NOEXIST));
+ }
+
+ for (; key <= 2 * tgt_free; key++) {
+ assert(!bpf_map_delete_elem(lru_map_fd, &key));
+ assert(bpf_map_delete_elem(lru_map_fd, &key));
+ }
+
+ end_key = key + 2 * tgt_free;
+ for (; key < end_key; key++) {
+ assert(!bpf_map_update_elem(lru_map_fd, &key, value,
+ BPF_NOEXIST));
+ assert(!bpf_map_update_elem(expected_map_fd, &key, value,
+ BPF_NOEXIST));
+ }
+
+ assert(map_equal(lru_map_fd, expected_map_fd));
+
+ close(expected_map_fd);
+ close(lru_map_fd);
+
+ printf("Pass\n");
+}
+
+static void do_test_lru_sanity5(unsigned long long last_key, int map_fd)
+{
+ unsigned long long key, value[nr_cpus];
+
+ /* Ensure the last key inserted by previous CPU can be found */
+ assert(!bpf_map_lookup_elem(map_fd, &last_key, value));
+
+ value[0] = 1234;
+
+ key = last_key + 1;
+ assert(!bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST));
+ assert(!bpf_map_lookup_elem(map_fd, &key, value));
+
+ /* Cannot find the last key because it was removed by LRU */
+ assert(bpf_map_lookup_elem(map_fd, &last_key, value));
+}
+
+/* Test map with only one element */
+static void test_lru_sanity5(int map_type, int map_flags)
+{
+ unsigned long long key, value[nr_cpus];
+ int next_cpu = 0;
+ int map_fd;
+
+ if (map_flags & BPF_F_NO_COMMON_LRU)
+ return;
+
+ printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
+ map_flags);
+
+ map_fd = create_map(map_type, map_flags, 1);
+ assert(map_fd != -1);
+
+ value[0] = 1234;
+ key = 0;
+ assert(!bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST));
+
+ while (sched_next_online(0, &next_cpu) != -1) {
+ pid_t pid;
+
+ pid = fork();
+ if (pid == 0) {
+ do_test_lru_sanity5(key, map_fd);
+ exit(0);
+ } else if (pid == -1) {
+ printf("couldn't spawn process to test key:%llu\n",
+ key);
+ exit(1);
+ } else {
+ int status;
+
+ assert(waitpid(pid, &status, 0) == pid);
+ assert(status == 0);
+ key++;
+ }
+ }
+
+ close(map_fd);
+ /* At least one key should be tested */
+ assert(key > 0);
+
+ printf("Pass\n");
+}
+
+/* Test list rotation for BPF_F_NO_COMMON_LRU map */
+static void test_lru_sanity6(int map_type, int map_flags, int tgt_free)
+{
+ int lru_map_fd, expected_map_fd;
+ unsigned long long key, value[nr_cpus];
+ unsigned int map_size = tgt_free * 2;
+ int next_cpu = 0;
+
+ if (!(map_flags & BPF_F_NO_COMMON_LRU))
+ return;
+
+ printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
+ map_flags);
+
+ assert(sched_next_online(0, &next_cpu) != -1);
+
+ expected_map_fd = create_map(BPF_MAP_TYPE_HASH, 0, map_size);
+ assert(expected_map_fd != -1);
+
+ lru_map_fd = create_map(map_type, map_flags, map_size * nr_cpus);
+ assert(lru_map_fd != -1);
+
+ value[0] = 1234;
+
+ for (key = 1; key <= tgt_free; key++) {
+ assert(!bpf_map_update_elem(lru_map_fd, &key, value,
+ BPF_NOEXIST));
+ assert(!bpf_map_update_elem(expected_map_fd, &key, value,
+ BPF_NOEXIST));
+ }
+
+ for (; key <= tgt_free * 2; key++) {
+ unsigned long long stable_key;
+
+ /* Make ref bit sticky for key: [1, tgt_free] */
+ for (stable_key = 1; stable_key <= tgt_free; stable_key++) {
+ /* Mark the ref bit */
+ assert(!bpf_map_lookup_elem(lru_map_fd, &stable_key,
+ value));
+ }
+ assert(!bpf_map_update_elem(lru_map_fd, &key, value,
+ BPF_NOEXIST));
+ }
+
+ for (; key <= tgt_free * 3; key++) {
+ assert(!bpf_map_update_elem(lru_map_fd, &key, value,
+ BPF_NOEXIST));
+ assert(!bpf_map_update_elem(expected_map_fd, &key, value,
+ BPF_NOEXIST));
+ }
+
+ assert(map_equal(lru_map_fd, expected_map_fd));
+
+ close(expected_map_fd);
+ close(lru_map_fd);
+
+ printf("Pass\n");
+}
+
+int main(int argc, char **argv)
+{
+ int map_types[] = {BPF_MAP_TYPE_LRU_HASH,
+ BPF_MAP_TYPE_LRU_PERCPU_HASH};
+ int map_flags[] = {0, BPF_F_NO_COMMON_LRU};
+ int t, f;
+
+ setbuf(stdout, NULL);
+
+ nr_cpus = bpf_num_possible_cpus();
+ assert(nr_cpus != -1);
+ printf("nr_cpus:%d\n\n", nr_cpus);
+
+ for (f = 0; f < sizeof(map_flags) / sizeof(*map_flags); f++) {
+ unsigned int tgt_free = (map_flags[f] & BPF_F_NO_COMMON_LRU) ?
+ PERCPU_FREE_TARGET : LOCAL_FREE_TARGET;
+
+ for (t = 0; t < sizeof(map_types) / sizeof(*map_types); t++) {
+ test_lru_sanity0(map_types[t], map_flags[f]);
+ test_lru_sanity1(map_types[t], map_flags[f], tgt_free);
+ test_lru_sanity2(map_types[t], map_flags[f], tgt_free);
+ test_lru_sanity3(map_types[t], map_flags[f], tgt_free);
+ test_lru_sanity4(map_types[t], map_flags[f], tgt_free);
+ test_lru_sanity5(map_types[t], map_flags[f]);
+ test_lru_sanity6(map_types[t], map_flags[f], tgt_free);
+
+ printf("\n");
+ }
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/test_lwt_seg6local.c b/tools/testing/selftests/bpf/test_lwt_seg6local.c
new file mode 100644
index 000000000..e2f6ed0a5
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_lwt_seg6local.c
@@ -0,0 +1,437 @@
+#include <stddef.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <linux/seg6_local.h>
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define bpf_printk(fmt, ...) \
+({ \
+ char ____fmt[] = fmt; \
+ bpf_trace_printk(____fmt, sizeof(____fmt), \
+ ##__VA_ARGS__); \
+})
+
+/* Packet parsing state machine helpers. */
+#define cursor_advance(_cursor, _len) \
+ ({ void *_tmp = _cursor; _cursor += _len; _tmp; })
+
+#define SR6_FLAG_ALERT (1 << 4)
+
+#define htonll(x) ((bpf_htonl(1)) == 1 ? (x) : ((uint64_t)bpf_htonl((x) & \
+ 0xFFFFFFFF) << 32) | bpf_htonl((x) >> 32))
+#define ntohll(x) ((bpf_ntohl(1)) == 1 ? (x) : ((uint64_t)bpf_ntohl((x) & \
+ 0xFFFFFFFF) << 32) | bpf_ntohl((x) >> 32))
+#define BPF_PACKET_HEADER __attribute__((packed))
+
+struct ip6_t {
+ unsigned int ver:4;
+ unsigned int priority:8;
+ unsigned int flow_label:20;
+ unsigned short payload_len;
+ unsigned char next_header;
+ unsigned char hop_limit;
+ unsigned long long src_hi;
+ unsigned long long src_lo;
+ unsigned long long dst_hi;
+ unsigned long long dst_lo;
+} BPF_PACKET_HEADER;
+
+struct ip6_addr_t {
+ unsigned long long hi;
+ unsigned long long lo;
+} BPF_PACKET_HEADER;
+
+struct ip6_srh_t {
+ unsigned char nexthdr;
+ unsigned char hdrlen;
+ unsigned char type;
+ unsigned char segments_left;
+ unsigned char first_segment;
+ unsigned char flags;
+ unsigned short tag;
+
+ struct ip6_addr_t segments[0];
+} BPF_PACKET_HEADER;
+
+struct sr6_tlv_t {
+ unsigned char type;
+ unsigned char len;
+ unsigned char value[0];
+} BPF_PACKET_HEADER;
+
+static __always_inline struct ip6_srh_t *get_srh(struct __sk_buff *skb)
+{
+ void *cursor, *data_end;
+ struct ip6_srh_t *srh;
+ struct ip6_t *ip;
+ uint8_t *ipver;
+
+ data_end = (void *)(long)skb->data_end;
+ cursor = (void *)(long)skb->data;
+ ipver = (uint8_t *)cursor;
+
+ if ((void *)ipver + sizeof(*ipver) > data_end)
+ return NULL;
+
+ if ((*ipver >> 4) != 6)
+ return NULL;
+
+ ip = cursor_advance(cursor, sizeof(*ip));
+ if ((void *)ip + sizeof(*ip) > data_end)
+ return NULL;
+
+ if (ip->next_header != 43)
+ return NULL;
+
+ srh = cursor_advance(cursor, sizeof(*srh));
+ if ((void *)srh + sizeof(*srh) > data_end)
+ return NULL;
+
+ if (srh->type != 4)
+ return NULL;
+
+ return srh;
+}
+
+static __always_inline
+int update_tlv_pad(struct __sk_buff *skb, uint32_t new_pad,
+ uint32_t old_pad, uint32_t pad_off)
+{
+ int err;
+
+ if (new_pad != old_pad) {
+ err = bpf_lwt_seg6_adjust_srh(skb, pad_off,
+ (int) new_pad - (int) old_pad);
+ if (err)
+ return err;
+ }
+
+ if (new_pad > 0) {
+ char pad_tlv_buf[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0};
+ struct sr6_tlv_t *pad_tlv = (struct sr6_tlv_t *) pad_tlv_buf;
+
+ pad_tlv->type = SR6_TLV_PADDING;
+ pad_tlv->len = new_pad - 2;
+
+ err = bpf_lwt_seg6_store_bytes(skb, pad_off,
+ (void *)pad_tlv_buf, new_pad);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static __always_inline
+int is_valid_tlv_boundary(struct __sk_buff *skb, struct ip6_srh_t *srh,
+ uint32_t *tlv_off, uint32_t *pad_size,
+ uint32_t *pad_off)
+{
+ uint32_t srh_off, cur_off;
+ int offset_valid = 0;
+ int err;
+
+ srh_off = (char *)srh - (char *)(long)skb->data;
+ // cur_off = end of segments, start of possible TLVs
+ cur_off = srh_off + sizeof(*srh) +
+ sizeof(struct ip6_addr_t) * (srh->first_segment + 1);
+
+ *pad_off = 0;
+
+ // we can only go as far as ~10 TLVs due to the BPF max stack size
+ #pragma clang loop unroll(full)
+ for (int i = 0; i < 10; i++) {
+ struct sr6_tlv_t tlv;
+
+ if (cur_off == *tlv_off)
+ offset_valid = 1;
+
+ if (cur_off >= srh_off + ((srh->hdrlen + 1) << 3))
+ break;
+
+ err = bpf_skb_load_bytes(skb, cur_off, &tlv, sizeof(tlv));
+ if (err)
+ return err;
+
+ if (tlv.type == SR6_TLV_PADDING) {
+ *pad_size = tlv.len + sizeof(tlv);
+ *pad_off = cur_off;
+
+ if (*tlv_off == srh_off) {
+ *tlv_off = cur_off;
+ offset_valid = 1;
+ }
+ break;
+
+ } else if (tlv.type == SR6_TLV_HMAC) {
+ break;
+ }
+
+ cur_off += sizeof(tlv) + tlv.len;
+ } // we reached the padding or HMAC TLVs, or the end of the SRH
+
+ if (*pad_off == 0)
+ *pad_off = cur_off;
+
+ if (*tlv_off == -1)
+ *tlv_off = cur_off;
+ else if (!offset_valid)
+ return -EINVAL;
+
+ return 0;
+}
+
+static __always_inline
+int add_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh, uint32_t tlv_off,
+ struct sr6_tlv_t *itlv, uint8_t tlv_size)
+{
+ uint32_t srh_off = (char *)srh - (char *)(long)skb->data;
+ uint8_t len_remaining, new_pad;
+ uint32_t pad_off = 0;
+ uint32_t pad_size = 0;
+ uint32_t partial_srh_len;
+ int err;
+
+ if (tlv_off != -1)
+ tlv_off += srh_off;
+
+ if (itlv->type == SR6_TLV_PADDING || itlv->type == SR6_TLV_HMAC)
+ return -EINVAL;
+
+ err = is_valid_tlv_boundary(skb, srh, &tlv_off, &pad_size, &pad_off);
+ if (err)
+ return err;
+
+ err = bpf_lwt_seg6_adjust_srh(skb, tlv_off, sizeof(*itlv) + itlv->len);
+ if (err)
+ return err;
+
+ err = bpf_lwt_seg6_store_bytes(skb, tlv_off, (void *)itlv, tlv_size);
+ if (err)
+ return err;
+
+ // the following can't be moved inside update_tlv_pad because the
+ // bpf verifier has some issues with it
+ pad_off += sizeof(*itlv) + itlv->len;
+ partial_srh_len = pad_off - srh_off;
+ len_remaining = partial_srh_len % 8;
+ new_pad = 8 - len_remaining;
+
+ if (new_pad == 1) // cannot pad for 1 byte only
+ new_pad = 9;
+ else if (new_pad == 8)
+ new_pad = 0;
+
+ return update_tlv_pad(skb, new_pad, pad_size, pad_off);
+}
+
+static __always_inline
+int delete_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh,
+ uint32_t tlv_off)
+{
+ uint32_t srh_off = (char *)srh - (char *)(long)skb->data;
+ uint8_t len_remaining, new_pad;
+ uint32_t partial_srh_len;
+ uint32_t pad_off = 0;
+ uint32_t pad_size = 0;
+ struct sr6_tlv_t tlv;
+ int err;
+
+ tlv_off += srh_off;
+
+ err = is_valid_tlv_boundary(skb, srh, &tlv_off, &pad_size, &pad_off);
+ if (err)
+ return err;
+
+ err = bpf_skb_load_bytes(skb, tlv_off, &tlv, sizeof(tlv));
+ if (err)
+ return err;
+
+ err = bpf_lwt_seg6_adjust_srh(skb, tlv_off, -(sizeof(tlv) + tlv.len));
+ if (err)
+ return err;
+
+ pad_off -= sizeof(tlv) + tlv.len;
+ partial_srh_len = pad_off - srh_off;
+ len_remaining = partial_srh_len % 8;
+ new_pad = 8 - len_remaining;
+ if (new_pad == 1) // cannot pad for 1 byte only
+ new_pad = 9;
+ else if (new_pad == 8)
+ new_pad = 0;
+
+ return update_tlv_pad(skb, new_pad, pad_size, pad_off);
+}
+
+static __always_inline
+int has_egr_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh)
+{
+ int tlv_offset = sizeof(struct ip6_t) + sizeof(struct ip6_srh_t) +
+ ((srh->first_segment + 1) << 4);
+ struct sr6_tlv_t tlv;
+
+ if (bpf_skb_load_bytes(skb, tlv_offset, &tlv, sizeof(struct sr6_tlv_t)))
+ return 0;
+
+ if (tlv.type == SR6_TLV_EGRESS && tlv.len == 18) {
+ struct ip6_addr_t egr_addr;
+
+ if (bpf_skb_load_bytes(skb, tlv_offset + 4, &egr_addr, 16))
+ return 0;
+
+ // check if egress TLV value is correct
+ if (ntohll(egr_addr.hi) == 0xfd00000000000000 &&
+ ntohll(egr_addr.lo) == 0x4)
+ return 1;
+ }
+
+ return 0;
+}
+
+// This function will push a SRH with segments fd00::1, fd00::2, fd00::3,
+// fd00::4
+SEC("encap_srh")
+int __encap_srh(struct __sk_buff *skb)
+{
+ unsigned long long hi = 0xfd00000000000000;
+ struct ip6_addr_t *seg;
+ struct ip6_srh_t *srh;
+ char srh_buf[72]; // room for 4 segments
+ int err;
+
+ srh = (struct ip6_srh_t *)srh_buf;
+ srh->nexthdr = 0;
+ srh->hdrlen = 8;
+ srh->type = 4;
+ srh->segments_left = 3;
+ srh->first_segment = 3;
+ srh->flags = 0;
+ srh->tag = 0;
+
+ seg = (struct ip6_addr_t *)((char *)srh + sizeof(*srh));
+
+ #pragma clang loop unroll(full)
+ for (unsigned long long lo = 0; lo < 4; lo++) {
+ seg->lo = htonll(4 - lo);
+ seg->hi = htonll(hi);
+ seg = (struct ip6_addr_t *)((char *)seg + sizeof(*seg));
+ }
+
+ err = bpf_lwt_push_encap(skb, 0, (void *)srh, sizeof(srh_buf));
+ if (err)
+ return BPF_DROP;
+
+ return BPF_REDIRECT;
+}
+
+// Add an Egress TLV fc00::4, add the flag A,
+// and apply End.X action to fc42::1
+SEC("add_egr_x")
+int __add_egr_x(struct __sk_buff *skb)
+{
+ unsigned long long hi = 0xfc42000000000000;
+ unsigned long long lo = 0x1;
+ struct ip6_srh_t *srh = get_srh(skb);
+ uint8_t new_flags = SR6_FLAG_ALERT;
+ struct ip6_addr_t addr;
+ int err, offset;
+
+ if (srh == NULL)
+ return BPF_DROP;
+
+ uint8_t tlv[20] = {2, 18, 0, 0, 0xfd, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x4};
+
+ err = add_tlv(skb, srh, (srh->hdrlen+1) << 3,
+ (struct sr6_tlv_t *)&tlv, 20);
+ if (err)
+ return BPF_DROP;
+
+ offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, flags);
+ err = bpf_lwt_seg6_store_bytes(skb, offset,
+ (void *)&new_flags, sizeof(new_flags));
+ if (err)
+ return BPF_DROP;
+
+ addr.lo = htonll(lo);
+ addr.hi = htonll(hi);
+ err = bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_X,
+ (void *)&addr, sizeof(addr));
+ if (err)
+ return BPF_DROP;
+ return BPF_REDIRECT;
+}
+
+// Pop the Egress TLV, reset the flags, change the tag 2442 and finally do a
+// simple End action
+SEC("pop_egr")
+int __pop_egr(struct __sk_buff *skb)
+{
+ struct ip6_srh_t *srh = get_srh(skb);
+ uint16_t new_tag = bpf_htons(2442);
+ uint8_t new_flags = 0;
+ int err, offset;
+
+ if (srh == NULL)
+ return BPF_DROP;
+
+ if (srh->flags != SR6_FLAG_ALERT)
+ return BPF_DROP;
+
+ if (srh->hdrlen != 11) // 4 segments + Egress TLV + Padding TLV
+ return BPF_DROP;
+
+ if (!has_egr_tlv(skb, srh))
+ return BPF_DROP;
+
+ err = delete_tlv(skb, srh, 8 + (srh->first_segment + 1) * 16);
+ if (err)
+ return BPF_DROP;
+
+ offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, flags);
+ if (bpf_lwt_seg6_store_bytes(skb, offset, (void *)&new_flags,
+ sizeof(new_flags)))
+ return BPF_DROP;
+
+ offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, tag);
+ if (bpf_lwt_seg6_store_bytes(skb, offset, (void *)&new_tag,
+ sizeof(new_tag)))
+ return BPF_DROP;
+
+ return BPF_OK;
+}
+
+// Inspect if the Egress TLV and flag have been removed, if the tag is correct,
+// then apply a End.T action to reach the last segment
+SEC("inspect_t")
+int __inspect_t(struct __sk_buff *skb)
+{
+ struct ip6_srh_t *srh = get_srh(skb);
+ int table = 117;
+ int err;
+
+ if (srh == NULL)
+ return BPF_DROP;
+
+ if (srh->flags != 0)
+ return BPF_DROP;
+
+ if (srh->tag != bpf_htons(2442))
+ return BPF_DROP;
+
+ if (srh->hdrlen != 8) // 4 segments
+ return BPF_DROP;
+
+ err = bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_T,
+ (void *)&table, sizeof(table));
+
+ if (err)
+ return BPF_DROP;
+
+ return BPF_REDIRECT;
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_lwt_seg6local.sh b/tools/testing/selftests/bpf/test_lwt_seg6local.sh
new file mode 100755
index 000000000..785eabf2a
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_lwt_seg6local.sh
@@ -0,0 +1,149 @@
+#!/bin/bash
+# Connects 6 network namespaces through veths.
+# Each NS may have different IPv6 global scope addresses :
+# NS1 ---- NS2 ---- NS3 ---- NS4 ---- NS5 ---- NS6
+# fb00::1 fd00::1 fd00::2 fd00::3 fb00::6
+# fc42::1 fd00::4
+#
+# All IPv6 packets going to fb00::/16 through NS2 will be encapsulated in a
+# IPv6 header with a Segment Routing Header, with segments :
+# fd00::1 -> fd00::2 -> fd00::3 -> fd00::4
+#
+# 3 fd00::/16 IPv6 addresses are binded to seg6local End.BPF actions :
+# - fd00::1 : add a TLV, change the flags and apply a End.X action to fc42::1
+# - fd00::2 : remove the TLV, change the flags, add a tag
+# - fd00::3 : apply an End.T action to fd00::4, through routing table 117
+#
+# fd00::4 is a simple Segment Routing node decapsulating the inner IPv6 packet.
+# Each End.BPF action will validate the operations applied on the SRH by the
+# previous BPF program in the chain, otherwise the packet is dropped.
+#
+# An UDP datagram is sent from fb00::1 to fb00::6. The test succeeds if this
+# datagram can be read on NS6 when binding to fb00::6.
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+msg="skip all tests:"
+if [ $UID != 0 ]; then
+ echo $msg please run this as root >&2
+ exit $ksft_skip
+fi
+
+TMP_FILE="/tmp/selftest_lwt_seg6local.txt"
+
+cleanup()
+{
+ if [ "$?" = "0" ]; then
+ echo "selftests: test_lwt_seg6local [PASS]";
+ else
+ echo "selftests: test_lwt_seg6local [FAILED]";
+ fi
+
+ set +e
+ ip netns del ns1 2> /dev/null
+ ip netns del ns2 2> /dev/null
+ ip netns del ns3 2> /dev/null
+ ip netns del ns4 2> /dev/null
+ ip netns del ns5 2> /dev/null
+ ip netns del ns6 2> /dev/null
+ rm -f $TMP_FILE
+}
+
+set -e
+
+ip netns add ns1
+ip netns add ns2
+ip netns add ns3
+ip netns add ns4
+ip netns add ns5
+ip netns add ns6
+
+trap cleanup 0 2 3 6 9
+
+ip link add veth1 type veth peer name veth2
+ip link add veth3 type veth peer name veth4
+ip link add veth5 type veth peer name veth6
+ip link add veth7 type veth peer name veth8
+ip link add veth9 type veth peer name veth10
+
+ip link set veth1 netns ns1
+ip link set veth2 netns ns2
+ip link set veth3 netns ns2
+ip link set veth4 netns ns3
+ip link set veth5 netns ns3
+ip link set veth6 netns ns4
+ip link set veth7 netns ns4
+ip link set veth8 netns ns5
+ip link set veth9 netns ns5
+ip link set veth10 netns ns6
+
+ip netns exec ns1 ip link set dev veth1 up
+ip netns exec ns2 ip link set dev veth2 up
+ip netns exec ns2 ip link set dev veth3 up
+ip netns exec ns3 ip link set dev veth4 up
+ip netns exec ns3 ip link set dev veth5 up
+ip netns exec ns4 ip link set dev veth6 up
+ip netns exec ns4 ip link set dev veth7 up
+ip netns exec ns5 ip link set dev veth8 up
+ip netns exec ns5 ip link set dev veth9 up
+ip netns exec ns6 ip link set dev veth10 up
+ip netns exec ns6 ip link set dev lo up
+
+# All link scope addresses and routes required between veths
+ip netns exec ns1 ip -6 addr add fb00::12/16 dev veth1 scope link
+ip netns exec ns1 ip -6 route add fb00::21 dev veth1 scope link
+ip netns exec ns2 ip -6 addr add fb00::21/16 dev veth2 scope link
+ip netns exec ns2 ip -6 addr add fb00::34/16 dev veth3 scope link
+ip netns exec ns2 ip -6 route add fb00::43 dev veth3 scope link
+ip netns exec ns3 ip -6 route add fb00::65 dev veth5 scope link
+ip netns exec ns3 ip -6 addr add fb00::43/16 dev veth4 scope link
+ip netns exec ns3 ip -6 addr add fb00::56/16 dev veth5 scope link
+ip netns exec ns4 ip -6 addr add fb00::65/16 dev veth6 scope link
+ip netns exec ns4 ip -6 addr add fb00::78/16 dev veth7 scope link
+ip netns exec ns4 ip -6 route add fb00::87 dev veth7 scope link
+ip netns exec ns5 ip -6 addr add fb00::87/16 dev veth8 scope link
+ip netns exec ns5 ip -6 addr add fb00::910/16 dev veth9 scope link
+ip netns exec ns5 ip -6 route add fb00::109 dev veth9 scope link
+ip netns exec ns5 ip -6 route add fb00::109 table 117 dev veth9 scope link
+ip netns exec ns6 ip -6 addr add fb00::109/16 dev veth10 scope link
+
+ip netns exec ns1 ip -6 addr add fb00::1/16 dev lo
+ip netns exec ns1 ip -6 route add fb00::6 dev veth1 via fb00::21
+
+ip netns exec ns2 ip -6 route add fb00::6 encap bpf in obj test_lwt_seg6local.o sec encap_srh dev veth2
+ip netns exec ns2 ip -6 route add fd00::1 dev veth3 via fb00::43 scope link
+
+ip netns exec ns3 ip -6 route add fc42::1 dev veth5 via fb00::65
+ip netns exec ns3 ip -6 route add fd00::1 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec add_egr_x dev veth4
+
+ip netns exec ns4 ip -6 route add fd00::2 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec pop_egr dev veth6
+ip netns exec ns4 ip -6 addr add fc42::1 dev lo
+ip netns exec ns4 ip -6 route add fd00::3 dev veth7 via fb00::87
+
+ip netns exec ns5 ip -6 route add fd00::4 table 117 dev veth9 via fb00::109
+ip netns exec ns5 ip -6 route add fd00::3 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec inspect_t dev veth8
+
+ip netns exec ns6 ip -6 addr add fb00::6/16 dev lo
+ip netns exec ns6 ip -6 addr add fd00::4/16 dev lo
+
+ip netns exec ns1 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec ns2 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec ns3 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec ns4 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec ns5 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+
+ip netns exec ns6 sysctl net.ipv6.conf.all.seg6_enabled=1 > /dev/null
+ip netns exec ns6 sysctl net.ipv6.conf.lo.seg6_enabled=1 > /dev/null
+ip netns exec ns6 sysctl net.ipv6.conf.veth10.seg6_enabled=1 > /dev/null
+
+ip netns exec ns6 nc -l -6 -u -d 7330 > $TMP_FILE &
+ip netns exec ns1 bash -c "echo 'foobar' | nc -w0 -6 -u -p 2121 -s fb00::1 fb00::6 7330"
+sleep 5 # wait enough time to ensure the UDP datagram arrived to the last segment
+kill -INT $!
+
+if [[ $(< $TMP_FILE) != "foobar" ]]; then
+ exit 1
+fi
+
+exit 0
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
new file mode 100644
index 000000000..87ba89df9
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -0,0 +1,1451 @@
+/*
+ * Testsuite for eBPF maps
+ *
+ * Copyright (c) 2014 PLUMgrid, http://plumgrid.com
+ * Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+#include <assert.h>
+#include <stdlib.h>
+
+#include <sys/wait.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <linux/bpf.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "bpf_util.h"
+#include "bpf_rlimit.h"
+
+#ifndef ENOTSUPP
+#define ENOTSUPP 524
+#endif
+
+static int map_flags;
+
+#define CHECK(condition, tag, format...) ({ \
+ int __ret = !!(condition); \
+ if (__ret) { \
+ printf("%s(%d):FAIL:%s ", __func__, __LINE__, tag); \
+ printf(format); \
+ exit(-1); \
+ } \
+})
+
+static void test_hashmap(int task, void *data)
+{
+ long long key, next_key, first_key, value;
+ int fd;
+
+ fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
+ 2, map_flags);
+ if (fd < 0) {
+ printf("Failed to create hashmap '%s'!\n", strerror(errno));
+ exit(1);
+ }
+
+ key = 1;
+ value = 1234;
+ /* Insert key=1 element. */
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
+
+ value = 0;
+ /* BPF_NOEXIST means add new element if it doesn't exist. */
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+ /* key=1 already exists. */
+ errno == EEXIST);
+
+ /* -1 is an invalid flag. */
+ assert(bpf_map_update_elem(fd, &key, &value, -1) == -1 &&
+ errno == EINVAL);
+
+ /* Check that key=1 can be found. */
+ assert(bpf_map_lookup_elem(fd, &key, &value) == 0 && value == 1234);
+
+ key = 2;
+ /* Check that key=2 is not found. */
+ assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
+
+ /* BPF_EXIST means update existing element. */
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) == -1 &&
+ /* key=2 is not there. */
+ errno == ENOENT);
+
+ /* Insert key=2 element. */
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == 0);
+
+ /* key=1 and key=2 were inserted, check that key=0 cannot be
+ * inserted due to max_entries limit.
+ */
+ key = 0;
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+ errno == E2BIG);
+
+ /* Update existing element, though the map is full. */
+ key = 1;
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) == 0);
+ key = 2;
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
+ key = 3;
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+ errno == E2BIG);
+
+ /* Check that key = 0 doesn't exist. */
+ key = 0;
+ assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT);
+
+ /* Iterate over two elements. */
+ assert(bpf_map_get_next_key(fd, NULL, &first_key) == 0 &&
+ (first_key == 1 || first_key == 2));
+ assert(bpf_map_get_next_key(fd, &key, &next_key) == 0 &&
+ (next_key == first_key));
+ assert(bpf_map_get_next_key(fd, &next_key, &next_key) == 0 &&
+ (next_key == 1 || next_key == 2) &&
+ (next_key != first_key));
+ assert(bpf_map_get_next_key(fd, &next_key, &next_key) == -1 &&
+ errno == ENOENT);
+
+ /* Delete both elements. */
+ key = 1;
+ assert(bpf_map_delete_elem(fd, &key) == 0);
+ key = 2;
+ assert(bpf_map_delete_elem(fd, &key) == 0);
+ assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT);
+
+ key = 0;
+ /* Check that map is empty. */
+ assert(bpf_map_get_next_key(fd, NULL, &next_key) == -1 &&
+ errno == ENOENT);
+ assert(bpf_map_get_next_key(fd, &key, &next_key) == -1 &&
+ errno == ENOENT);
+
+ close(fd);
+}
+
+static void test_hashmap_sizes(int task, void *data)
+{
+ int fd, i, j;
+
+ for (i = 1; i <= 512; i <<= 1)
+ for (j = 1; j <= 1 << 18; j <<= 1) {
+ fd = bpf_create_map(BPF_MAP_TYPE_HASH, i, j,
+ 2, map_flags);
+ if (fd < 0) {
+ if (errno == ENOMEM)
+ return;
+ printf("Failed to create hashmap key=%d value=%d '%s'\n",
+ i, j, strerror(errno));
+ exit(1);
+ }
+ close(fd);
+ usleep(10); /* give kernel time to destroy */
+ }
+}
+
+static void test_hashmap_percpu(int task, void *data)
+{
+ unsigned int nr_cpus = bpf_num_possible_cpus();
+ BPF_DECLARE_PERCPU(long, value);
+ long long key, next_key, first_key;
+ int expected_key_mask = 0;
+ int fd, i;
+
+ fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_HASH, sizeof(key),
+ sizeof(bpf_percpu(value, 0)), 2, map_flags);
+ if (fd < 0) {
+ printf("Failed to create hashmap '%s'!\n", strerror(errno));
+ exit(1);
+ }
+
+ for (i = 0; i < nr_cpus; i++)
+ bpf_percpu(value, i) = i + 100;
+
+ key = 1;
+ /* Insert key=1 element. */
+ assert(!(expected_key_mask & key));
+ assert(bpf_map_update_elem(fd, &key, value, BPF_ANY) == 0);
+ expected_key_mask |= key;
+
+ /* BPF_NOEXIST means add new element if it doesn't exist. */
+ assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) == -1 &&
+ /* key=1 already exists. */
+ errno == EEXIST);
+
+ /* -1 is an invalid flag. */
+ assert(bpf_map_update_elem(fd, &key, value, -1) == -1 &&
+ errno == EINVAL);
+
+ /* Check that key=1 can be found. Value could be 0 if the lookup
+ * was run from a different CPU.
+ */
+ bpf_percpu(value, 0) = 1;
+ assert(bpf_map_lookup_elem(fd, &key, value) == 0 &&
+ bpf_percpu(value, 0) == 100);
+
+ key = 2;
+ /* Check that key=2 is not found. */
+ assert(bpf_map_lookup_elem(fd, &key, value) == -1 && errno == ENOENT);
+
+ /* BPF_EXIST means update existing element. */
+ assert(bpf_map_update_elem(fd, &key, value, BPF_EXIST) == -1 &&
+ /* key=2 is not there. */
+ errno == ENOENT);
+
+ /* Insert key=2 element. */
+ assert(!(expected_key_mask & key));
+ assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) == 0);
+ expected_key_mask |= key;
+
+ /* key=1 and key=2 were inserted, check that key=0 cannot be
+ * inserted due to max_entries limit.
+ */
+ key = 0;
+ assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) == -1 &&
+ errno == E2BIG);
+
+ /* Check that key = 0 doesn't exist. */
+ assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT);
+
+ /* Iterate over two elements. */
+ assert(bpf_map_get_next_key(fd, NULL, &first_key) == 0 &&
+ ((expected_key_mask & first_key) == first_key));
+ while (!bpf_map_get_next_key(fd, &key, &next_key)) {
+ if (first_key) {
+ assert(next_key == first_key);
+ first_key = 0;
+ }
+ assert((expected_key_mask & next_key) == next_key);
+ expected_key_mask &= ~next_key;
+
+ assert(bpf_map_lookup_elem(fd, &next_key, value) == 0);
+
+ for (i = 0; i < nr_cpus; i++)
+ assert(bpf_percpu(value, i) == i + 100);
+
+ key = next_key;
+ }
+ assert(errno == ENOENT);
+
+ /* Update with BPF_EXIST. */
+ key = 1;
+ assert(bpf_map_update_elem(fd, &key, value, BPF_EXIST) == 0);
+
+ /* Delete both elements. */
+ key = 1;
+ assert(bpf_map_delete_elem(fd, &key) == 0);
+ key = 2;
+ assert(bpf_map_delete_elem(fd, &key) == 0);
+ assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT);
+
+ key = 0;
+ /* Check that map is empty. */
+ assert(bpf_map_get_next_key(fd, NULL, &next_key) == -1 &&
+ errno == ENOENT);
+ assert(bpf_map_get_next_key(fd, &key, &next_key) == -1 &&
+ errno == ENOENT);
+
+ close(fd);
+}
+
+static void test_hashmap_walk(int task, void *data)
+{
+ int fd, i, max_entries = 1000;
+ long long key, value, next_key;
+ bool next_key_valid = true;
+
+ fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
+ max_entries, map_flags);
+ if (fd < 0) {
+ printf("Failed to create hashmap '%s'!\n", strerror(errno));
+ exit(1);
+ }
+
+ for (i = 0; i < max_entries; i++) {
+ key = i; value = key;
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == 0);
+ }
+
+ for (i = 0; bpf_map_get_next_key(fd, !i ? NULL : &key,
+ &next_key) == 0; i++) {
+ key = next_key;
+ assert(bpf_map_lookup_elem(fd, &key, &value) == 0);
+ }
+
+ assert(i == max_entries);
+
+ assert(bpf_map_get_next_key(fd, NULL, &key) == 0);
+ for (i = 0; next_key_valid; i++) {
+ next_key_valid = bpf_map_get_next_key(fd, &key, &next_key) == 0;
+ assert(bpf_map_lookup_elem(fd, &key, &value) == 0);
+ value++;
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) == 0);
+ key = next_key;
+ }
+
+ assert(i == max_entries);
+
+ for (i = 0; bpf_map_get_next_key(fd, !i ? NULL : &key,
+ &next_key) == 0; i++) {
+ key = next_key;
+ assert(bpf_map_lookup_elem(fd, &key, &value) == 0);
+ assert(value - 1 == key);
+ }
+
+ assert(i == max_entries);
+ close(fd);
+}
+
+static void test_arraymap(int task, void *data)
+{
+ int key, next_key, fd;
+ long long value;
+
+ fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(key), sizeof(value),
+ 2, 0);
+ if (fd < 0) {
+ printf("Failed to create arraymap '%s'!\n", strerror(errno));
+ exit(1);
+ }
+
+ key = 1;
+ value = 1234;
+ /* Insert key=1 element. */
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
+
+ value = 0;
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+ errno == EEXIST);
+
+ /* Check that key=1 can be found. */
+ assert(bpf_map_lookup_elem(fd, &key, &value) == 0 && value == 1234);
+
+ key = 0;
+ /* Check that key=0 is also found and zero initialized. */
+ assert(bpf_map_lookup_elem(fd, &key, &value) == 0 && value == 0);
+
+ /* key=0 and key=1 were inserted, check that key=2 cannot be inserted
+ * due to max_entries limit.
+ */
+ key = 2;
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) == -1 &&
+ errno == E2BIG);
+
+ /* Check that key = 2 doesn't exist. */
+ assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
+
+ /* Iterate over two elements. */
+ assert(bpf_map_get_next_key(fd, NULL, &next_key) == 0 &&
+ next_key == 0);
+ assert(bpf_map_get_next_key(fd, &key, &next_key) == 0 &&
+ next_key == 0);
+ assert(bpf_map_get_next_key(fd, &next_key, &next_key) == 0 &&
+ next_key == 1);
+ assert(bpf_map_get_next_key(fd, &next_key, &next_key) == -1 &&
+ errno == ENOENT);
+
+ /* Delete shouldn't succeed. */
+ key = 1;
+ assert(bpf_map_delete_elem(fd, &key) == -1 && errno == EINVAL);
+
+ close(fd);
+}
+
+static void test_arraymap_percpu(int task, void *data)
+{
+ unsigned int nr_cpus = bpf_num_possible_cpus();
+ BPF_DECLARE_PERCPU(long, values);
+ int key, next_key, fd, i;
+
+ fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_ARRAY, sizeof(key),
+ sizeof(bpf_percpu(values, 0)), 2, 0);
+ if (fd < 0) {
+ printf("Failed to create arraymap '%s'!\n", strerror(errno));
+ exit(1);
+ }
+
+ for (i = 0; i < nr_cpus; i++)
+ bpf_percpu(values, i) = i + 100;
+
+ key = 1;
+ /* Insert key=1 element. */
+ assert(bpf_map_update_elem(fd, &key, values, BPF_ANY) == 0);
+
+ bpf_percpu(values, 0) = 0;
+ assert(bpf_map_update_elem(fd, &key, values, BPF_NOEXIST) == -1 &&
+ errno == EEXIST);
+
+ /* Check that key=1 can be found. */
+ assert(bpf_map_lookup_elem(fd, &key, values) == 0 &&
+ bpf_percpu(values, 0) == 100);
+
+ key = 0;
+ /* Check that key=0 is also found and zero initialized. */
+ assert(bpf_map_lookup_elem(fd, &key, values) == 0 &&
+ bpf_percpu(values, 0) == 0 &&
+ bpf_percpu(values, nr_cpus - 1) == 0);
+
+ /* Check that key=2 cannot be inserted due to max_entries limit. */
+ key = 2;
+ assert(bpf_map_update_elem(fd, &key, values, BPF_EXIST) == -1 &&
+ errno == E2BIG);
+
+ /* Check that key = 2 doesn't exist. */
+ assert(bpf_map_lookup_elem(fd, &key, values) == -1 && errno == ENOENT);
+
+ /* Iterate over two elements. */
+ assert(bpf_map_get_next_key(fd, NULL, &next_key) == 0 &&
+ next_key == 0);
+ assert(bpf_map_get_next_key(fd, &key, &next_key) == 0 &&
+ next_key == 0);
+ assert(bpf_map_get_next_key(fd, &next_key, &next_key) == 0 &&
+ next_key == 1);
+ assert(bpf_map_get_next_key(fd, &next_key, &next_key) == -1 &&
+ errno == ENOENT);
+
+ /* Delete shouldn't succeed. */
+ key = 1;
+ assert(bpf_map_delete_elem(fd, &key) == -1 && errno == EINVAL);
+
+ close(fd);
+}
+
+static void test_arraymap_percpu_many_keys(void)
+{
+ unsigned int nr_cpus = bpf_num_possible_cpus();
+ BPF_DECLARE_PERCPU(long, values);
+ /* nr_keys is not too large otherwise the test stresses percpu
+ * allocator more than anything else
+ */
+ unsigned int nr_keys = 2000;
+ int key, fd, i;
+
+ fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_ARRAY, sizeof(key),
+ sizeof(bpf_percpu(values, 0)), nr_keys, 0);
+ if (fd < 0) {
+ printf("Failed to create per-cpu arraymap '%s'!\n",
+ strerror(errno));
+ exit(1);
+ }
+
+ for (i = 0; i < nr_cpus; i++)
+ bpf_percpu(values, i) = i + 10;
+
+ for (key = 0; key < nr_keys; key++)
+ assert(bpf_map_update_elem(fd, &key, values, BPF_ANY) == 0);
+
+ for (key = 0; key < nr_keys; key++) {
+ for (i = 0; i < nr_cpus; i++)
+ bpf_percpu(values, i) = 0;
+
+ assert(bpf_map_lookup_elem(fd, &key, values) == 0);
+
+ for (i = 0; i < nr_cpus; i++)
+ assert(bpf_percpu(values, i) == i + 10);
+ }
+
+ close(fd);
+}
+
+static void test_devmap(int task, void *data)
+{
+ int fd;
+ __u32 key, value;
+
+ fd = bpf_create_map(BPF_MAP_TYPE_DEVMAP, sizeof(key), sizeof(value),
+ 2, 0);
+ if (fd < 0) {
+ printf("Failed to create arraymap '%s'!\n", strerror(errno));
+ exit(1);
+ }
+
+ close(fd);
+}
+
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <arpa/inet.h>
+#include <sys/select.h>
+#include <linux/err.h>
+#define SOCKMAP_PARSE_PROG "./sockmap_parse_prog.o"
+#define SOCKMAP_VERDICT_PROG "./sockmap_verdict_prog.o"
+#define SOCKMAP_TCP_MSG_PROG "./sockmap_tcp_msg_prog.o"
+static void test_sockmap(int tasks, void *data)
+{
+ struct bpf_map *bpf_map_rx, *bpf_map_tx, *bpf_map_msg, *bpf_map_break;
+ int map_fd_msg = 0, map_fd_rx = 0, map_fd_tx = 0, map_fd_break;
+ int ports[] = {50200, 50201, 50202, 50204};
+ int err, i, fd, udp, sfd[6] = {0xdeadbeef};
+ u8 buf[20] = {0x0, 0x5, 0x3, 0x2, 0x1, 0x0};
+ int parse_prog, verdict_prog, msg_prog;
+ struct sockaddr_in addr;
+ int one = 1, s, sc, rc;
+ struct bpf_object *obj;
+ struct timeval to;
+ __u32 key, value;
+ pid_t pid[tasks];
+ fd_set w;
+
+ /* Create some sockets to use with sockmap */
+ for (i = 0; i < 2; i++) {
+ sfd[i] = socket(AF_INET, SOCK_STREAM, 0);
+ if (sfd[i] < 0)
+ goto out;
+ err = setsockopt(sfd[i], SOL_SOCKET, SO_REUSEADDR,
+ (char *)&one, sizeof(one));
+ if (err) {
+ printf("failed to setsockopt\n");
+ goto out;
+ }
+ err = ioctl(sfd[i], FIONBIO, (char *)&one);
+ if (err < 0) {
+ printf("failed to ioctl\n");
+ goto out;
+ }
+ memset(&addr, 0, sizeof(struct sockaddr_in));
+ addr.sin_family = AF_INET;
+ addr.sin_addr.s_addr = inet_addr("127.0.0.1");
+ addr.sin_port = htons(ports[i]);
+ err = bind(sfd[i], (struct sockaddr *)&addr, sizeof(addr));
+ if (err < 0) {
+ printf("failed to bind: err %i: %i:%i\n",
+ err, i, sfd[i]);
+ goto out;
+ }
+ err = listen(sfd[i], 32);
+ if (err < 0) {
+ printf("failed to listen\n");
+ goto out;
+ }
+ }
+
+ for (i = 2; i < 4; i++) {
+ sfd[i] = socket(AF_INET, SOCK_STREAM, 0);
+ if (sfd[i] < 0)
+ goto out;
+ err = setsockopt(sfd[i], SOL_SOCKET, SO_REUSEADDR,
+ (char *)&one, sizeof(one));
+ if (err) {
+ printf("set sock opt\n");
+ goto out;
+ }
+ memset(&addr, 0, sizeof(struct sockaddr_in));
+ addr.sin_family = AF_INET;
+ addr.sin_addr.s_addr = inet_addr("127.0.0.1");
+ addr.sin_port = htons(ports[i - 2]);
+ err = connect(sfd[i], (struct sockaddr *)&addr, sizeof(addr));
+ if (err) {
+ printf("failed to connect\n");
+ goto out;
+ }
+ }
+
+
+ for (i = 4; i < 6; i++) {
+ sfd[i] = accept(sfd[i - 4], NULL, NULL);
+ if (sfd[i] < 0) {
+ printf("accept failed\n");
+ goto out;
+ }
+ }
+
+ /* Test sockmap with connected sockets */
+ fd = bpf_create_map(BPF_MAP_TYPE_SOCKMAP,
+ sizeof(key), sizeof(value),
+ 6, 0);
+ if (fd < 0) {
+ printf("Failed to create sockmap %i\n", fd);
+ goto out_sockmap;
+ }
+
+ /* Test update with unsupported UDP socket */
+ udp = socket(AF_INET, SOCK_DGRAM, 0);
+ i = 0;
+ err = bpf_map_update_elem(fd, &i, &udp, BPF_ANY);
+ if (!err) {
+ printf("Failed socket SOCK_DGRAM allowed '%i:%i'\n",
+ i, udp);
+ goto out_sockmap;
+ }
+
+ /* Test update without programs */
+ for (i = 0; i < 6; i++) {
+ err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY);
+ if (i < 2 && !err) {
+ printf("Allowed update sockmap '%i:%i' not in ESTABLISHED\n",
+ i, sfd[i]);
+ goto out_sockmap;
+ } else if (i >= 2 && err) {
+ printf("Failed noprog update sockmap '%i:%i'\n",
+ i, sfd[i]);
+ goto out_sockmap;
+ }
+ }
+
+ /* Test attaching/detaching bad fds */
+ err = bpf_prog_attach(-1, fd, BPF_SK_SKB_STREAM_PARSER, 0);
+ if (!err) {
+ printf("Failed invalid parser prog attach\n");
+ goto out_sockmap;
+ }
+
+ err = bpf_prog_attach(-1, fd, BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (!err) {
+ printf("Failed invalid verdict prog attach\n");
+ goto out_sockmap;
+ }
+
+ err = bpf_prog_attach(-1, fd, BPF_SK_MSG_VERDICT, 0);
+ if (!err) {
+ printf("Failed invalid msg verdict prog attach\n");
+ goto out_sockmap;
+ }
+
+ err = bpf_prog_attach(-1, fd, __MAX_BPF_ATTACH_TYPE, 0);
+ if (!err) {
+ printf("Failed unknown prog attach\n");
+ goto out_sockmap;
+ }
+
+ err = bpf_prog_detach(fd, BPF_SK_SKB_STREAM_PARSER);
+ if (err) {
+ printf("Failed empty parser prog detach\n");
+ goto out_sockmap;
+ }
+
+ err = bpf_prog_detach(fd, BPF_SK_SKB_STREAM_VERDICT);
+ if (err) {
+ printf("Failed empty verdict prog detach\n");
+ goto out_sockmap;
+ }
+
+ err = bpf_prog_detach(fd, BPF_SK_MSG_VERDICT);
+ if (err) {
+ printf("Failed empty msg verdict prog detach\n");
+ goto out_sockmap;
+ }
+
+ err = bpf_prog_detach(fd, __MAX_BPF_ATTACH_TYPE);
+ if (!err) {
+ printf("Detach invalid prog successful\n");
+ goto out_sockmap;
+ }
+
+ /* Load SK_SKB program and Attach */
+ err = bpf_prog_load(SOCKMAP_PARSE_PROG,
+ BPF_PROG_TYPE_SK_SKB, &obj, &parse_prog);
+ if (err) {
+ printf("Failed to load SK_SKB parse prog\n");
+ goto out_sockmap;
+ }
+
+ err = bpf_prog_load(SOCKMAP_TCP_MSG_PROG,
+ BPF_PROG_TYPE_SK_MSG, &obj, &msg_prog);
+ if (err) {
+ printf("Failed to load SK_SKB msg prog\n");
+ goto out_sockmap;
+ }
+
+ err = bpf_prog_load(SOCKMAP_VERDICT_PROG,
+ BPF_PROG_TYPE_SK_SKB, &obj, &verdict_prog);
+ if (err) {
+ printf("Failed to load SK_SKB verdict prog\n");
+ goto out_sockmap;
+ }
+
+ bpf_map_rx = bpf_object__find_map_by_name(obj, "sock_map_rx");
+ if (IS_ERR(bpf_map_rx)) {
+ printf("Failed to load map rx from verdict prog\n");
+ goto out_sockmap;
+ }
+
+ map_fd_rx = bpf_map__fd(bpf_map_rx);
+ if (map_fd_rx < 0) {
+ printf("Failed to get map rx fd\n");
+ goto out_sockmap;
+ }
+
+ bpf_map_tx = bpf_object__find_map_by_name(obj, "sock_map_tx");
+ if (IS_ERR(bpf_map_tx)) {
+ printf("Failed to load map tx from verdict prog\n");
+ goto out_sockmap;
+ }
+
+ map_fd_tx = bpf_map__fd(bpf_map_tx);
+ if (map_fd_tx < 0) {
+ printf("Failed to get map tx fd\n");
+ goto out_sockmap;
+ }
+
+ bpf_map_msg = bpf_object__find_map_by_name(obj, "sock_map_msg");
+ if (IS_ERR(bpf_map_msg)) {
+ printf("Failed to load map msg from msg_verdict prog\n");
+ goto out_sockmap;
+ }
+
+ map_fd_msg = bpf_map__fd(bpf_map_msg);
+ if (map_fd_msg < 0) {
+ printf("Failed to get map msg fd\n");
+ goto out_sockmap;
+ }
+
+ bpf_map_break = bpf_object__find_map_by_name(obj, "sock_map_break");
+ if (IS_ERR(bpf_map_break)) {
+ printf("Failed to load map tx from verdict prog\n");
+ goto out_sockmap;
+ }
+
+ map_fd_break = bpf_map__fd(bpf_map_break);
+ if (map_fd_break < 0) {
+ printf("Failed to get map tx fd\n");
+ goto out_sockmap;
+ }
+
+ err = bpf_prog_attach(parse_prog, map_fd_break,
+ BPF_SK_SKB_STREAM_PARSER, 0);
+ if (!err) {
+ printf("Allowed attaching SK_SKB program to invalid map\n");
+ goto out_sockmap;
+ }
+
+ err = bpf_prog_attach(parse_prog, map_fd_rx,
+ BPF_SK_SKB_STREAM_PARSER, 0);
+ if (err) {
+ printf("Failed stream parser bpf prog attach\n");
+ goto out_sockmap;
+ }
+
+ err = bpf_prog_attach(verdict_prog, map_fd_rx,
+ BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (err) {
+ printf("Failed stream verdict bpf prog attach\n");
+ goto out_sockmap;
+ }
+
+ err = bpf_prog_attach(msg_prog, map_fd_msg, BPF_SK_MSG_VERDICT, 0);
+ if (err) {
+ printf("Failed msg verdict bpf prog attach\n");
+ goto out_sockmap;
+ }
+
+ err = bpf_prog_attach(verdict_prog, map_fd_rx,
+ __MAX_BPF_ATTACH_TYPE, 0);
+ if (!err) {
+ printf("Attached unknown bpf prog\n");
+ goto out_sockmap;
+ }
+
+ /* Test map update elem afterwards fd lives in fd and map_fd */
+ for (i = 2; i < 6; i++) {
+ err = bpf_map_update_elem(map_fd_rx, &i, &sfd[i], BPF_ANY);
+ if (err) {
+ printf("Failed map_fd_rx update sockmap %i '%i:%i'\n",
+ err, i, sfd[i]);
+ goto out_sockmap;
+ }
+ err = bpf_map_update_elem(map_fd_tx, &i, &sfd[i], BPF_ANY);
+ if (err) {
+ printf("Failed map_fd_tx update sockmap %i '%i:%i'\n",
+ err, i, sfd[i]);
+ goto out_sockmap;
+ }
+ }
+
+ /* Test map delete elem and remove send/recv sockets */
+ for (i = 2; i < 4; i++) {
+ err = bpf_map_delete_elem(map_fd_rx, &i);
+ if (err) {
+ printf("Failed delete sockmap rx %i '%i:%i'\n",
+ err, i, sfd[i]);
+ goto out_sockmap;
+ }
+ err = bpf_map_delete_elem(map_fd_tx, &i);
+ if (err) {
+ printf("Failed delete sockmap tx %i '%i:%i'\n",
+ err, i, sfd[i]);
+ goto out_sockmap;
+ }
+ }
+
+ /* Put sfd[2] (sending fd below) into msg map to test sendmsg bpf */
+ i = 0;
+ err = bpf_map_update_elem(map_fd_msg, &i, &sfd[2], BPF_ANY);
+ if (err) {
+ printf("Failed map_fd_msg update sockmap %i\n", err);
+ goto out_sockmap;
+ }
+
+ /* Test map send/recv */
+ for (i = 0; i < 2; i++) {
+ buf[0] = i;
+ buf[1] = 0x5;
+ sc = send(sfd[2], buf, 20, 0);
+ if (sc < 0) {
+ printf("Failed sockmap send\n");
+ goto out_sockmap;
+ }
+
+ FD_ZERO(&w);
+ FD_SET(sfd[3], &w);
+ to.tv_sec = 30;
+ to.tv_usec = 0;
+ s = select(sfd[3] + 1, &w, NULL, NULL, &to);
+ if (s == -1) {
+ perror("Failed sockmap select()");
+ goto out_sockmap;
+ } else if (!s) {
+ printf("Failed sockmap unexpected timeout\n");
+ goto out_sockmap;
+ }
+
+ if (!FD_ISSET(sfd[3], &w)) {
+ printf("Failed sockmap select/recv\n");
+ goto out_sockmap;
+ }
+
+ rc = recv(sfd[3], buf, sizeof(buf), 0);
+ if (rc < 0) {
+ printf("Failed sockmap recv\n");
+ goto out_sockmap;
+ }
+ }
+
+ /* Negative null entry lookup from datapath should be dropped */
+ buf[0] = 1;
+ buf[1] = 12;
+ sc = send(sfd[2], buf, 20, 0);
+ if (sc < 0) {
+ printf("Failed sockmap send\n");
+ goto out_sockmap;
+ }
+
+ /* Push fd into same slot */
+ i = 2;
+ err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_NOEXIST);
+ if (!err) {
+ printf("Failed allowed sockmap dup slot BPF_NOEXIST\n");
+ goto out_sockmap;
+ }
+
+ err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY);
+ if (err) {
+ printf("Failed sockmap update new slot BPF_ANY\n");
+ goto out_sockmap;
+ }
+
+ err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_EXIST);
+ if (err) {
+ printf("Failed sockmap update new slot BPF_EXIST\n");
+ goto out_sockmap;
+ }
+
+ /* Delete the elems without programs */
+ for (i = 2; i < 6; i++) {
+ err = bpf_map_delete_elem(fd, &i);
+ if (err) {
+ printf("Failed delete sockmap %i '%i:%i'\n",
+ err, i, sfd[i]);
+ }
+ }
+
+ /* Test having multiple maps open and set with programs on same fds */
+ err = bpf_prog_attach(parse_prog, fd,
+ BPF_SK_SKB_STREAM_PARSER, 0);
+ if (err) {
+ printf("Failed fd bpf parse prog attach\n");
+ goto out_sockmap;
+ }
+ err = bpf_prog_attach(verdict_prog, fd,
+ BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (err) {
+ printf("Failed fd bpf verdict prog attach\n");
+ goto out_sockmap;
+ }
+
+ for (i = 4; i < 6; i++) {
+ err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY);
+ if (!err) {
+ printf("Failed allowed duplicate programs in update ANY sockmap %i '%i:%i'\n",
+ err, i, sfd[i]);
+ goto out_sockmap;
+ }
+ err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_NOEXIST);
+ if (!err) {
+ printf("Failed allowed duplicate program in update NOEXIST sockmap %i '%i:%i'\n",
+ err, i, sfd[i]);
+ goto out_sockmap;
+ }
+ err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_EXIST);
+ if (!err) {
+ printf("Failed allowed duplicate program in update EXIST sockmap %i '%i:%i'\n",
+ err, i, sfd[i]);
+ goto out_sockmap;
+ }
+ }
+
+ /* Test tasks number of forked operations */
+ for (i = 0; i < tasks; i++) {
+ pid[i] = fork();
+ if (pid[i] == 0) {
+ for (i = 0; i < 6; i++) {
+ bpf_map_delete_elem(map_fd_tx, &i);
+ bpf_map_delete_elem(map_fd_rx, &i);
+ bpf_map_update_elem(map_fd_tx, &i,
+ &sfd[i], BPF_ANY);
+ bpf_map_update_elem(map_fd_rx, &i,
+ &sfd[i], BPF_ANY);
+ }
+ exit(0);
+ } else if (pid[i] == -1) {
+ printf("Couldn't spawn #%d process!\n", i);
+ exit(1);
+ }
+ }
+
+ for (i = 0; i < tasks; i++) {
+ int status;
+
+ assert(waitpid(pid[i], &status, 0) == pid[i]);
+ assert(status == 0);
+ }
+
+ err = bpf_prog_detach(map_fd_rx, __MAX_BPF_ATTACH_TYPE);
+ if (!err) {
+ printf("Detached an invalid prog type.\n");
+ goto out_sockmap;
+ }
+
+ err = bpf_prog_detach(map_fd_rx, BPF_SK_SKB_STREAM_PARSER);
+ if (err) {
+ printf("Failed parser prog detach\n");
+ goto out_sockmap;
+ }
+
+ err = bpf_prog_detach(map_fd_rx, BPF_SK_SKB_STREAM_VERDICT);
+ if (err) {
+ printf("Failed parser prog detach\n");
+ goto out_sockmap;
+ }
+
+ /* Test map close sockets and empty maps */
+ for (i = 0; i < 6; i++) {
+ bpf_map_delete_elem(map_fd_tx, &i);
+ bpf_map_delete_elem(map_fd_rx, &i);
+ close(sfd[i]);
+ }
+ close(fd);
+ close(map_fd_rx);
+ bpf_object__close(obj);
+ return;
+out:
+ for (i = 0; i < 6; i++)
+ close(sfd[i]);
+ printf("Failed to create sockmap '%i:%s'!\n", i, strerror(errno));
+ exit(1);
+out_sockmap:
+ for (i = 0; i < 6; i++) {
+ if (map_fd_tx)
+ bpf_map_delete_elem(map_fd_tx, &i);
+ if (map_fd_rx)
+ bpf_map_delete_elem(map_fd_rx, &i);
+ close(sfd[i]);
+ }
+ close(fd);
+ exit(1);
+}
+
+#define MAP_SIZE (32 * 1024)
+
+static void test_map_large(void)
+{
+ struct bigkey {
+ int a;
+ char b[116];
+ long long c;
+ } key;
+ int fd, i, value;
+
+ fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
+ MAP_SIZE, map_flags);
+ if (fd < 0) {
+ printf("Failed to create large map '%s'!\n", strerror(errno));
+ exit(1);
+ }
+
+ for (i = 0; i < MAP_SIZE; i++) {
+ key = (struct bigkey) { .c = i };
+ value = i;
+
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == 0);
+ }
+
+ key.c = -1;
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+ errno == E2BIG);
+
+ /* Iterate through all elements. */
+ assert(bpf_map_get_next_key(fd, NULL, &key) == 0);
+ key.c = -1;
+ for (i = 0; i < MAP_SIZE; i++)
+ assert(bpf_map_get_next_key(fd, &key, &key) == 0);
+ assert(bpf_map_get_next_key(fd, &key, &key) == -1 && errno == ENOENT);
+
+ key.c = 0;
+ assert(bpf_map_lookup_elem(fd, &key, &value) == 0 && value == 0);
+ key.a = 1;
+ assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
+
+ close(fd);
+}
+
+#define run_parallel(N, FN, DATA) \
+ printf("Fork %d tasks to '" #FN "'\n", N); \
+ __run_parallel(N, FN, DATA)
+
+static void __run_parallel(int tasks, void (*fn)(int task, void *data),
+ void *data)
+{
+ pid_t pid[tasks];
+ int i;
+
+ fflush(stdout);
+
+ for (i = 0; i < tasks; i++) {
+ pid[i] = fork();
+ if (pid[i] == 0) {
+ fn(i, data);
+ exit(0);
+ } else if (pid[i] == -1) {
+ printf("Couldn't spawn #%d process!\n", i);
+ exit(1);
+ }
+ }
+
+ for (i = 0; i < tasks; i++) {
+ int status;
+
+ assert(waitpid(pid[i], &status, 0) == pid[i]);
+ assert(status == 0);
+ }
+}
+
+static void test_map_stress(void)
+{
+ run_parallel(100, test_hashmap, NULL);
+ run_parallel(100, test_hashmap_percpu, NULL);
+ run_parallel(100, test_hashmap_sizes, NULL);
+ run_parallel(100, test_hashmap_walk, NULL);
+
+ run_parallel(100, test_arraymap, NULL);
+ run_parallel(100, test_arraymap_percpu, NULL);
+}
+
+#define TASKS 1024
+
+#define DO_UPDATE 1
+#define DO_DELETE 0
+
+static void test_update_delete(int fn, void *data)
+{
+ int do_update = ((int *)data)[1];
+ int fd = ((int *)data)[0];
+ int i, key, value;
+
+ for (i = fn; i < MAP_SIZE; i += TASKS) {
+ key = value = i;
+
+ if (do_update) {
+ assert(bpf_map_update_elem(fd, &key, &value,
+ BPF_NOEXIST) == 0);
+ assert(bpf_map_update_elem(fd, &key, &value,
+ BPF_EXIST) == 0);
+ } else {
+ assert(bpf_map_delete_elem(fd, &key) == 0);
+ }
+ }
+}
+
+static void test_map_parallel(void)
+{
+ int i, fd, key = 0, value = 0;
+ int data[2];
+
+ fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
+ MAP_SIZE, map_flags);
+ if (fd < 0) {
+ printf("Failed to create map for parallel test '%s'!\n",
+ strerror(errno));
+ exit(1);
+ }
+
+ /* Use the same fd in children to add elements to this map:
+ * child_0 adds key=0, key=1024, key=2048, ...
+ * child_1 adds key=1, key=1025, key=2049, ...
+ * child_1023 adds key=1023, ...
+ */
+ data[0] = fd;
+ data[1] = DO_UPDATE;
+ run_parallel(TASKS, test_update_delete, data);
+
+ /* Check that key=0 is already there. */
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+ errno == EEXIST);
+
+ /* Check that all elements were inserted. */
+ assert(bpf_map_get_next_key(fd, NULL, &key) == 0);
+ key = -1;
+ for (i = 0; i < MAP_SIZE; i++)
+ assert(bpf_map_get_next_key(fd, &key, &key) == 0);
+ assert(bpf_map_get_next_key(fd, &key, &key) == -1 && errno == ENOENT);
+
+ /* Another check for all elements */
+ for (i = 0; i < MAP_SIZE; i++) {
+ key = MAP_SIZE - i - 1;
+
+ assert(bpf_map_lookup_elem(fd, &key, &value) == 0 &&
+ value == key);
+ }
+
+ /* Now let's delete all elemenets in parallel. */
+ data[1] = DO_DELETE;
+ run_parallel(TASKS, test_update_delete, data);
+
+ /* Nothing should be left. */
+ key = -1;
+ assert(bpf_map_get_next_key(fd, NULL, &key) == -1 && errno == ENOENT);
+ assert(bpf_map_get_next_key(fd, &key, &key) == -1 && errno == ENOENT);
+}
+
+static void test_map_rdonly(void)
+{
+ int fd, key = 0, value = 0;
+
+ fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
+ MAP_SIZE, map_flags | BPF_F_RDONLY);
+ if (fd < 0) {
+ printf("Failed to create map for read only test '%s'!\n",
+ strerror(errno));
+ exit(1);
+ }
+
+ key = 1;
+ value = 1234;
+ /* Insert key=1 element. */
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == -1 &&
+ errno == EPERM);
+
+ /* Check that key=2 is not found. */
+ assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
+ assert(bpf_map_get_next_key(fd, &key, &value) == -1 && errno == ENOENT);
+}
+
+static void test_map_wronly(void)
+{
+ int fd, key = 0, value = 0;
+
+ fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
+ MAP_SIZE, map_flags | BPF_F_WRONLY);
+ if (fd < 0) {
+ printf("Failed to create map for read only test '%s'!\n",
+ strerror(errno));
+ exit(1);
+ }
+
+ key = 1;
+ value = 1234;
+ /* Insert key=1 element. */
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
+
+ /* Check that key=2 is not found. */
+ assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == EPERM);
+ assert(bpf_map_get_next_key(fd, &key, &value) == -1 && errno == EPERM);
+}
+
+static void prepare_reuseport_grp(int type, int map_fd,
+ __s64 *fds64, __u64 *sk_cookies,
+ unsigned int n)
+{
+ socklen_t optlen, addrlen;
+ struct sockaddr_in6 s6;
+ const __u32 index0 = 0;
+ const int optval = 1;
+ unsigned int i;
+ u64 sk_cookie;
+ __s64 fd64;
+ int err;
+
+ s6.sin6_family = AF_INET6;
+ s6.sin6_addr = in6addr_any;
+ s6.sin6_port = 0;
+ addrlen = sizeof(s6);
+ optlen = sizeof(sk_cookie);
+
+ for (i = 0; i < n; i++) {
+ fd64 = socket(AF_INET6, type, 0);
+ CHECK(fd64 == -1, "socket()",
+ "sock_type:%d fd64:%lld errno:%d\n",
+ type, fd64, errno);
+
+ err = setsockopt(fd64, SOL_SOCKET, SO_REUSEPORT,
+ &optval, sizeof(optval));
+ CHECK(err == -1, "setsockopt(SO_REUSEPORT)",
+ "err:%d errno:%d\n", err, errno);
+
+ /* reuseport_array does not allow unbound sk */
+ err = bpf_map_update_elem(map_fd, &index0, &fd64,
+ BPF_ANY);
+ CHECK(err != -1 || errno != EINVAL,
+ "reuseport array update unbound sk",
+ "sock_type:%d err:%d errno:%d\n",
+ type, err, errno);
+
+ err = bind(fd64, (struct sockaddr *)&s6, sizeof(s6));
+ CHECK(err == -1, "bind()",
+ "sock_type:%d err:%d errno:%d\n", type, err, errno);
+
+ if (i == 0) {
+ err = getsockname(fd64, (struct sockaddr *)&s6,
+ &addrlen);
+ CHECK(err == -1, "getsockname()",
+ "sock_type:%d err:%d errno:%d\n",
+ type, err, errno);
+ }
+
+ err = getsockopt(fd64, SOL_SOCKET, SO_COOKIE, &sk_cookie,
+ &optlen);
+ CHECK(err == -1, "getsockopt(SO_COOKIE)",
+ "sock_type:%d err:%d errno:%d\n", type, err, errno);
+
+ if (type == SOCK_STREAM) {
+ /*
+ * reuseport_array does not allow
+ * non-listening tcp sk.
+ */
+ err = bpf_map_update_elem(map_fd, &index0, &fd64,
+ BPF_ANY);
+ CHECK(err != -1 || errno != EINVAL,
+ "reuseport array update non-listening sk",
+ "sock_type:%d err:%d errno:%d\n",
+ type, err, errno);
+ err = listen(fd64, 0);
+ CHECK(err == -1, "listen()",
+ "sock_type:%d, err:%d errno:%d\n",
+ type, err, errno);
+ }
+
+ fds64[i] = fd64;
+ sk_cookies[i] = sk_cookie;
+ }
+}
+
+static void test_reuseport_array(void)
+{
+#define REUSEPORT_FD_IDX(err, last) ({ (err) ? last : !last; })
+
+ const __u32 array_size = 4, index0 = 0, index3 = 3;
+ int types[2] = { SOCK_STREAM, SOCK_DGRAM }, type;
+ __u64 grpa_cookies[2], sk_cookie, map_cookie;
+ __s64 grpa_fds64[2] = { -1, -1 }, fd64 = -1;
+ const __u32 bad_index = array_size;
+ int map_fd, err, t, f;
+ __u32 fds_idx = 0;
+ int fd;
+
+ map_fd = bpf_create_map(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
+ sizeof(__u32), sizeof(__u64), array_size, 0);
+ CHECK(map_fd == -1, "reuseport array create",
+ "map_fd:%d, errno:%d\n", map_fd, errno);
+
+ /* Test lookup/update/delete with invalid index */
+ err = bpf_map_delete_elem(map_fd, &bad_index);
+ CHECK(err != -1 || errno != E2BIG, "reuseport array del >=max_entries",
+ "err:%d errno:%d\n", err, errno);
+
+ err = bpf_map_update_elem(map_fd, &bad_index, &fd64, BPF_ANY);
+ CHECK(err != -1 || errno != E2BIG,
+ "reuseport array update >=max_entries",
+ "err:%d errno:%d\n", err, errno);
+
+ err = bpf_map_lookup_elem(map_fd, &bad_index, &map_cookie);
+ CHECK(err != -1 || errno != ENOENT,
+ "reuseport array update >=max_entries",
+ "err:%d errno:%d\n", err, errno);
+
+ /* Test lookup/delete non existence elem */
+ err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie);
+ CHECK(err != -1 || errno != ENOENT,
+ "reuseport array lookup not-exist elem",
+ "err:%d errno:%d\n", err, errno);
+ err = bpf_map_delete_elem(map_fd, &index3);
+ CHECK(err != -1 || errno != ENOENT,
+ "reuseport array del not-exist elem",
+ "err:%d errno:%d\n", err, errno);
+
+ for (t = 0; t < ARRAY_SIZE(types); t++) {
+ type = types[t];
+
+ prepare_reuseport_grp(type, map_fd, grpa_fds64,
+ grpa_cookies, ARRAY_SIZE(grpa_fds64));
+
+ /* Test BPF_* update flags */
+ /* BPF_EXIST failure case */
+ err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
+ BPF_EXIST);
+ CHECK(err != -1 || errno != ENOENT,
+ "reuseport array update empty elem BPF_EXIST",
+ "sock_type:%d err:%d errno:%d\n",
+ type, err, errno);
+ fds_idx = REUSEPORT_FD_IDX(err, fds_idx);
+
+ /* BPF_NOEXIST success case */
+ err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
+ BPF_NOEXIST);
+ CHECK(err == -1,
+ "reuseport array update empty elem BPF_NOEXIST",
+ "sock_type:%d err:%d errno:%d\n",
+ type, err, errno);
+ fds_idx = REUSEPORT_FD_IDX(err, fds_idx);
+
+ /* BPF_EXIST success case. */
+ err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
+ BPF_EXIST);
+ CHECK(err == -1,
+ "reuseport array update same elem BPF_EXIST",
+ "sock_type:%d err:%d errno:%d\n", type, err, errno);
+ fds_idx = REUSEPORT_FD_IDX(err, fds_idx);
+
+ /* BPF_NOEXIST failure case */
+ err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
+ BPF_NOEXIST);
+ CHECK(err != -1 || errno != EEXIST,
+ "reuseport array update non-empty elem BPF_NOEXIST",
+ "sock_type:%d err:%d errno:%d\n",
+ type, err, errno);
+ fds_idx = REUSEPORT_FD_IDX(err, fds_idx);
+
+ /* BPF_ANY case (always succeed) */
+ err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
+ BPF_ANY);
+ CHECK(err == -1,
+ "reuseport array update same sk with BPF_ANY",
+ "sock_type:%d err:%d errno:%d\n", type, err, errno);
+
+ fd64 = grpa_fds64[fds_idx];
+ sk_cookie = grpa_cookies[fds_idx];
+
+ /* The same sk cannot be added to reuseport_array twice */
+ err = bpf_map_update_elem(map_fd, &index3, &fd64, BPF_ANY);
+ CHECK(err != -1 || errno != EBUSY,
+ "reuseport array update same sk with same index",
+ "sock_type:%d err:%d errno:%d\n",
+ type, err, errno);
+
+ err = bpf_map_update_elem(map_fd, &index0, &fd64, BPF_ANY);
+ CHECK(err != -1 || errno != EBUSY,
+ "reuseport array update same sk with different index",
+ "sock_type:%d err:%d errno:%d\n",
+ type, err, errno);
+
+ /* Test delete elem */
+ err = bpf_map_delete_elem(map_fd, &index3);
+ CHECK(err == -1, "reuseport array delete sk",
+ "sock_type:%d err:%d errno:%d\n",
+ type, err, errno);
+
+ /* Add it back with BPF_NOEXIST */
+ err = bpf_map_update_elem(map_fd, &index3, &fd64, BPF_NOEXIST);
+ CHECK(err == -1,
+ "reuseport array re-add with BPF_NOEXIST after del",
+ "sock_type:%d err:%d errno:%d\n", type, err, errno);
+
+ /* Test cookie */
+ err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie);
+ CHECK(err == -1 || sk_cookie != map_cookie,
+ "reuseport array lookup re-added sk",
+ "sock_type:%d err:%d errno:%d sk_cookie:0x%llx map_cookie:0x%llxn",
+ type, err, errno, sk_cookie, map_cookie);
+
+ /* Test elem removed by close() */
+ for (f = 0; f < ARRAY_SIZE(grpa_fds64); f++)
+ close(grpa_fds64[f]);
+ err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie);
+ CHECK(err != -1 || errno != ENOENT,
+ "reuseport array lookup after close()",
+ "sock_type:%d err:%d errno:%d\n",
+ type, err, errno);
+ }
+
+ /* Test SOCK_RAW */
+ fd64 = socket(AF_INET6, SOCK_RAW, IPPROTO_UDP);
+ CHECK(fd64 == -1, "socket(SOCK_RAW)", "err:%d errno:%d\n",
+ err, errno);
+ err = bpf_map_update_elem(map_fd, &index3, &fd64, BPF_NOEXIST);
+ CHECK(err != -1 || errno != ENOTSUPP, "reuseport array update SOCK_RAW",
+ "err:%d errno:%d\n", err, errno);
+ close(fd64);
+
+ /* Close the 64 bit value map */
+ close(map_fd);
+
+ /* Test 32 bit fd */
+ map_fd = bpf_create_map(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
+ sizeof(__u32), sizeof(__u32), array_size, 0);
+ CHECK(map_fd == -1, "reuseport array create",
+ "map_fd:%d, errno:%d\n", map_fd, errno);
+ prepare_reuseport_grp(SOCK_STREAM, map_fd, &fd64, &sk_cookie, 1);
+ fd = fd64;
+ err = bpf_map_update_elem(map_fd, &index3, &fd, BPF_NOEXIST);
+ CHECK(err == -1, "reuseport array update 32 bit fd",
+ "err:%d errno:%d\n", err, errno);
+ err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie);
+ CHECK(err != -1 || errno != ENOSPC,
+ "reuseport array lookup 32 bit fd",
+ "err:%d errno:%d\n", err, errno);
+ close(fd);
+ close(map_fd);
+}
+
+static void run_all_tests(void)
+{
+ test_hashmap(0, NULL);
+ test_hashmap_percpu(0, NULL);
+ test_hashmap_walk(0, NULL);
+
+ test_arraymap(0, NULL);
+ test_arraymap_percpu(0, NULL);
+
+ test_arraymap_percpu_many_keys();
+
+ test_devmap(0, NULL);
+ test_sockmap(0, NULL);
+
+ test_map_large();
+ test_map_parallel();
+ test_map_stress();
+
+ test_map_rdonly();
+ test_map_wronly();
+
+ test_reuseport_array();
+}
+
+int main(void)
+{
+ map_flags = 0;
+ run_all_tests();
+
+ map_flags = BPF_F_NO_PREALLOC;
+ run_all_tests();
+
+ printf("test_maps: OK\n");
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/test_obj_id.c b/tools/testing/selftests/bpf/test_obj_id.c
new file mode 100644
index 000000000..880d2963b
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_obj_id.c
@@ -0,0 +1,35 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <linux/pkt_cls.h>
+#include "bpf_helpers.h"
+
+/* It is a dumb bpf program such that it must have no
+ * issue to be loaded since testing the verifier is
+ * not the focus here.
+ */
+
+int _version SEC("version") = 1;
+
+struct bpf_map_def SEC("maps") test_map_id = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u64),
+ .max_entries = 1,
+};
+
+SEC("test_obj_id_dummy")
+int test_obj_id(struct __sk_buff *skb)
+{
+ __u32 key = 0;
+ __u64 *value;
+
+ value = bpf_map_lookup_elem(&test_map_id, &key);
+
+ return TC_ACT_OK;
+}
diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py
new file mode 100755
index 000000000..6b46db61c
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_offload.py
@@ -0,0 +1,1288 @@
+#!/usr/bin/env python3
+
+# Copyright (C) 2017 Netronome Systems, Inc.
+#
+# This software is licensed under the GNU General License Version 2,
+# June 1991 as shown in the file COPYING in the top-level directory of this
+# source tree.
+#
+# THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS"
+# WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
+# BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+# FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE
+# OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME
+# THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+from datetime import datetime
+import argparse
+import json
+import os
+import pprint
+import random
+import string
+import struct
+import subprocess
+import time
+
+logfile = None
+log_level = 1
+skip_extack = False
+bpf_test_dir = os.path.dirname(os.path.realpath(__file__))
+pp = pprint.PrettyPrinter()
+devs = [] # devices we created for clean up
+files = [] # files to be removed
+netns = [] # net namespaces to be removed
+
+def log_get_sec(level=0):
+ return "*" * (log_level + level)
+
+def log_level_inc(add=1):
+ global log_level
+ log_level += add
+
+def log_level_dec(sub=1):
+ global log_level
+ log_level -= sub
+
+def log_level_set(level):
+ global log_level
+ log_level = level
+
+def log(header, data, level=None):
+ """
+ Output to an optional log.
+ """
+ if logfile is None:
+ return
+ if level is not None:
+ log_level_set(level)
+
+ if not isinstance(data, str):
+ data = pp.pformat(data)
+
+ if len(header):
+ logfile.write("\n" + log_get_sec() + " ")
+ logfile.write(header)
+ if len(header) and len(data.strip()):
+ logfile.write("\n")
+ logfile.write(data)
+
+def skip(cond, msg):
+ if not cond:
+ return
+ print("SKIP: " + msg)
+ log("SKIP: " + msg, "", level=1)
+ os.sys.exit(0)
+
+def fail(cond, msg):
+ if not cond:
+ return
+ print("FAIL: " + msg)
+ log("FAIL: " + msg, "", level=1)
+ os.sys.exit(1)
+
+def start_test(msg):
+ log(msg, "", level=1)
+ log_level_inc()
+ print(msg)
+
+def cmd(cmd, shell=True, include_stderr=False, background=False, fail=True):
+ """
+ Run a command in subprocess and return tuple of (retval, stdout);
+ optionally return stderr as well as third value.
+ """
+ proc = subprocess.Popen(cmd, shell=shell, stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ if background:
+ msg = "%s START: %s" % (log_get_sec(1),
+ datetime.now().strftime("%H:%M:%S.%f"))
+ log("BKG " + proc.args, msg)
+ return proc
+
+ return cmd_result(proc, include_stderr=include_stderr, fail=fail)
+
+def cmd_result(proc, include_stderr=False, fail=False):
+ stdout, stderr = proc.communicate()
+ stdout = stdout.decode("utf-8")
+ stderr = stderr.decode("utf-8")
+ proc.stdout.close()
+ proc.stderr.close()
+
+ stderr = "\n" + stderr
+ if stderr[-1] == "\n":
+ stderr = stderr[:-1]
+
+ sec = log_get_sec(1)
+ log("CMD " + proc.args,
+ "RETCODE: %d\n%s STDOUT:\n%s%s STDERR:%s\n%s END: %s" %
+ (proc.returncode, sec, stdout, sec, stderr,
+ sec, datetime.now().strftime("%H:%M:%S.%f")))
+
+ if proc.returncode != 0 and fail:
+ if len(stderr) > 0 and stderr[-1] == "\n":
+ stderr = stderr[:-1]
+ raise Exception("Command failed: %s\n%s" % (proc.args, stderr))
+
+ if include_stderr:
+ return proc.returncode, stdout, stderr
+ else:
+ return proc.returncode, stdout
+
+def rm(f):
+ cmd("rm -f %s" % (f))
+ if f in files:
+ files.remove(f)
+
+def tool(name, args, flags, JSON=True, ns="", fail=True, include_stderr=False):
+ params = ""
+ if JSON:
+ params += "%s " % (flags["json"])
+
+ if ns != "":
+ ns = "ip netns exec %s " % (ns)
+
+ if include_stderr:
+ ret, stdout, stderr = cmd(ns + name + " " + params + args,
+ fail=fail, include_stderr=True)
+ else:
+ ret, stdout = cmd(ns + name + " " + params + args,
+ fail=fail, include_stderr=False)
+
+ if JSON and len(stdout.strip()) != 0:
+ out = json.loads(stdout)
+ else:
+ out = stdout
+
+ if include_stderr:
+ return ret, out, stderr
+ else:
+ return ret, out
+
+def bpftool(args, JSON=True, ns="", fail=True, include_stderr=False):
+ return tool("bpftool", args, {"json":"-p"}, JSON=JSON, ns=ns,
+ fail=fail, include_stderr=include_stderr)
+
+def bpftool_prog_list(expected=None, ns=""):
+ _, progs = bpftool("prog show", JSON=True, ns=ns, fail=True)
+ # Remove the base progs
+ for p in base_progs:
+ if p in progs:
+ progs.remove(p)
+ if expected is not None:
+ if len(progs) != expected:
+ fail(True, "%d BPF programs loaded, expected %d" %
+ (len(progs), expected))
+ return progs
+
+def bpftool_map_list(expected=None, ns=""):
+ _, maps = bpftool("map show", JSON=True, ns=ns, fail=True)
+ # Remove the base maps
+ for m in base_maps:
+ if m in maps:
+ maps.remove(m)
+ if expected is not None:
+ if len(maps) != expected:
+ fail(True, "%d BPF maps loaded, expected %d" %
+ (len(maps), expected))
+ return maps
+
+def bpftool_prog_list_wait(expected=0, n_retry=20):
+ for i in range(n_retry):
+ nprogs = len(bpftool_prog_list())
+ if nprogs == expected:
+ return
+ time.sleep(0.05)
+ raise Exception("Time out waiting for program counts to stabilize want %d, have %d" % (expected, nprogs))
+
+def bpftool_map_list_wait(expected=0, n_retry=20):
+ for i in range(n_retry):
+ nmaps = len(bpftool_map_list())
+ if nmaps == expected:
+ return
+ time.sleep(0.05)
+ raise Exception("Time out waiting for map counts to stabilize want %d, have %d" % (expected, nmaps))
+
+def bpftool_prog_load(sample, file_name, maps=[], prog_type="xdp", dev=None,
+ fail=True, include_stderr=False):
+ args = "prog load %s %s" % (os.path.join(bpf_test_dir, sample), file_name)
+ if prog_type is not None:
+ args += " type " + prog_type
+ if dev is not None:
+ args += " dev " + dev
+ if len(maps):
+ args += " map " + " map ".join(maps)
+
+ res = bpftool(args, fail=fail, include_stderr=include_stderr)
+ if res[0] == 0:
+ files.append(file_name)
+ return res
+
+def ip(args, force=False, JSON=True, ns="", fail=True, include_stderr=False):
+ if force:
+ args = "-force " + args
+ return tool("ip", args, {"json":"-j"}, JSON=JSON, ns=ns,
+ fail=fail, include_stderr=include_stderr)
+
+def tc(args, JSON=True, ns="", fail=True, include_stderr=False):
+ return tool("tc", args, {"json":"-p"}, JSON=JSON, ns=ns,
+ fail=fail, include_stderr=include_stderr)
+
+def ethtool(dev, opt, args, fail=True):
+ return cmd("ethtool %s %s %s" % (opt, dev["ifname"], args), fail=fail)
+
+def bpf_obj(name, sec=".text", path=bpf_test_dir,):
+ return "obj %s sec %s" % (os.path.join(path, name), sec)
+
+def bpf_pinned(name):
+ return "pinned %s" % (name)
+
+def bpf_bytecode(bytecode):
+ return "bytecode \"%s\"" % (bytecode)
+
+def mknetns(n_retry=10):
+ for i in range(n_retry):
+ name = ''.join([random.choice(string.ascii_letters) for i in range(8)])
+ ret, _ = ip("netns add %s" % (name), fail=False)
+ if ret == 0:
+ netns.append(name)
+ return name
+ return None
+
+def int2str(fmt, val):
+ ret = []
+ for b in struct.pack(fmt, val):
+ ret.append(int(b))
+ return " ".join(map(lambda x: str(x), ret))
+
+def str2int(strtab):
+ inttab = []
+ for i in strtab:
+ inttab.append(int(i, 16))
+ ba = bytearray(inttab)
+ if len(strtab) == 4:
+ fmt = "I"
+ elif len(strtab) == 8:
+ fmt = "Q"
+ else:
+ raise Exception("String array of len %d can't be unpacked to an int" %
+ (len(strtab)))
+ return struct.unpack(fmt, ba)[0]
+
+class DebugfsDir:
+ """
+ Class for accessing DebugFS directories as a dictionary.
+ """
+
+ def __init__(self, path):
+ self.path = path
+ self._dict = self._debugfs_dir_read(path)
+
+ def __len__(self):
+ return len(self._dict.keys())
+
+ def __getitem__(self, key):
+ if type(key) is int:
+ key = list(self._dict.keys())[key]
+ return self._dict[key]
+
+ def __setitem__(self, key, value):
+ log("DebugFS set %s = %s" % (key, value), "")
+ log_level_inc()
+
+ cmd("echo '%s' > %s/%s" % (value, self.path, key))
+ log_level_dec()
+
+ _, out = cmd('cat %s/%s' % (self.path, key))
+ self._dict[key] = out.strip()
+
+ def _debugfs_dir_read(self, path):
+ dfs = {}
+
+ log("DebugFS state for %s" % (path), "")
+ log_level_inc(add=2)
+
+ _, out = cmd('ls ' + path)
+ for f in out.split():
+ p = os.path.join(path, f)
+ if os.path.isfile(p):
+ _, out = cmd('cat %s/%s' % (path, f))
+ dfs[f] = out.strip()
+ elif os.path.isdir(p):
+ dfs[f] = DebugfsDir(p)
+ else:
+ raise Exception("%s is neither file nor directory" % (p))
+
+ log_level_dec()
+ log("DebugFS state", dfs)
+ log_level_dec()
+
+ return dfs
+
+class NetdevSim:
+ """
+ Class for netdevsim netdevice and its attributes.
+ """
+
+ def __init__(self, link=None):
+ self.link = link
+
+ self.dev = self._netdevsim_create()
+ devs.append(self)
+
+ self.ns = ""
+
+ self.dfs_dir = '/sys/kernel/debug/netdevsim/%s' % (self.dev['ifname'])
+ self.sdev_dir = self.dfs_dir + '/sdev/'
+ self.dfs_refresh()
+
+ def __getitem__(self, key):
+ return self.dev[key]
+
+ def _netdevsim_create(self):
+ link = "" if self.link is None else "link " + self.link.dev['ifname']
+ _, old = ip("link show")
+ ip("link add sim%d {link} type netdevsim".format(link=link))
+ _, new = ip("link show")
+
+ for dev in new:
+ f = filter(lambda x: x["ifname"] == dev["ifname"], old)
+ if len(list(f)) == 0:
+ return dev
+
+ raise Exception("failed to create netdevsim device")
+
+ def remove(self):
+ devs.remove(self)
+ ip("link del dev %s" % (self.dev["ifname"]), ns=self.ns)
+
+ def dfs_refresh(self):
+ self.dfs = DebugfsDir(self.dfs_dir)
+ return self.dfs
+
+ def dfs_read(self, f):
+ path = os.path.join(self.dfs_dir, f)
+ _, data = cmd('cat %s' % (path))
+ return data.strip()
+
+ def dfs_num_bound_progs(self):
+ path = os.path.join(self.sdev_dir, "bpf_bound_progs")
+ _, progs = cmd('ls %s' % (path))
+ return len(progs.split())
+
+ def dfs_get_bound_progs(self, expected):
+ progs = DebugfsDir(os.path.join(self.sdev_dir, "bpf_bound_progs"))
+ if expected is not None:
+ if len(progs) != expected:
+ fail(True, "%d BPF programs bound, expected %d" %
+ (len(progs), expected))
+ return progs
+
+ def wait_for_flush(self, bound=0, total=0, n_retry=20):
+ for i in range(n_retry):
+ nbound = self.dfs_num_bound_progs()
+ nprogs = len(bpftool_prog_list())
+ if nbound == bound and nprogs == total:
+ return
+ time.sleep(0.05)
+ raise Exception("Time out waiting for program counts to stabilize want %d/%d, have %d bound, %d loaded" % (bound, total, nbound, nprogs))
+
+ def set_ns(self, ns):
+ name = "1" if ns == "" else ns
+ ip("link set dev %s netns %s" % (self.dev["ifname"], name), ns=self.ns)
+ self.ns = ns
+
+ def set_mtu(self, mtu, fail=True):
+ return ip("link set dev %s mtu %d" % (self.dev["ifname"], mtu),
+ fail=fail)
+
+ def set_xdp(self, bpf, mode, force=False, JSON=True, verbose=False,
+ fail=True, include_stderr=False):
+ if verbose:
+ bpf += " verbose"
+ return ip("link set dev %s xdp%s %s" % (self.dev["ifname"], mode, bpf),
+ force=force, JSON=JSON,
+ fail=fail, include_stderr=include_stderr)
+
+ def unset_xdp(self, mode, force=False, JSON=True,
+ fail=True, include_stderr=False):
+ return ip("link set dev %s xdp%s off" % (self.dev["ifname"], mode),
+ force=force, JSON=JSON,
+ fail=fail, include_stderr=include_stderr)
+
+ def ip_link_show(self, xdp):
+ _, link = ip("link show dev %s" % (self['ifname']))
+ if len(link) > 1:
+ raise Exception("Multiple objects on ip link show")
+ if len(link) < 1:
+ return {}
+ fail(xdp != "xdp" in link,
+ "XDP program not reporting in iplink (reported %s, expected %s)" %
+ ("xdp" in link, xdp))
+ return link[0]
+
+ def tc_add_ingress(self):
+ tc("qdisc add dev %s ingress" % (self['ifname']))
+
+ def tc_del_ingress(self):
+ tc("qdisc del dev %s ingress" % (self['ifname']))
+
+ def tc_flush_filters(self, bound=0, total=0):
+ self.tc_del_ingress()
+ self.tc_add_ingress()
+ self.wait_for_flush(bound=bound, total=total)
+
+ def tc_show_ingress(self, expected=None):
+ # No JSON support, oh well...
+ flags = ["skip_sw", "skip_hw", "in_hw"]
+ named = ["protocol", "pref", "chain", "handle", "id", "tag"]
+
+ args = "-s filter show dev %s ingress" % (self['ifname'])
+ _, out = tc(args, JSON=False)
+
+ filters = []
+ lines = out.split('\n')
+ for line in lines:
+ words = line.split()
+ if "handle" not in words:
+ continue
+ fltr = {}
+ for flag in flags:
+ fltr[flag] = flag in words
+ for name in named:
+ try:
+ idx = words.index(name)
+ fltr[name] = words[idx + 1]
+ except ValueError:
+ pass
+ filters.append(fltr)
+
+ if expected is not None:
+ fail(len(filters) != expected,
+ "%d ingress filters loaded, expected %d" %
+ (len(filters), expected))
+ return filters
+
+ def cls_filter_op(self, op, qdisc="ingress", prio=None, handle=None,
+ chain=None, cls="", params="",
+ fail=True, include_stderr=False):
+ spec = ""
+ if prio is not None:
+ spec += " prio %d" % (prio)
+ if handle:
+ spec += " handle %s" % (handle)
+ if chain is not None:
+ spec += " chain %d" % (chain)
+
+ return tc("filter {op} dev {dev} {qdisc} {spec} {cls} {params}"\
+ .format(op=op, dev=self['ifname'], qdisc=qdisc, spec=spec,
+ cls=cls, params=params),
+ fail=fail, include_stderr=include_stderr)
+
+ def cls_bpf_add_filter(self, bpf, op="add", prio=None, handle=None,
+ chain=None, da=False, verbose=False,
+ skip_sw=False, skip_hw=False,
+ fail=True, include_stderr=False):
+ cls = "bpf " + bpf
+
+ params = ""
+ if da:
+ params += " da"
+ if verbose:
+ params += " verbose"
+ if skip_sw:
+ params += " skip_sw"
+ if skip_hw:
+ params += " skip_hw"
+
+ return self.cls_filter_op(op=op, prio=prio, handle=handle, cls=cls,
+ chain=chain, params=params,
+ fail=fail, include_stderr=include_stderr)
+
+ def set_ethtool_tc_offloads(self, enable, fail=True):
+ args = "hw-tc-offload %s" % ("on" if enable else "off")
+ return ethtool(self, "-K", args, fail=fail)
+
+################################################################################
+def clean_up():
+ global files, netns, devs
+
+ for dev in devs:
+ dev.remove()
+ for f in files:
+ cmd("rm -f %s" % (f))
+ for ns in netns:
+ cmd("ip netns delete %s" % (ns))
+ files = []
+ netns = []
+
+def pin_prog(file_name, idx=0):
+ progs = bpftool_prog_list(expected=(idx + 1))
+ prog = progs[idx]
+ bpftool("prog pin id %d %s" % (prog["id"], file_name))
+ files.append(file_name)
+
+ return file_name, bpf_pinned(file_name)
+
+def pin_map(file_name, idx=0, expected=1):
+ maps = bpftool_map_list(expected=expected)
+ m = maps[idx]
+ bpftool("map pin id %d %s" % (m["id"], file_name))
+ files.append(file_name)
+
+ return file_name, bpf_pinned(file_name)
+
+def check_dev_info_removed(prog_file=None, map_file=None):
+ bpftool_prog_list(expected=0)
+ ret, err = bpftool("prog show pin %s" % (prog_file), fail=False)
+ fail(ret == 0, "Showing prog with removed device did not fail")
+ fail(err["error"].find("No such device") == -1,
+ "Showing prog with removed device expected ENODEV, error is %s" %
+ (err["error"]))
+
+ bpftool_map_list(expected=0)
+ ret, err = bpftool("map show pin %s" % (map_file), fail=False)
+ fail(ret == 0, "Showing map with removed device did not fail")
+ fail(err["error"].find("No such device") == -1,
+ "Showing map with removed device expected ENODEV, error is %s" %
+ (err["error"]))
+
+def check_dev_info(other_ns, ns, prog_file=None, map_file=None, removed=False):
+ progs = bpftool_prog_list(expected=1, ns=ns)
+ prog = progs[0]
+
+ fail("dev" not in prog.keys(), "Device parameters not reported")
+ dev = prog["dev"]
+ fail("ifindex" not in dev.keys(), "Device parameters not reported")
+ fail("ns_dev" not in dev.keys(), "Device parameters not reported")
+ fail("ns_inode" not in dev.keys(), "Device parameters not reported")
+
+ if not other_ns:
+ fail("ifname" not in dev.keys(), "Ifname not reported")
+ fail(dev["ifname"] != sim["ifname"],
+ "Ifname incorrect %s vs %s" % (dev["ifname"], sim["ifname"]))
+ else:
+ fail("ifname" in dev.keys(), "Ifname is reported for other ns")
+
+ maps = bpftool_map_list(expected=2, ns=ns)
+ for m in maps:
+ fail("dev" not in m.keys(), "Device parameters not reported")
+ fail(dev != m["dev"], "Map's device different than program's")
+
+def check_extack(output, reference, args):
+ if skip_extack:
+ return
+ lines = output.split("\n")
+ comp = len(lines) >= 2 and lines[1] == 'Error: ' + reference
+ fail(not comp, "Missing or incorrect netlink extack message")
+
+def check_extack_nsim(output, reference, args):
+ check_extack(output, "netdevsim: " + reference, args)
+
+def check_no_extack(res, needle):
+ fail((res[1] + res[2]).count(needle) or (res[1] + res[2]).count("Warning:"),
+ "Found '%s' in command output, leaky extack?" % (needle))
+
+def check_verifier_log(output, reference):
+ lines = output.split("\n")
+ for l in reversed(lines):
+ if l == reference:
+ return
+ fail(True, "Missing or incorrect message from netdevsim in verifier log")
+
+def test_spurios_extack(sim, obj, skip_hw, needle):
+ res = sim.cls_bpf_add_filter(obj, prio=1, handle=1, skip_hw=skip_hw,
+ include_stderr=True)
+ check_no_extack(res, needle)
+ res = sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1,
+ skip_hw=skip_hw, include_stderr=True)
+ check_no_extack(res, needle)
+ res = sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf",
+ include_stderr=True)
+ check_no_extack(res, needle)
+
+
+# Parse command line
+parser = argparse.ArgumentParser()
+parser.add_argument("--log", help="output verbose log to given file")
+args = parser.parse_args()
+if args.log:
+ logfile = open(args.log, 'w+')
+ logfile.write("# -*-Org-*-")
+
+log("Prepare...", "", level=1)
+log_level_inc()
+
+# Check permissions
+skip(os.getuid() != 0, "test must be run as root")
+
+# Check tools
+ret, progs = bpftool("prog", fail=False)
+skip(ret != 0, "bpftool not installed")
+base_progs = progs
+_, base_maps = bpftool("map")
+
+# Check netdevsim
+ret, out = cmd("modprobe netdevsim", fail=False)
+skip(ret != 0, "netdevsim module could not be loaded")
+
+# Check debugfs
+_, out = cmd("mount")
+if out.find("/sys/kernel/debug type debugfs") == -1:
+ cmd("mount -t debugfs none /sys/kernel/debug")
+
+# Check samples are compiled
+samples = ["sample_ret0.o", "sample_map_ret0.o"]
+for s in samples:
+ ret, out = cmd("ls %s/%s" % (bpf_test_dir, s), fail=False)
+ skip(ret != 0, "sample %s/%s not found, please compile it" %
+ (bpf_test_dir, s))
+
+# Check if iproute2 is built with libmnl (needed by extack support)
+_, _, err = cmd("tc qdisc delete dev lo handle 0",
+ fail=False, include_stderr=True)
+if err.find("Error: Failed to find qdisc with specified handle.") == -1:
+ print("Warning: no extack message in iproute2 output, libmnl missing?")
+ log("Warning: no extack message in iproute2 output, libmnl missing?", "")
+ skip_extack = True
+
+# Check if net namespaces seem to work
+ns = mknetns()
+skip(ns is None, "Could not create a net namespace")
+cmd("ip netns delete %s" % (ns))
+netns = []
+
+try:
+ obj = bpf_obj("sample_ret0.o")
+ bytecode = bpf_bytecode("1,6 0 0 4294967295,")
+
+ start_test("Test destruction of generic XDP...")
+ sim = NetdevSim()
+ sim.set_xdp(obj, "generic")
+ sim.remove()
+ bpftool_prog_list_wait(expected=0)
+
+ sim = NetdevSim()
+ sim.tc_add_ingress()
+
+ start_test("Test TC non-offloaded...")
+ ret, _ = sim.cls_bpf_add_filter(obj, skip_hw=True, fail=False)
+ fail(ret != 0, "Software TC filter did not load")
+
+ start_test("Test TC non-offloaded isn't getting bound...")
+ ret, _ = sim.cls_bpf_add_filter(obj, fail=False)
+ fail(ret != 0, "Software TC filter did not load")
+ sim.dfs_get_bound_progs(expected=0)
+
+ sim.tc_flush_filters()
+
+ start_test("Test TC offloads are off by default...")
+ ret, _, err = sim.cls_bpf_add_filter(obj, skip_sw=True,
+ fail=False, include_stderr=True)
+ fail(ret == 0, "TC filter loaded without enabling TC offloads")
+ check_extack(err, "TC offload is disabled on net device.", args)
+ sim.wait_for_flush()
+
+ sim.set_ethtool_tc_offloads(True)
+ sim.dfs["bpf_tc_non_bound_accept"] = "Y"
+
+ start_test("Test TC offload by default...")
+ ret, _ = sim.cls_bpf_add_filter(obj, fail=False)
+ fail(ret != 0, "Software TC filter did not load")
+ sim.dfs_get_bound_progs(expected=0)
+ ingress = sim.tc_show_ingress(expected=1)
+ fltr = ingress[0]
+ fail(not fltr["in_hw"], "Filter not offloaded by default")
+
+ sim.tc_flush_filters()
+
+ start_test("Test TC cBPF bytcode tries offload by default...")
+ ret, _ = sim.cls_bpf_add_filter(bytecode, fail=False)
+ fail(ret != 0, "Software TC filter did not load")
+ sim.dfs_get_bound_progs(expected=0)
+ ingress = sim.tc_show_ingress(expected=1)
+ fltr = ingress[0]
+ fail(not fltr["in_hw"], "Bytecode not offloaded by default")
+
+ sim.tc_flush_filters()
+ sim.dfs["bpf_tc_non_bound_accept"] = "N"
+
+ start_test("Test TC cBPF unbound bytecode doesn't offload...")
+ ret, _, err = sim.cls_bpf_add_filter(bytecode, skip_sw=True,
+ fail=False, include_stderr=True)
+ fail(ret == 0, "TC bytecode loaded for offload")
+ check_extack_nsim(err, "netdevsim configured to reject unbound programs.",
+ args)
+ sim.wait_for_flush()
+
+ start_test("Test non-0 chain offload...")
+ ret, _, err = sim.cls_bpf_add_filter(obj, chain=1, prio=1, handle=1,
+ skip_sw=True,
+ fail=False, include_stderr=True)
+ fail(ret == 0, "Offloaded a filter to chain other than 0")
+ check_extack(err, "Driver supports only offload of chain 0.", args)
+ sim.tc_flush_filters()
+
+ start_test("Test TC replace...")
+ sim.cls_bpf_add_filter(obj, prio=1, handle=1)
+ sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1)
+ sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf")
+
+ sim.cls_bpf_add_filter(obj, prio=1, handle=1, skip_sw=True)
+ sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1, skip_sw=True)
+ sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf")
+
+ sim.cls_bpf_add_filter(obj, prio=1, handle=1, skip_hw=True)
+ sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1, skip_hw=True)
+ sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf")
+
+ start_test("Test TC replace bad flags...")
+ for i in range(3):
+ for j in range(3):
+ ret, _ = sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1,
+ skip_sw=(j == 1), skip_hw=(j == 2),
+ fail=False)
+ fail(bool(ret) != bool(j),
+ "Software TC incorrect load in replace test, iteration %d" %
+ (j))
+ sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf")
+
+ start_test("Test spurious extack from the driver...")
+ test_spurios_extack(sim, obj, False, "netdevsim")
+ test_spurios_extack(sim, obj, True, "netdevsim")
+
+ sim.set_ethtool_tc_offloads(False)
+
+ test_spurios_extack(sim, obj, False, "TC offload is disabled")
+ test_spurios_extack(sim, obj, True, "TC offload is disabled")
+
+ sim.set_ethtool_tc_offloads(True)
+
+ sim.tc_flush_filters()
+
+ start_test("Test TC offloads work...")
+ ret, _, err = sim.cls_bpf_add_filter(obj, verbose=True, skip_sw=True,
+ fail=False, include_stderr=True)
+ fail(ret != 0, "TC filter did not load with TC offloads enabled")
+ check_verifier_log(err, "[netdevsim] Hello from netdevsim!")
+
+ start_test("Test TC offload basics...")
+ dfs = sim.dfs_get_bound_progs(expected=1)
+ progs = bpftool_prog_list(expected=1)
+ ingress = sim.tc_show_ingress(expected=1)
+
+ dprog = dfs[0]
+ prog = progs[0]
+ fltr = ingress[0]
+ fail(fltr["skip_hw"], "TC does reports 'skip_hw' on offloaded filter")
+ fail(not fltr["in_hw"], "TC does not report 'in_hw' for offloaded filter")
+ fail(not fltr["skip_sw"], "TC does not report 'skip_sw' back")
+
+ start_test("Test TC offload is device-bound...")
+ fail(str(prog["id"]) != fltr["id"], "Program IDs don't match")
+ fail(prog["tag"] != fltr["tag"], "Program tags don't match")
+ fail(fltr["id"] != dprog["id"], "Program IDs don't match")
+ fail(dprog["state"] != "xlated", "Offloaded program state not translated")
+ fail(dprog["loaded"] != "Y", "Offloaded program is not loaded")
+
+ start_test("Test disabling TC offloads is rejected while filters installed...")
+ ret, _ = sim.set_ethtool_tc_offloads(False, fail=False)
+ fail(ret == 0, "Driver should refuse to disable TC offloads with filters installed...")
+ sim.set_ethtool_tc_offloads(True)
+
+ start_test("Test qdisc removal frees things...")
+ sim.tc_flush_filters()
+ sim.tc_show_ingress(expected=0)
+
+ start_test("Test disabling TC offloads is OK without filters...")
+ ret, _ = sim.set_ethtool_tc_offloads(False, fail=False)
+ fail(ret != 0,
+ "Driver refused to disable TC offloads without filters installed...")
+
+ sim.set_ethtool_tc_offloads(True)
+
+ start_test("Test destroying device gets rid of TC filters...")
+ sim.cls_bpf_add_filter(obj, skip_sw=True)
+ sim.remove()
+ bpftool_prog_list_wait(expected=0)
+
+ sim = NetdevSim()
+ sim.set_ethtool_tc_offloads(True)
+
+ start_test("Test destroying device gets rid of XDP...")
+ sim.set_xdp(obj, "offload")
+ sim.remove()
+ bpftool_prog_list_wait(expected=0)
+
+ sim = NetdevSim()
+ sim.set_ethtool_tc_offloads(True)
+
+ start_test("Test XDP prog reporting...")
+ sim.set_xdp(obj, "drv")
+ ipl = sim.ip_link_show(xdp=True)
+ progs = bpftool_prog_list(expected=1)
+ fail(ipl["xdp"]["prog"]["id"] != progs[0]["id"],
+ "Loaded program has wrong ID")
+
+ start_test("Test XDP prog replace without force...")
+ ret, _ = sim.set_xdp(obj, "drv", fail=False)
+ fail(ret == 0, "Replaced XDP program without -force")
+ sim.wait_for_flush(total=1)
+
+ start_test("Test XDP prog replace with force...")
+ ret, _ = sim.set_xdp(obj, "drv", force=True, fail=False)
+ fail(ret != 0, "Could not replace XDP program with -force")
+ bpftool_prog_list_wait(expected=1)
+ ipl = sim.ip_link_show(xdp=True)
+ progs = bpftool_prog_list(expected=1)
+ fail(ipl["xdp"]["prog"]["id"] != progs[0]["id"],
+ "Loaded program has wrong ID")
+ fail("dev" in progs[0].keys(),
+ "Device parameters reported for non-offloaded program")
+
+ start_test("Test XDP prog replace with bad flags...")
+ ret, _, err = sim.set_xdp(obj, "generic", force=True,
+ fail=False, include_stderr=True)
+ fail(ret == 0, "Replaced XDP program with a program in different mode")
+ fail(err.count("File exists") != 1, "Replaced driver XDP with generic")
+ ret, _, err = sim.set_xdp(obj, "", force=True,
+ fail=False, include_stderr=True)
+ fail(ret == 0, "Replaced XDP program with a program in different mode")
+ check_extack(err, "program loaded with different flags.", args)
+
+ start_test("Test XDP prog remove with bad flags...")
+ ret, _, err = sim.unset_xdp("", force=True,
+ fail=False, include_stderr=True)
+ fail(ret == 0, "Removed program with a bad mode")
+ check_extack(err, "program loaded with different flags.", args)
+
+ start_test("Test MTU restrictions...")
+ ret, _ = sim.set_mtu(9000, fail=False)
+ fail(ret == 0,
+ "Driver should refuse to increase MTU to 9000 with XDP loaded...")
+ sim.unset_xdp("drv")
+ bpftool_prog_list_wait(expected=0)
+ sim.set_mtu(9000)
+ ret, _, err = sim.set_xdp(obj, "drv", fail=False, include_stderr=True)
+ fail(ret == 0, "Driver should refuse to load program with MTU of 9000...")
+ check_extack_nsim(err, "MTU too large w/ XDP enabled.", args)
+ sim.set_mtu(1500)
+
+ sim.wait_for_flush()
+ start_test("Test non-offload XDP attaching to HW...")
+ bpftool_prog_load("sample_ret0.o", "/sys/fs/bpf/nooffload")
+ nooffload = bpf_pinned("/sys/fs/bpf/nooffload")
+ ret, _, err = sim.set_xdp(nooffload, "offload",
+ fail=False, include_stderr=True)
+ fail(ret == 0, "attached non-offloaded XDP program to HW")
+ check_extack_nsim(err, "xdpoffload of non-bound program.", args)
+ rm("/sys/fs/bpf/nooffload")
+
+ start_test("Test offload XDP attaching to drv...")
+ bpftool_prog_load("sample_ret0.o", "/sys/fs/bpf/offload",
+ dev=sim['ifname'])
+ offload = bpf_pinned("/sys/fs/bpf/offload")
+ ret, _, err = sim.set_xdp(offload, "drv", fail=False, include_stderr=True)
+ fail(ret == 0, "attached offloaded XDP program to drv")
+ check_extack(err, "using device-bound program without HW_MODE flag is not supported.", args)
+ rm("/sys/fs/bpf/offload")
+ sim.wait_for_flush()
+
+ start_test("Test XDP offload...")
+ _, _, err = sim.set_xdp(obj, "offload", verbose=True, include_stderr=True)
+ ipl = sim.ip_link_show(xdp=True)
+ link_xdp = ipl["xdp"]["prog"]
+ progs = bpftool_prog_list(expected=1)
+ prog = progs[0]
+ fail(link_xdp["id"] != prog["id"], "Loaded program has wrong ID")
+ check_verifier_log(err, "[netdevsim] Hello from netdevsim!")
+
+ start_test("Test XDP offload is device bound...")
+ dfs = sim.dfs_get_bound_progs(expected=1)
+ dprog = dfs[0]
+
+ fail(prog["id"] != link_xdp["id"], "Program IDs don't match")
+ fail(prog["tag"] != link_xdp["tag"], "Program tags don't match")
+ fail(str(link_xdp["id"]) != dprog["id"], "Program IDs don't match")
+ fail(dprog["state"] != "xlated", "Offloaded program state not translated")
+ fail(dprog["loaded"] != "Y", "Offloaded program is not loaded")
+
+ start_test("Test removing XDP program many times...")
+ sim.unset_xdp("offload")
+ sim.unset_xdp("offload")
+ sim.unset_xdp("drv")
+ sim.unset_xdp("drv")
+ sim.unset_xdp("")
+ sim.unset_xdp("")
+ bpftool_prog_list_wait(expected=0)
+
+ start_test("Test attempt to use a program for a wrong device...")
+ sim2 = NetdevSim()
+ sim2.set_xdp(obj, "offload")
+ pin_file, pinned = pin_prog("/sys/fs/bpf/tmp")
+
+ ret, _, err = sim.set_xdp(pinned, "offload",
+ fail=False, include_stderr=True)
+ fail(ret == 0, "Pinned program loaded for a different device accepted")
+ check_extack_nsim(err, "program bound to different dev.", args)
+ sim2.remove()
+ ret, _, err = sim.set_xdp(pinned, "offload",
+ fail=False, include_stderr=True)
+ fail(ret == 0, "Pinned program loaded for a removed device accepted")
+ check_extack_nsim(err, "xdpoffload of non-bound program.", args)
+ rm(pin_file)
+ bpftool_prog_list_wait(expected=0)
+
+ start_test("Test multi-attachment XDP - attach...")
+ sim.set_xdp(obj, "offload")
+ xdp = sim.ip_link_show(xdp=True)["xdp"]
+ offloaded = sim.dfs_read("bpf_offloaded_id")
+ fail("prog" not in xdp, "Base program not reported in single program mode")
+ fail(len(ipl["xdp"]["attached"]) != 1,
+ "Wrong attached program count with one program")
+
+ sim.set_xdp(obj, "")
+ two_xdps = sim.ip_link_show(xdp=True)["xdp"]
+ offloaded2 = sim.dfs_read("bpf_offloaded_id")
+
+ fail(two_xdps["mode"] != 4, "Bad mode reported with multiple programs")
+ fail("prog" in two_xdps, "Base program reported in multi program mode")
+ fail(xdp["attached"][0] not in two_xdps["attached"],
+ "Offload program not reported after driver activated")
+ fail(len(two_xdps["attached"]) != 2,
+ "Wrong attached program count with two programs")
+ fail(two_xdps["attached"][0]["prog"]["id"] ==
+ two_xdps["attached"][1]["prog"]["id"],
+ "offloaded and drv programs have the same id")
+ fail(offloaded != offloaded2,
+ "offload ID changed after loading driver program")
+
+ start_test("Test multi-attachment XDP - replace...")
+ ret, _, err = sim.set_xdp(obj, "offload", fail=False, include_stderr=True)
+ fail(err.count("busy") != 1, "Replaced one of programs without -force")
+
+ start_test("Test multi-attachment XDP - detach...")
+ ret, _, err = sim.unset_xdp("drv", force=True,
+ fail=False, include_stderr=True)
+ fail(ret == 0, "Removed program with a bad mode")
+ check_extack(err, "program loaded with different flags.", args)
+
+ sim.unset_xdp("offload")
+ xdp = sim.ip_link_show(xdp=True)["xdp"]
+ offloaded = sim.dfs_read("bpf_offloaded_id")
+
+ fail(xdp["mode"] != 1, "Bad mode reported after multiple programs")
+ fail("prog" not in xdp,
+ "Base program not reported after multi program mode")
+ fail(xdp["attached"][0] not in two_xdps["attached"],
+ "Offload program not reported after driver activated")
+ fail(len(ipl["xdp"]["attached"]) != 1,
+ "Wrong attached program count with remaining programs")
+ fail(offloaded != "0", "offload ID reported with only driver program left")
+
+ start_test("Test multi-attachment XDP - device remove...")
+ sim.set_xdp(obj, "offload")
+ sim.remove()
+
+ sim = NetdevSim()
+ sim.set_ethtool_tc_offloads(True)
+
+ start_test("Test mixing of TC and XDP...")
+ sim.tc_add_ingress()
+ sim.set_xdp(obj, "offload")
+ ret, _, err = sim.cls_bpf_add_filter(obj, skip_sw=True,
+ fail=False, include_stderr=True)
+ fail(ret == 0, "Loading TC when XDP active should fail")
+ check_extack_nsim(err, "driver and netdev offload states mismatch.", args)
+ sim.unset_xdp("offload")
+ sim.wait_for_flush()
+
+ sim.cls_bpf_add_filter(obj, skip_sw=True)
+ ret, _, err = sim.set_xdp(obj, "offload", fail=False, include_stderr=True)
+ fail(ret == 0, "Loading XDP when TC active should fail")
+ check_extack_nsim(err, "TC program is already loaded.", args)
+
+ start_test("Test binding TC from pinned...")
+ pin_file, pinned = pin_prog("/sys/fs/bpf/tmp")
+ sim.tc_flush_filters(bound=1, total=1)
+ sim.cls_bpf_add_filter(pinned, da=True, skip_sw=True)
+ sim.tc_flush_filters(bound=1, total=1)
+
+ start_test("Test binding XDP from pinned...")
+ sim.set_xdp(obj, "offload")
+ pin_file, pinned = pin_prog("/sys/fs/bpf/tmp2", idx=1)
+
+ sim.set_xdp(pinned, "offload", force=True)
+ sim.unset_xdp("offload")
+ sim.set_xdp(pinned, "offload", force=True)
+ sim.unset_xdp("offload")
+
+ start_test("Test offload of wrong type fails...")
+ ret, _ = sim.cls_bpf_add_filter(pinned, da=True, skip_sw=True, fail=False)
+ fail(ret == 0, "Managed to attach XDP program to TC")
+
+ start_test("Test asking for TC offload of two filters...")
+ sim.cls_bpf_add_filter(obj, da=True, skip_sw=True)
+ ret, _, err = sim.cls_bpf_add_filter(obj, da=True, skip_sw=True,
+ fail=False, include_stderr=True)
+ fail(ret == 0, "Managed to offload two TC filters at the same time")
+ check_extack_nsim(err, "driver and netdev offload states mismatch.", args)
+
+ sim.tc_flush_filters(bound=2, total=2)
+
+ start_test("Test if netdev removal waits for translation...")
+ delay_msec = 500
+ sim.dfs["bpf_bind_verifier_delay"] = delay_msec
+ start = time.time()
+ cmd_line = "tc filter add dev %s ingress bpf %s da skip_sw" % \
+ (sim['ifname'], obj)
+ tc_proc = cmd(cmd_line, background=True, fail=False)
+ # Wait for the verifier to start
+ while sim.dfs_num_bound_progs() <= 2:
+ pass
+ sim.remove()
+ end = time.time()
+ ret, _ = cmd_result(tc_proc, fail=False)
+ time_diff = end - start
+ log("Time", "start:\t%s\nend:\t%s\ndiff:\t%s" % (start, end, time_diff))
+
+ fail(ret == 0, "Managed to load TC filter on a unregistering device")
+ delay_sec = delay_msec * 0.001
+ fail(time_diff < delay_sec, "Removal process took %s, expected %s" %
+ (time_diff, delay_sec))
+
+ # Remove all pinned files and reinstantiate the netdev
+ clean_up()
+ bpftool_prog_list_wait(expected=0)
+
+ sim = NetdevSim()
+ map_obj = bpf_obj("sample_map_ret0.o")
+ start_test("Test loading program with maps...")
+ sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON
+
+ start_test("Test bpftool bound info reporting (own ns)...")
+ check_dev_info(False, "")
+
+ start_test("Test bpftool bound info reporting (other ns)...")
+ ns = mknetns()
+ sim.set_ns(ns)
+ check_dev_info(True, "")
+
+ start_test("Test bpftool bound info reporting (remote ns)...")
+ check_dev_info(False, ns)
+
+ start_test("Test bpftool bound info reporting (back to own ns)...")
+ sim.set_ns("")
+ check_dev_info(False, "")
+
+ prog_file, _ = pin_prog("/sys/fs/bpf/tmp_prog")
+ map_file, _ = pin_map("/sys/fs/bpf/tmp_map", idx=1, expected=2)
+ sim.remove()
+
+ start_test("Test bpftool bound info reporting (removed dev)...")
+ check_dev_info_removed(prog_file=prog_file, map_file=map_file)
+
+ # Remove all pinned files and reinstantiate the netdev
+ clean_up()
+ bpftool_prog_list_wait(expected=0)
+
+ sim = NetdevSim()
+
+ start_test("Test map update (no flags)...")
+ sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON
+ maps = bpftool_map_list(expected=2)
+ array = maps[0] if maps[0]["type"] == "array" else maps[1]
+ htab = maps[0] if maps[0]["type"] == "hash" else maps[1]
+ for m in maps:
+ for i in range(2):
+ bpftool("map update id %d key %s value %s" %
+ (m["id"], int2str("I", i), int2str("Q", i * 3)))
+
+ for m in maps:
+ ret, _ = bpftool("map update id %d key %s value %s" %
+ (m["id"], int2str("I", 3), int2str("Q", 3 * 3)),
+ fail=False)
+ fail(ret == 0, "added too many entries")
+
+ start_test("Test map update (exists)...")
+ for m in maps:
+ for i in range(2):
+ bpftool("map update id %d key %s value %s exist" %
+ (m["id"], int2str("I", i), int2str("Q", i * 3)))
+
+ for m in maps:
+ ret, err = bpftool("map update id %d key %s value %s exist" %
+ (m["id"], int2str("I", 3), int2str("Q", 3 * 3)),
+ fail=False)
+ fail(ret == 0, "updated non-existing key")
+ fail(err["error"].find("No such file or directory") == -1,
+ "expected ENOENT, error is '%s'" % (err["error"]))
+
+ start_test("Test map update (noexist)...")
+ for m in maps:
+ for i in range(2):
+ ret, err = bpftool("map update id %d key %s value %s noexist" %
+ (m["id"], int2str("I", i), int2str("Q", i * 3)),
+ fail=False)
+ fail(ret == 0, "updated existing key")
+ fail(err["error"].find("File exists") == -1,
+ "expected EEXIST, error is '%s'" % (err["error"]))
+
+ start_test("Test map dump...")
+ for m in maps:
+ _, entries = bpftool("map dump id %d" % (m["id"]))
+ for i in range(2):
+ key = str2int(entries[i]["key"])
+ fail(key != i, "expected key %d, got %d" % (key, i))
+ val = str2int(entries[i]["value"])
+ fail(val != i * 3, "expected value %d, got %d" % (val, i * 3))
+
+ start_test("Test map getnext...")
+ for m in maps:
+ _, entry = bpftool("map getnext id %d" % (m["id"]))
+ key = str2int(entry["next_key"])
+ fail(key != 0, "next key %d, expected %d" % (key, 0))
+ _, entry = bpftool("map getnext id %d key %s" %
+ (m["id"], int2str("I", 0)))
+ key = str2int(entry["next_key"])
+ fail(key != 1, "next key %d, expected %d" % (key, 1))
+ ret, err = bpftool("map getnext id %d key %s" %
+ (m["id"], int2str("I", 1)), fail=False)
+ fail(ret == 0, "got next key past the end of map")
+ fail(err["error"].find("No such file or directory") == -1,
+ "expected ENOENT, error is '%s'" % (err["error"]))
+
+ start_test("Test map delete (htab)...")
+ for i in range(2):
+ bpftool("map delete id %d key %s" % (htab["id"], int2str("I", i)))
+
+ start_test("Test map delete (array)...")
+ for i in range(2):
+ ret, err = bpftool("map delete id %d key %s" %
+ (htab["id"], int2str("I", i)), fail=False)
+ fail(ret == 0, "removed entry from an array")
+ fail(err["error"].find("No such file or directory") == -1,
+ "expected ENOENT, error is '%s'" % (err["error"]))
+
+ start_test("Test map remove...")
+ sim.unset_xdp("offload")
+ bpftool_map_list_wait(expected=0)
+ sim.remove()
+
+ sim = NetdevSim()
+ sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON
+ sim.remove()
+ bpftool_map_list_wait(expected=0)
+
+ start_test("Test map creation fail path...")
+ sim = NetdevSim()
+ sim.dfs["bpf_map_accept"] = "N"
+ ret, _ = sim.set_xdp(map_obj, "offload", JSON=False, fail=False)
+ fail(ret == 0,
+ "netdevsim didn't refuse to create a map with offload disabled")
+
+ sim.remove()
+
+ start_test("Test multi-dev ASIC program reuse...")
+ simA = NetdevSim()
+ simB1 = NetdevSim()
+ simB2 = NetdevSim(link=simB1)
+ simB3 = NetdevSim(link=simB1)
+ sims = (simA, simB1, simB2, simB3)
+ simB = (simB1, simB2, simB3)
+
+ bpftool_prog_load("sample_map_ret0.o", "/sys/fs/bpf/nsimA",
+ dev=simA['ifname'])
+ progA = bpf_pinned("/sys/fs/bpf/nsimA")
+ bpftool_prog_load("sample_map_ret0.o", "/sys/fs/bpf/nsimB",
+ dev=simB1['ifname'])
+ progB = bpf_pinned("/sys/fs/bpf/nsimB")
+
+ simA.set_xdp(progA, "offload", JSON=False)
+ for d in simB:
+ d.set_xdp(progB, "offload", JSON=False)
+
+ start_test("Test multi-dev ASIC cross-dev replace...")
+ ret, _ = simA.set_xdp(progB, "offload", force=True, JSON=False, fail=False)
+ fail(ret == 0, "cross-ASIC program allowed")
+ for d in simB:
+ ret, _ = d.set_xdp(progA, "offload", force=True, JSON=False, fail=False)
+ fail(ret == 0, "cross-ASIC program allowed")
+
+ start_test("Test multi-dev ASIC cross-dev install...")
+ for d in sims:
+ d.unset_xdp("offload")
+
+ ret, _, err = simA.set_xdp(progB, "offload", force=True, JSON=False,
+ fail=False, include_stderr=True)
+ fail(ret == 0, "cross-ASIC program allowed")
+ check_extack_nsim(err, "program bound to different dev.", args)
+ for d in simB:
+ ret, _, err = d.set_xdp(progA, "offload", force=True, JSON=False,
+ fail=False, include_stderr=True)
+ fail(ret == 0, "cross-ASIC program allowed")
+ check_extack_nsim(err, "program bound to different dev.", args)
+
+ start_test("Test multi-dev ASIC cross-dev map reuse...")
+
+ mapA = bpftool("prog show %s" % (progA))[1]["map_ids"][0]
+ mapB = bpftool("prog show %s" % (progB))[1]["map_ids"][0]
+
+ ret, _ = bpftool_prog_load("sample_map_ret0.o", "/sys/fs/bpf/nsimB_",
+ dev=simB3['ifname'],
+ maps=["idx 0 id %d" % (mapB)],
+ fail=False)
+ fail(ret != 0, "couldn't reuse a map on the same ASIC")
+ rm("/sys/fs/bpf/nsimB_")
+
+ ret, _, err = bpftool_prog_load("sample_map_ret0.o", "/sys/fs/bpf/nsimA_",
+ dev=simA['ifname'],
+ maps=["idx 0 id %d" % (mapB)],
+ fail=False, include_stderr=True)
+ fail(ret == 0, "could reuse a map on a different ASIC")
+ fail(err.count("offload device mismatch between prog and map") == 0,
+ "error message missing for cross-ASIC map")
+
+ ret, _, err = bpftool_prog_load("sample_map_ret0.o", "/sys/fs/bpf/nsimB_",
+ dev=simB1['ifname'],
+ maps=["idx 0 id %d" % (mapA)],
+ fail=False, include_stderr=True)
+ fail(ret == 0, "could reuse a map on a different ASIC")
+ fail(err.count("offload device mismatch between prog and map") == 0,
+ "error message missing for cross-ASIC map")
+
+ start_test("Test multi-dev ASIC cross-dev destruction...")
+ bpftool_prog_list_wait(expected=2)
+
+ simA.remove()
+ bpftool_prog_list_wait(expected=1)
+
+ ifnameB = bpftool("prog show %s" % (progB))[1]["dev"]["ifname"]
+ fail(ifnameB != simB1['ifname'], "program not bound to originial device")
+ simB1.remove()
+ bpftool_prog_list_wait(expected=1)
+
+ start_test("Test multi-dev ASIC cross-dev destruction - move...")
+ ifnameB = bpftool("prog show %s" % (progB))[1]["dev"]["ifname"]
+ fail(ifnameB not in (simB2['ifname'], simB3['ifname']),
+ "program not bound to remaining devices")
+
+ simB2.remove()
+ ifnameB = bpftool("prog show %s" % (progB))[1]["dev"]["ifname"]
+ fail(ifnameB != simB3['ifname'], "program not bound to remaining device")
+
+ simB3.remove()
+ bpftool_prog_list_wait(expected=0)
+
+ start_test("Test multi-dev ASIC cross-dev destruction - orphaned...")
+ ret, out = bpftool("prog show %s" % (progB), fail=False)
+ fail(ret == 0, "got information about orphaned program")
+ fail("error" not in out, "no error reported for get info on orphaned")
+ fail(out["error"] != "can't get prog info: No such device",
+ "wrong error for get info on orphaned")
+
+ print("%s: OK" % (os.path.basename(__file__)))
+
+finally:
+ log("Clean up...", "", level=1)
+ log_level_inc()
+ clean_up()
diff --git a/tools/testing/selftests/bpf/test_pkt_access.c b/tools/testing/selftests/bpf/test_pkt_access.c
new file mode 100644
index 000000000..6e11ba117
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_pkt_access.c
@@ -0,0 +1,65 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stddef.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/in.h>
+#include <linux/tcp.h>
+#include <linux/pkt_cls.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define barrier() __asm__ __volatile__("": : :"memory")
+int _version SEC("version") = 1;
+
+SEC("test1")
+int process(struct __sk_buff *skb)
+{
+ void *data_end = (void *)(long)skb->data_end;
+ void *data = (void *)(long)skb->data;
+ struct ethhdr *eth = (struct ethhdr *)(data);
+ struct tcphdr *tcp = NULL;
+ __u8 proto = 255;
+ __u64 ihl_len;
+
+ if (eth + 1 > data_end)
+ return TC_ACT_SHOT;
+
+ if (eth->h_proto == bpf_htons(ETH_P_IP)) {
+ struct iphdr *iph = (struct iphdr *)(eth + 1);
+
+ if (iph + 1 > data_end)
+ return TC_ACT_SHOT;
+ ihl_len = iph->ihl * 4;
+ proto = iph->protocol;
+ tcp = (struct tcphdr *)((void *)(iph) + ihl_len);
+ } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
+ struct ipv6hdr *ip6h = (struct ipv6hdr *)(eth + 1);
+
+ if (ip6h + 1 > data_end)
+ return TC_ACT_SHOT;
+ ihl_len = sizeof(*ip6h);
+ proto = ip6h->nexthdr;
+ tcp = (struct tcphdr *)((void *)(ip6h) + ihl_len);
+ }
+
+ if (tcp) {
+ if (((void *)(tcp) + 20) > data_end || proto != 6)
+ return TC_ACT_SHOT;
+ barrier(); /* to force ordering of checks */
+ if (((void *)(tcp) + 18) > data_end)
+ return TC_ACT_SHOT;
+ if (tcp->urg_ptr == 123)
+ return TC_ACT_OK;
+ }
+
+ return TC_ACT_UNSPEC;
+}
diff --git a/tools/testing/selftests/bpf/test_pkt_md_access.c b/tools/testing/selftests/bpf/test_pkt_md_access.c
new file mode 100644
index 000000000..7956302ec
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_pkt_md_access.c
@@ -0,0 +1,46 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stddef.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/pkt_cls.h>
+#include "bpf_helpers.h"
+
+int _version SEC("version") = 1;
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define TEST_FIELD(TYPE, FIELD, MASK) \
+ { \
+ TYPE tmp = *(volatile TYPE *)&skb->FIELD; \
+ if (tmp != ((*(volatile __u32 *)&skb->FIELD) & MASK)) \
+ return TC_ACT_SHOT; \
+ }
+#else
+#define TEST_FIELD_OFFSET(a, b) ((sizeof(a) - sizeof(b)) / sizeof(b))
+#define TEST_FIELD(TYPE, FIELD, MASK) \
+ { \
+ TYPE tmp = *((volatile TYPE *)&skb->FIELD + \
+ TEST_FIELD_OFFSET(skb->FIELD, TYPE)); \
+ if (tmp != ((*(volatile __u32 *)&skb->FIELD) & MASK)) \
+ return TC_ACT_SHOT; \
+ }
+#endif
+
+SEC("test1")
+int process(struct __sk_buff *skb)
+{
+ TEST_FIELD(__u8, len, 0xFF);
+ TEST_FIELD(__u16, len, 0xFFFF);
+ TEST_FIELD(__u32, len, 0xFFFFFFFF);
+ TEST_FIELD(__u16, protocol, 0xFFFF);
+ TEST_FIELD(__u32, protocol, 0xFFFFFFFF);
+ TEST_FIELD(__u8, hash, 0xFF);
+ TEST_FIELD(__u16, hash, 0xFFFF);
+ TEST_FIELD(__u32, hash, 0xFFFFFFFF);
+
+ return TC_ACT_OK;
+}
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
new file mode 100644
index 000000000..0fcd38ffc
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -0,0 +1,1756 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <time.h>
+
+#include <linux/types.h>
+typedef __u16 __sum16;
+#include <arpa/inet.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/filter.h>
+#include <linux/perf_event.h>
+#include <linux/unistd.h>
+
+#include <sys/ioctl.h>
+#include <sys/wait.h>
+#include <sys/types.h>
+#include <fcntl.h>
+
+#include <linux/bpf.h>
+#include <linux/err.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "test_iptunnel_common.h"
+#include "bpf_util.h"
+#include "bpf_endian.h"
+#include "bpf_rlimit.h"
+#include "trace_helpers.h"
+
+static int error_cnt, pass_cnt;
+static bool jit_enabled;
+
+#define MAGIC_BYTES 123
+
+/* ipv4 test vector */
+static struct {
+ struct ethhdr eth;
+ struct iphdr iph;
+ struct tcphdr tcp;
+} __packed pkt_v4 = {
+ .eth.h_proto = __bpf_constant_htons(ETH_P_IP),
+ .iph.ihl = 5,
+ .iph.protocol = 6,
+ .iph.tot_len = __bpf_constant_htons(MAGIC_BYTES),
+ .tcp.urg_ptr = 123,
+};
+
+/* ipv6 test vector */
+static struct {
+ struct ethhdr eth;
+ struct ipv6hdr iph;
+ struct tcphdr tcp;
+} __packed pkt_v6 = {
+ .eth.h_proto = __bpf_constant_htons(ETH_P_IPV6),
+ .iph.nexthdr = 6,
+ .iph.payload_len = __bpf_constant_htons(MAGIC_BYTES),
+ .tcp.urg_ptr = 123,
+};
+
+#define CHECK(condition, tag, format...) ({ \
+ int __ret = !!(condition); \
+ if (__ret) { \
+ error_cnt++; \
+ printf("%s:FAIL:%s ", __func__, tag); \
+ printf(format); \
+ } else { \
+ pass_cnt++; \
+ printf("%s:PASS:%s %d nsec\n", __func__, tag, duration);\
+ } \
+ __ret; \
+})
+
+static int bpf_find_map(const char *test, struct bpf_object *obj,
+ const char *name)
+{
+ struct bpf_map *map;
+
+ map = bpf_object__find_map_by_name(obj, name);
+ if (!map) {
+ printf("%s:FAIL:map '%s' not found\n", test, name);
+ error_cnt++;
+ return -1;
+ }
+ return bpf_map__fd(map);
+}
+
+static void test_pkt_access(void)
+{
+ const char *file = "./test_pkt_access.o";
+ struct bpf_object *obj;
+ __u32 duration, retval;
+ int err, prog_fd;
+
+ err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
+ if (err) {
+ error_cnt++;
+ return;
+ }
+
+ err = bpf_prog_test_run(prog_fd, 100000, &pkt_v4, sizeof(pkt_v4),
+ NULL, NULL, &retval, &duration);
+ CHECK(err || errno || retval, "ipv4",
+ "err %d errno %d retval %d duration %d\n",
+ err, errno, retval, duration);
+
+ err = bpf_prog_test_run(prog_fd, 100000, &pkt_v6, sizeof(pkt_v6),
+ NULL, NULL, &retval, &duration);
+ CHECK(err || errno || retval, "ipv6",
+ "err %d errno %d retval %d duration %d\n",
+ err, errno, retval, duration);
+ bpf_object__close(obj);
+}
+
+static void test_xdp(void)
+{
+ struct vip key4 = {.protocol = 6, .family = AF_INET};
+ struct vip key6 = {.protocol = 6, .family = AF_INET6};
+ struct iptnl_info value4 = {.family = AF_INET};
+ struct iptnl_info value6 = {.family = AF_INET6};
+ const char *file = "./test_xdp.o";
+ struct bpf_object *obj;
+ char buf[128];
+ struct ipv6hdr *iph6 = (void *)buf + sizeof(struct ethhdr);
+ struct iphdr *iph = (void *)buf + sizeof(struct ethhdr);
+ __u32 duration, retval, size;
+ int err, prog_fd, map_fd;
+
+ err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
+ if (err) {
+ error_cnt++;
+ return;
+ }
+
+ map_fd = bpf_find_map(__func__, obj, "vip2tnl");
+ if (map_fd < 0)
+ goto out;
+ bpf_map_update_elem(map_fd, &key4, &value4, 0);
+ bpf_map_update_elem(map_fd, &key6, &value6, 0);
+
+ err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
+ buf, &size, &retval, &duration);
+
+ CHECK(err || errno || retval != XDP_TX || size != 74 ||
+ iph->protocol != IPPROTO_IPIP, "ipv4",
+ "err %d errno %d retval %d size %d\n",
+ err, errno, retval, size);
+
+ err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6),
+ buf, &size, &retval, &duration);
+ CHECK(err || errno || retval != XDP_TX || size != 114 ||
+ iph6->nexthdr != IPPROTO_IPV6, "ipv6",
+ "err %d errno %d retval %d size %d\n",
+ err, errno, retval, size);
+out:
+ bpf_object__close(obj);
+}
+
+static void test_xdp_adjust_tail(void)
+{
+ const char *file = "./test_adjust_tail.o";
+ struct bpf_object *obj;
+ char buf[128];
+ __u32 duration, retval, size;
+ int err, prog_fd;
+
+ err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
+ if (err) {
+ error_cnt++;
+ return;
+ }
+
+ err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
+ buf, &size, &retval, &duration);
+
+ CHECK(err || errno || retval != XDP_DROP,
+ "ipv4", "err %d errno %d retval %d size %d\n",
+ err, errno, retval, size);
+
+ err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6),
+ buf, &size, &retval, &duration);
+ CHECK(err || errno || retval != XDP_TX || size != 54,
+ "ipv6", "err %d errno %d retval %d size %d\n",
+ err, errno, retval, size);
+ bpf_object__close(obj);
+}
+
+
+
+#define MAGIC_VAL 0x1234
+#define NUM_ITER 100000
+#define VIP_NUM 5
+
+static void test_l4lb(const char *file)
+{
+ unsigned int nr_cpus = bpf_num_possible_cpus();
+ struct vip key = {.protocol = 6};
+ struct vip_meta {
+ __u32 flags;
+ __u32 vip_num;
+ } value = {.vip_num = VIP_NUM};
+ __u32 stats_key = VIP_NUM;
+ struct vip_stats {
+ __u64 bytes;
+ __u64 pkts;
+ } stats[nr_cpus];
+ struct real_definition {
+ union {
+ __be32 dst;
+ __be32 dstv6[4];
+ };
+ __u8 flags;
+ } real_def = {.dst = MAGIC_VAL};
+ __u32 ch_key = 11, real_num = 3;
+ __u32 duration, retval, size;
+ int err, i, prog_fd, map_fd;
+ __u64 bytes = 0, pkts = 0;
+ struct bpf_object *obj;
+ char buf[128];
+ u32 *magic = (u32 *)buf;
+
+ err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
+ if (err) {
+ error_cnt++;
+ return;
+ }
+
+ map_fd = bpf_find_map(__func__, obj, "vip_map");
+ if (map_fd < 0)
+ goto out;
+ bpf_map_update_elem(map_fd, &key, &value, 0);
+
+ map_fd = bpf_find_map(__func__, obj, "ch_rings");
+ if (map_fd < 0)
+ goto out;
+ bpf_map_update_elem(map_fd, &ch_key, &real_num, 0);
+
+ map_fd = bpf_find_map(__func__, obj, "reals");
+ if (map_fd < 0)
+ goto out;
+ bpf_map_update_elem(map_fd, &real_num, &real_def, 0);
+
+ err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v4, sizeof(pkt_v4),
+ buf, &size, &retval, &duration);
+ CHECK(err || errno || retval != 7/*TC_ACT_REDIRECT*/ || size != 54 ||
+ *magic != MAGIC_VAL, "ipv4",
+ "err %d errno %d retval %d size %d magic %x\n",
+ err, errno, retval, size, *magic);
+
+ err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v6, sizeof(pkt_v6),
+ buf, &size, &retval, &duration);
+ CHECK(err || errno || retval != 7/*TC_ACT_REDIRECT*/ || size != 74 ||
+ *magic != MAGIC_VAL, "ipv6",
+ "err %d errno %d retval %d size %d magic %x\n",
+ err, errno, retval, size, *magic);
+
+ map_fd = bpf_find_map(__func__, obj, "stats");
+ if (map_fd < 0)
+ goto out;
+ bpf_map_lookup_elem(map_fd, &stats_key, stats);
+ for (i = 0; i < nr_cpus; i++) {
+ bytes += stats[i].bytes;
+ pkts += stats[i].pkts;
+ }
+ if (bytes != MAGIC_BYTES * NUM_ITER * 2 || pkts != NUM_ITER * 2) {
+ error_cnt++;
+ printf("test_l4lb:FAIL:stats %lld %lld\n", bytes, pkts);
+ }
+out:
+ bpf_object__close(obj);
+}
+
+static void test_l4lb_all(void)
+{
+ const char *file1 = "./test_l4lb.o";
+ const char *file2 = "./test_l4lb_noinline.o";
+
+ test_l4lb(file1);
+ test_l4lb(file2);
+}
+
+static void test_xdp_noinline(void)
+{
+ const char *file = "./test_xdp_noinline.o";
+ unsigned int nr_cpus = bpf_num_possible_cpus();
+ struct vip key = {.protocol = 6};
+ struct vip_meta {
+ __u32 flags;
+ __u32 vip_num;
+ } value = {.vip_num = VIP_NUM};
+ __u32 stats_key = VIP_NUM;
+ struct vip_stats {
+ __u64 bytes;
+ __u64 pkts;
+ } stats[nr_cpus];
+ struct real_definition {
+ union {
+ __be32 dst;
+ __be32 dstv6[4];
+ };
+ __u8 flags;
+ } real_def = {.dst = MAGIC_VAL};
+ __u32 ch_key = 11, real_num = 3;
+ __u32 duration, retval, size;
+ int err, i, prog_fd, map_fd;
+ __u64 bytes = 0, pkts = 0;
+ struct bpf_object *obj;
+ char buf[128];
+ u32 *magic = (u32 *)buf;
+
+ err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
+ if (err) {
+ error_cnt++;
+ return;
+ }
+
+ map_fd = bpf_find_map(__func__, obj, "vip_map");
+ if (map_fd < 0)
+ goto out;
+ bpf_map_update_elem(map_fd, &key, &value, 0);
+
+ map_fd = bpf_find_map(__func__, obj, "ch_rings");
+ if (map_fd < 0)
+ goto out;
+ bpf_map_update_elem(map_fd, &ch_key, &real_num, 0);
+
+ map_fd = bpf_find_map(__func__, obj, "reals");
+ if (map_fd < 0)
+ goto out;
+ bpf_map_update_elem(map_fd, &real_num, &real_def, 0);
+
+ err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v4, sizeof(pkt_v4),
+ buf, &size, &retval, &duration);
+ CHECK(err || errno || retval != 1 || size != 54 ||
+ *magic != MAGIC_VAL, "ipv4",
+ "err %d errno %d retval %d size %d magic %x\n",
+ err, errno, retval, size, *magic);
+
+ err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v6, sizeof(pkt_v6),
+ buf, &size, &retval, &duration);
+ CHECK(err || errno || retval != 1 || size != 74 ||
+ *magic != MAGIC_VAL, "ipv6",
+ "err %d errno %d retval %d size %d magic %x\n",
+ err, errno, retval, size, *magic);
+
+ map_fd = bpf_find_map(__func__, obj, "stats");
+ if (map_fd < 0)
+ goto out;
+ bpf_map_lookup_elem(map_fd, &stats_key, stats);
+ for (i = 0; i < nr_cpus; i++) {
+ bytes += stats[i].bytes;
+ pkts += stats[i].pkts;
+ }
+ if (bytes != MAGIC_BYTES * NUM_ITER * 2 || pkts != NUM_ITER * 2) {
+ error_cnt++;
+ printf("test_xdp_noinline:FAIL:stats %lld %lld\n", bytes, pkts);
+ }
+out:
+ bpf_object__close(obj);
+}
+
+static void test_tcp_estats(void)
+{
+ const char *file = "./test_tcp_estats.o";
+ int err, prog_fd;
+ struct bpf_object *obj;
+ __u32 duration = 0;
+
+ err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
+ CHECK(err, "", "err %d errno %d\n", err, errno);
+ if (err) {
+ error_cnt++;
+ return;
+ }
+
+ bpf_object__close(obj);
+}
+
+static inline __u64 ptr_to_u64(const void *ptr)
+{
+ return (__u64) (unsigned long) ptr;
+}
+
+static bool is_jit_enabled(void)
+{
+ const char *jit_sysctl = "/proc/sys/net/core/bpf_jit_enable";
+ bool enabled = false;
+ int sysctl_fd;
+
+ sysctl_fd = open(jit_sysctl, 0, O_RDONLY);
+ if (sysctl_fd != -1) {
+ char tmpc;
+
+ if (read(sysctl_fd, &tmpc, sizeof(tmpc)) == 1)
+ enabled = (tmpc != '0');
+ close(sysctl_fd);
+ }
+
+ return enabled;
+}
+
+static void test_bpf_obj_id(void)
+{
+ const __u64 array_magic_value = 0xfaceb00c;
+ const __u32 array_key = 0;
+ const int nr_iters = 2;
+ const char *file = "./test_obj_id.o";
+ const char *expected_prog_name = "test_obj_id";
+ const char *expected_map_name = "test_map_id";
+ const __u64 nsec_per_sec = 1000000000;
+
+ struct bpf_object *objs[nr_iters];
+ int prog_fds[nr_iters], map_fds[nr_iters];
+ /* +1 to test for the info_len returned by kernel */
+ struct bpf_prog_info prog_infos[nr_iters + 1];
+ struct bpf_map_info map_infos[nr_iters + 1];
+ /* Each prog only uses one map. +1 to test nr_map_ids
+ * returned by kernel.
+ */
+ __u32 map_ids[nr_iters + 1];
+ char jited_insns[128], xlated_insns[128], zeros[128];
+ __u32 i, next_id, info_len, nr_id_found, duration = 0;
+ struct timespec real_time_ts, boot_time_ts;
+ int err = 0;
+ __u64 array_value;
+ uid_t my_uid = getuid();
+ time_t now, load_time;
+
+ err = bpf_prog_get_fd_by_id(0);
+ CHECK(err >= 0 || errno != ENOENT,
+ "get-fd-by-notexist-prog-id", "err %d errno %d\n", err, errno);
+
+ err = bpf_map_get_fd_by_id(0);
+ CHECK(err >= 0 || errno != ENOENT,
+ "get-fd-by-notexist-map-id", "err %d errno %d\n", err, errno);
+
+ for (i = 0; i < nr_iters; i++)
+ objs[i] = NULL;
+
+ /* Check bpf_obj_get_info_by_fd() */
+ bzero(zeros, sizeof(zeros));
+ for (i = 0; i < nr_iters; i++) {
+ now = time(NULL);
+ err = bpf_prog_load(file, BPF_PROG_TYPE_SOCKET_FILTER,
+ &objs[i], &prog_fds[i]);
+ /* test_obj_id.o is a dumb prog. It should never fail
+ * to load.
+ */
+ if (err)
+ error_cnt++;
+ assert(!err);
+
+ /* Insert a magic value to the map */
+ map_fds[i] = bpf_find_map(__func__, objs[i], "test_map_id");
+ assert(map_fds[i] >= 0);
+ err = bpf_map_update_elem(map_fds[i], &array_key,
+ &array_magic_value, 0);
+ assert(!err);
+
+ /* Check getting map info */
+ info_len = sizeof(struct bpf_map_info) * 2;
+ bzero(&map_infos[i], info_len);
+ err = bpf_obj_get_info_by_fd(map_fds[i], &map_infos[i],
+ &info_len);
+ if (CHECK(err ||
+ map_infos[i].type != BPF_MAP_TYPE_ARRAY ||
+ map_infos[i].key_size != sizeof(__u32) ||
+ map_infos[i].value_size != sizeof(__u64) ||
+ map_infos[i].max_entries != 1 ||
+ map_infos[i].map_flags != 0 ||
+ info_len != sizeof(struct bpf_map_info) ||
+ strcmp((char *)map_infos[i].name, expected_map_name),
+ "get-map-info(fd)",
+ "err %d errno %d type %d(%d) info_len %u(%Zu) key_size %u value_size %u max_entries %u map_flags %X name %s(%s)\n",
+ err, errno,
+ map_infos[i].type, BPF_MAP_TYPE_ARRAY,
+ info_len, sizeof(struct bpf_map_info),
+ map_infos[i].key_size,
+ map_infos[i].value_size,
+ map_infos[i].max_entries,
+ map_infos[i].map_flags,
+ map_infos[i].name, expected_map_name))
+ goto done;
+
+ /* Check getting prog info */
+ info_len = sizeof(struct bpf_prog_info) * 2;
+ bzero(&prog_infos[i], info_len);
+ bzero(jited_insns, sizeof(jited_insns));
+ bzero(xlated_insns, sizeof(xlated_insns));
+ prog_infos[i].jited_prog_insns = ptr_to_u64(jited_insns);
+ prog_infos[i].jited_prog_len = sizeof(jited_insns);
+ prog_infos[i].xlated_prog_insns = ptr_to_u64(xlated_insns);
+ prog_infos[i].xlated_prog_len = sizeof(xlated_insns);
+ prog_infos[i].map_ids = ptr_to_u64(map_ids + i);
+ prog_infos[i].nr_map_ids = 2;
+ err = clock_gettime(CLOCK_REALTIME, &real_time_ts);
+ assert(!err);
+ err = clock_gettime(CLOCK_BOOTTIME, &boot_time_ts);
+ assert(!err);
+ err = bpf_obj_get_info_by_fd(prog_fds[i], &prog_infos[i],
+ &info_len);
+ load_time = (real_time_ts.tv_sec - boot_time_ts.tv_sec)
+ + (prog_infos[i].load_time / nsec_per_sec);
+ if (CHECK(err ||
+ prog_infos[i].type != BPF_PROG_TYPE_SOCKET_FILTER ||
+ info_len != sizeof(struct bpf_prog_info) ||
+ (jit_enabled && !prog_infos[i].jited_prog_len) ||
+ (jit_enabled &&
+ !memcmp(jited_insns, zeros, sizeof(zeros))) ||
+ !prog_infos[i].xlated_prog_len ||
+ !memcmp(xlated_insns, zeros, sizeof(zeros)) ||
+ load_time < now - 60 || load_time > now + 60 ||
+ prog_infos[i].created_by_uid != my_uid ||
+ prog_infos[i].nr_map_ids != 1 ||
+ *(int *)prog_infos[i].map_ids != map_infos[i].id ||
+ strcmp((char *)prog_infos[i].name, expected_prog_name),
+ "get-prog-info(fd)",
+ "err %d errno %d i %d type %d(%d) info_len %u(%Zu) jit_enabled %d jited_prog_len %u xlated_prog_len %u jited_prog %d xlated_prog %d load_time %lu(%lu) uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) name %s(%s)\n",
+ err, errno, i,
+ prog_infos[i].type, BPF_PROG_TYPE_SOCKET_FILTER,
+ info_len, sizeof(struct bpf_prog_info),
+ jit_enabled,
+ prog_infos[i].jited_prog_len,
+ prog_infos[i].xlated_prog_len,
+ !!memcmp(jited_insns, zeros, sizeof(zeros)),
+ !!memcmp(xlated_insns, zeros, sizeof(zeros)),
+ load_time, now,
+ prog_infos[i].created_by_uid, my_uid,
+ prog_infos[i].nr_map_ids, 1,
+ *(int *)prog_infos[i].map_ids, map_infos[i].id,
+ prog_infos[i].name, expected_prog_name))
+ goto done;
+ }
+
+ /* Check bpf_prog_get_next_id() */
+ nr_id_found = 0;
+ next_id = 0;
+ while (!bpf_prog_get_next_id(next_id, &next_id)) {
+ struct bpf_prog_info prog_info = {};
+ __u32 saved_map_id;
+ int prog_fd;
+
+ info_len = sizeof(prog_info);
+
+ prog_fd = bpf_prog_get_fd_by_id(next_id);
+ if (prog_fd < 0 && errno == ENOENT)
+ /* The bpf_prog is in the dead row */
+ continue;
+ if (CHECK(prog_fd < 0, "get-prog-fd(next_id)",
+ "prog_fd %d next_id %d errno %d\n",
+ prog_fd, next_id, errno))
+ break;
+
+ for (i = 0; i < nr_iters; i++)
+ if (prog_infos[i].id == next_id)
+ break;
+
+ if (i == nr_iters)
+ continue;
+
+ nr_id_found++;
+
+ /* Negative test:
+ * prog_info.nr_map_ids = 1
+ * prog_info.map_ids = NULL
+ */
+ prog_info.nr_map_ids = 1;
+ err = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &info_len);
+ if (CHECK(!err || errno != EFAULT,
+ "get-prog-fd-bad-nr-map-ids", "err %d errno %d(%d)",
+ err, errno, EFAULT))
+ break;
+ bzero(&prog_info, sizeof(prog_info));
+ info_len = sizeof(prog_info);
+
+ saved_map_id = *(int *)(prog_infos[i].map_ids);
+ prog_info.map_ids = prog_infos[i].map_ids;
+ prog_info.nr_map_ids = 2;
+ err = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &info_len);
+ prog_infos[i].jited_prog_insns = 0;
+ prog_infos[i].xlated_prog_insns = 0;
+ CHECK(err || info_len != sizeof(struct bpf_prog_info) ||
+ memcmp(&prog_info, &prog_infos[i], info_len) ||
+ *(int *)prog_info.map_ids != saved_map_id,
+ "get-prog-info(next_id->fd)",
+ "err %d errno %d info_len %u(%Zu) memcmp %d map_id %u(%u)\n",
+ err, errno, info_len, sizeof(struct bpf_prog_info),
+ memcmp(&prog_info, &prog_infos[i], info_len),
+ *(int *)prog_info.map_ids, saved_map_id);
+ close(prog_fd);
+ }
+ CHECK(nr_id_found != nr_iters,
+ "check total prog id found by get_next_id",
+ "nr_id_found %u(%u)\n",
+ nr_id_found, nr_iters);
+
+ /* Check bpf_map_get_next_id() */
+ nr_id_found = 0;
+ next_id = 0;
+ while (!bpf_map_get_next_id(next_id, &next_id)) {
+ struct bpf_map_info map_info = {};
+ int map_fd;
+
+ info_len = sizeof(map_info);
+
+ map_fd = bpf_map_get_fd_by_id(next_id);
+ if (map_fd < 0 && errno == ENOENT)
+ /* The bpf_map is in the dead row */
+ continue;
+ if (CHECK(map_fd < 0, "get-map-fd(next_id)",
+ "map_fd %d next_id %u errno %d\n",
+ map_fd, next_id, errno))
+ break;
+
+ for (i = 0; i < nr_iters; i++)
+ if (map_infos[i].id == next_id)
+ break;
+
+ if (i == nr_iters)
+ continue;
+
+ nr_id_found++;
+
+ err = bpf_map_lookup_elem(map_fd, &array_key, &array_value);
+ assert(!err);
+
+ err = bpf_obj_get_info_by_fd(map_fd, &map_info, &info_len);
+ CHECK(err || info_len != sizeof(struct bpf_map_info) ||
+ memcmp(&map_info, &map_infos[i], info_len) ||
+ array_value != array_magic_value,
+ "check get-map-info(next_id->fd)",
+ "err %d errno %d info_len %u(%Zu) memcmp %d array_value %llu(%llu)\n",
+ err, errno, info_len, sizeof(struct bpf_map_info),
+ memcmp(&map_info, &map_infos[i], info_len),
+ array_value, array_magic_value);
+
+ close(map_fd);
+ }
+ CHECK(nr_id_found != nr_iters,
+ "check total map id found by get_next_id",
+ "nr_id_found %u(%u)\n",
+ nr_id_found, nr_iters);
+
+done:
+ for (i = 0; i < nr_iters; i++)
+ bpf_object__close(objs[i]);
+}
+
+static void test_pkt_md_access(void)
+{
+ const char *file = "./test_pkt_md_access.o";
+ struct bpf_object *obj;
+ __u32 duration, retval;
+ int err, prog_fd;
+
+ err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
+ if (err) {
+ error_cnt++;
+ return;
+ }
+
+ err = bpf_prog_test_run(prog_fd, 10, &pkt_v4, sizeof(pkt_v4),
+ NULL, NULL, &retval, &duration);
+ CHECK(err || retval, "",
+ "err %d errno %d retval %d duration %d\n",
+ err, errno, retval, duration);
+
+ bpf_object__close(obj);
+}
+
+static void test_obj_name(void)
+{
+ struct {
+ const char *name;
+ int success;
+ int expected_errno;
+ } tests[] = {
+ { "", 1, 0 },
+ { "_123456789ABCDE", 1, 0 },
+ { "_123456789ABCDEF", 0, EINVAL },
+ { "_123456789ABCD\n", 0, EINVAL },
+ };
+ struct bpf_insn prog[] = {
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ __u32 duration = 0;
+ int i;
+
+ for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) {
+ size_t name_len = strlen(tests[i].name) + 1;
+ union bpf_attr attr;
+ size_t ncopy;
+ int fd;
+
+ /* test different attr.prog_name during BPF_PROG_LOAD */
+ ncopy = name_len < sizeof(attr.prog_name) ?
+ name_len : sizeof(attr.prog_name);
+ bzero(&attr, sizeof(attr));
+ attr.prog_type = BPF_PROG_TYPE_SCHED_CLS;
+ attr.insn_cnt = 2;
+ attr.insns = ptr_to_u64(prog);
+ attr.license = ptr_to_u64("");
+ memcpy(attr.prog_name, tests[i].name, ncopy);
+
+ fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
+ CHECK((tests[i].success && fd < 0) ||
+ (!tests[i].success && fd != -1) ||
+ (!tests[i].success && errno != tests[i].expected_errno),
+ "check-bpf-prog-name",
+ "fd %d(%d) errno %d(%d)\n",
+ fd, tests[i].success, errno, tests[i].expected_errno);
+
+ if (fd != -1)
+ close(fd);
+
+ /* test different attr.map_name during BPF_MAP_CREATE */
+ ncopy = name_len < sizeof(attr.map_name) ?
+ name_len : sizeof(attr.map_name);
+ bzero(&attr, sizeof(attr));
+ attr.map_type = BPF_MAP_TYPE_ARRAY;
+ attr.key_size = 4;
+ attr.value_size = 4;
+ attr.max_entries = 1;
+ attr.map_flags = 0;
+ memcpy(attr.map_name, tests[i].name, ncopy);
+ fd = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
+ CHECK((tests[i].success && fd < 0) ||
+ (!tests[i].success && fd != -1) ||
+ (!tests[i].success && errno != tests[i].expected_errno),
+ "check-bpf-map-name",
+ "fd %d(%d) errno %d(%d)\n",
+ fd, tests[i].success, errno, tests[i].expected_errno);
+
+ if (fd != -1)
+ close(fd);
+ }
+}
+
+static void test_tp_attach_query(void)
+{
+ const int num_progs = 3;
+ int i, j, bytes, efd, err, prog_fd[num_progs], pmu_fd[num_progs];
+ __u32 duration = 0, info_len, saved_prog_ids[num_progs];
+ const char *file = "./test_tracepoint.o";
+ struct perf_event_query_bpf *query;
+ struct perf_event_attr attr = {};
+ struct bpf_object *obj[num_progs];
+ struct bpf_prog_info prog_info;
+ char buf[256];
+
+ snprintf(buf, sizeof(buf),
+ "/sys/kernel/debug/tracing/events/sched/sched_switch/id");
+ efd = open(buf, O_RDONLY, 0);
+ if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno))
+ return;
+ bytes = read(efd, buf, sizeof(buf));
+ close(efd);
+ if (CHECK(bytes <= 0 || bytes >= sizeof(buf),
+ "read", "bytes %d errno %d\n", bytes, errno))
+ return;
+
+ attr.config = strtol(buf, NULL, 0);
+ attr.type = PERF_TYPE_TRACEPOINT;
+ attr.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_CALLCHAIN;
+ attr.sample_period = 1;
+ attr.wakeup_events = 1;
+
+ query = malloc(sizeof(*query) + sizeof(__u32) * num_progs);
+ for (i = 0; i < num_progs; i++) {
+ err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj[i],
+ &prog_fd[i]);
+ if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
+ goto cleanup1;
+
+ bzero(&prog_info, sizeof(prog_info));
+ prog_info.jited_prog_len = 0;
+ prog_info.xlated_prog_len = 0;
+ prog_info.nr_map_ids = 0;
+ info_len = sizeof(prog_info);
+ err = bpf_obj_get_info_by_fd(prog_fd[i], &prog_info, &info_len);
+ if (CHECK(err, "bpf_obj_get_info_by_fd", "err %d errno %d\n",
+ err, errno))
+ goto cleanup1;
+ saved_prog_ids[i] = prog_info.id;
+
+ pmu_fd[i] = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
+ 0 /* cpu 0 */, -1 /* group id */,
+ 0 /* flags */);
+ if (CHECK(pmu_fd[i] < 0, "perf_event_open", "err %d errno %d\n",
+ pmu_fd[i], errno))
+ goto cleanup2;
+ err = ioctl(pmu_fd[i], PERF_EVENT_IOC_ENABLE, 0);
+ if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n",
+ err, errno))
+ goto cleanup3;
+
+ if (i == 0) {
+ /* check NULL prog array query */
+ query->ids_len = num_progs;
+ err = ioctl(pmu_fd[i], PERF_EVENT_IOC_QUERY_BPF, query);
+ if (CHECK(err || query->prog_cnt != 0,
+ "perf_event_ioc_query_bpf",
+ "err %d errno %d query->prog_cnt %u\n",
+ err, errno, query->prog_cnt))
+ goto cleanup3;
+ }
+
+ err = ioctl(pmu_fd[i], PERF_EVENT_IOC_SET_BPF, prog_fd[i]);
+ if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n",
+ err, errno))
+ goto cleanup3;
+
+ if (i == 1) {
+ /* try to get # of programs only */
+ query->ids_len = 0;
+ err = ioctl(pmu_fd[i], PERF_EVENT_IOC_QUERY_BPF, query);
+ if (CHECK(err || query->prog_cnt != 2,
+ "perf_event_ioc_query_bpf",
+ "err %d errno %d query->prog_cnt %u\n",
+ err, errno, query->prog_cnt))
+ goto cleanup3;
+
+ /* try a few negative tests */
+ /* invalid query pointer */
+ err = ioctl(pmu_fd[i], PERF_EVENT_IOC_QUERY_BPF,
+ (struct perf_event_query_bpf *)0x1);
+ if (CHECK(!err || errno != EFAULT,
+ "perf_event_ioc_query_bpf",
+ "err %d errno %d\n", err, errno))
+ goto cleanup3;
+
+ /* no enough space */
+ query->ids_len = 1;
+ err = ioctl(pmu_fd[i], PERF_EVENT_IOC_QUERY_BPF, query);
+ if (CHECK(!err || errno != ENOSPC || query->prog_cnt != 2,
+ "perf_event_ioc_query_bpf",
+ "err %d errno %d query->prog_cnt %u\n",
+ err, errno, query->prog_cnt))
+ goto cleanup3;
+ }
+
+ query->ids_len = num_progs;
+ err = ioctl(pmu_fd[i], PERF_EVENT_IOC_QUERY_BPF, query);
+ if (CHECK(err || query->prog_cnt != (i + 1),
+ "perf_event_ioc_query_bpf",
+ "err %d errno %d query->prog_cnt %u\n",
+ err, errno, query->prog_cnt))
+ goto cleanup3;
+ for (j = 0; j < i + 1; j++)
+ if (CHECK(saved_prog_ids[j] != query->ids[j],
+ "perf_event_ioc_query_bpf",
+ "#%d saved_prog_id %x query prog_id %x\n",
+ j, saved_prog_ids[j], query->ids[j]))
+ goto cleanup3;
+ }
+
+ i = num_progs - 1;
+ for (; i >= 0; i--) {
+ cleanup3:
+ ioctl(pmu_fd[i], PERF_EVENT_IOC_DISABLE);
+ cleanup2:
+ close(pmu_fd[i]);
+ cleanup1:
+ bpf_object__close(obj[i]);
+ }
+ free(query);
+}
+
+static int compare_map_keys(int map1_fd, int map2_fd)
+{
+ __u32 key, next_key;
+ char val_buf[PERF_MAX_STACK_DEPTH *
+ sizeof(struct bpf_stack_build_id)];
+ int err;
+
+ err = bpf_map_get_next_key(map1_fd, NULL, &key);
+ if (err)
+ return err;
+ err = bpf_map_lookup_elem(map2_fd, &key, val_buf);
+ if (err)
+ return err;
+
+ while (bpf_map_get_next_key(map1_fd, &key, &next_key) == 0) {
+ err = bpf_map_lookup_elem(map2_fd, &next_key, val_buf);
+ if (err)
+ return err;
+
+ key = next_key;
+ }
+ if (errno != ENOENT)
+ return -1;
+
+ return 0;
+}
+
+static int compare_stack_ips(int smap_fd, int amap_fd, int stack_trace_len)
+{
+ __u32 key, next_key, *cur_key_p, *next_key_p;
+ char *val_buf1, *val_buf2;
+ int i, err = 0;
+
+ val_buf1 = malloc(stack_trace_len);
+ val_buf2 = malloc(stack_trace_len);
+ cur_key_p = NULL;
+ next_key_p = &key;
+ while (bpf_map_get_next_key(smap_fd, cur_key_p, next_key_p) == 0) {
+ err = bpf_map_lookup_elem(smap_fd, next_key_p, val_buf1);
+ if (err)
+ goto out;
+ err = bpf_map_lookup_elem(amap_fd, next_key_p, val_buf2);
+ if (err)
+ goto out;
+ for (i = 0; i < stack_trace_len; i++) {
+ if (val_buf1[i] != val_buf2[i]) {
+ err = -1;
+ goto out;
+ }
+ }
+ key = *next_key_p;
+ cur_key_p = &key;
+ next_key_p = &next_key;
+ }
+ if (errno != ENOENT)
+ err = -1;
+
+out:
+ free(val_buf1);
+ free(val_buf2);
+ return err;
+}
+
+static void test_stacktrace_map()
+{
+ int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
+ const char *file = "./test_stacktrace_map.o";
+ int bytes, efd, err, pmu_fd, prog_fd, stack_trace_len;
+ struct perf_event_attr attr = {};
+ __u32 key, val, duration = 0;
+ struct bpf_object *obj;
+ char buf[256];
+
+ err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
+ if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
+ return;
+
+ /* Get the ID for the sched/sched_switch tracepoint */
+ snprintf(buf, sizeof(buf),
+ "/sys/kernel/debug/tracing/events/sched/sched_switch/id");
+ efd = open(buf, O_RDONLY, 0);
+ if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno))
+ goto close_prog;
+
+ bytes = read(efd, buf, sizeof(buf));
+ close(efd);
+ if (bytes <= 0 || bytes >= sizeof(buf))
+ goto close_prog;
+
+ /* Open the perf event and attach bpf progrram */
+ attr.config = strtol(buf, NULL, 0);
+ attr.type = PERF_TYPE_TRACEPOINT;
+ attr.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_CALLCHAIN;
+ attr.sample_period = 1;
+ attr.wakeup_events = 1;
+ pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
+ 0 /* cpu 0 */, -1 /* group id */,
+ 0 /* flags */);
+ if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n",
+ pmu_fd, errno))
+ goto close_prog;
+
+ err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
+ if (err)
+ goto disable_pmu;
+
+ err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
+ if (err)
+ goto disable_pmu;
+
+ /* find map fds */
+ control_map_fd = bpf_find_map(__func__, obj, "control_map");
+ if (control_map_fd < 0)
+ goto disable_pmu;
+
+ stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap");
+ if (stackid_hmap_fd < 0)
+ goto disable_pmu;
+
+ stackmap_fd = bpf_find_map(__func__, obj, "stackmap");
+ if (stackmap_fd < 0)
+ goto disable_pmu;
+
+ stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap");
+ if (stack_amap_fd < 0)
+ goto disable_pmu;
+
+ /* give some time for bpf program run */
+ sleep(1);
+
+ /* disable stack trace collection */
+ key = 0;
+ val = 1;
+ bpf_map_update_elem(control_map_fd, &key, &val, 0);
+
+ /* for every element in stackid_hmap, we can find a corresponding one
+ * in stackmap, and vise versa.
+ */
+ err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
+ if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
+ "err %d errno %d\n", err, errno))
+ goto disable_pmu_noerr;
+
+ err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
+ if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap",
+ "err %d errno %d\n", err, errno))
+ goto disable_pmu_noerr;
+
+ stack_trace_len = PERF_MAX_STACK_DEPTH * sizeof(__u64);
+ err = compare_stack_ips(stackmap_fd, stack_amap_fd, stack_trace_len);
+ if (CHECK(err, "compare_stack_ips stackmap vs. stack_amap",
+ "err %d errno %d\n", err, errno))
+ goto disable_pmu_noerr;
+
+ goto disable_pmu_noerr;
+disable_pmu:
+ error_cnt++;
+disable_pmu_noerr:
+ ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE);
+ close(pmu_fd);
+close_prog:
+ bpf_object__close(obj);
+}
+
+static void test_stacktrace_map_raw_tp()
+{
+ int control_map_fd, stackid_hmap_fd, stackmap_fd;
+ const char *file = "./test_stacktrace_map.o";
+ int efd, err, prog_fd;
+ __u32 key, val, duration = 0;
+ struct bpf_object *obj;
+
+ err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
+ if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno))
+ return;
+
+ efd = bpf_raw_tracepoint_open("sched_switch", prog_fd);
+ if (CHECK(efd < 0, "raw_tp_open", "err %d errno %d\n", efd, errno))
+ goto close_prog;
+
+ /* find map fds */
+ control_map_fd = bpf_find_map(__func__, obj, "control_map");
+ if (control_map_fd < 0)
+ goto close_prog;
+
+ stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap");
+ if (stackid_hmap_fd < 0)
+ goto close_prog;
+
+ stackmap_fd = bpf_find_map(__func__, obj, "stackmap");
+ if (stackmap_fd < 0)
+ goto close_prog;
+
+ /* give some time for bpf program run */
+ sleep(1);
+
+ /* disable stack trace collection */
+ key = 0;
+ val = 1;
+ bpf_map_update_elem(control_map_fd, &key, &val, 0);
+
+ /* for every element in stackid_hmap, we can find a corresponding one
+ * in stackmap, and vise versa.
+ */
+ err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
+ if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
+ "err %d errno %d\n", err, errno))
+ goto close_prog;
+
+ err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
+ if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap",
+ "err %d errno %d\n", err, errno))
+ goto close_prog;
+
+ goto close_prog_noerr;
+close_prog:
+ error_cnt++;
+close_prog_noerr:
+ bpf_object__close(obj);
+}
+
+static int extract_build_id(char *build_id, size_t size)
+{
+ FILE *fp;
+ char *line = NULL;
+ size_t len = 0;
+
+ fp = popen("readelf -n ./urandom_read | grep 'Build ID'", "r");
+ if (fp == NULL)
+ return -1;
+
+ if (getline(&line, &len, fp) == -1)
+ goto err;
+ pclose(fp);
+
+ if (len > size)
+ len = size;
+ memcpy(build_id, line, len);
+ build_id[len] = '\0';
+ free(line);
+ return 0;
+err:
+ pclose(fp);
+ return -1;
+}
+
+static void test_stacktrace_build_id(void)
+{
+ int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
+ const char *file = "./test_stacktrace_build_id.o";
+ int bytes, efd, err, pmu_fd, prog_fd, stack_trace_len;
+ struct perf_event_attr attr = {};
+ __u32 key, previous_key, val, duration = 0;
+ struct bpf_object *obj;
+ char buf[256];
+ int i, j;
+ struct bpf_stack_build_id id_offs[PERF_MAX_STACK_DEPTH];
+ int build_id_matches = 0;
+ int retry = 1;
+
+retry:
+ err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
+ if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
+ goto out;
+
+ /* Get the ID for the sched/sched_switch tracepoint */
+ snprintf(buf, sizeof(buf),
+ "/sys/kernel/debug/tracing/events/random/urandom_read/id");
+ efd = open(buf, O_RDONLY, 0);
+ if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno))
+ goto close_prog;
+
+ bytes = read(efd, buf, sizeof(buf));
+ close(efd);
+ if (CHECK(bytes <= 0 || bytes >= sizeof(buf),
+ "read", "bytes %d errno %d\n", bytes, errno))
+ goto close_prog;
+
+ /* Open the perf event and attach bpf progrram */
+ attr.config = strtol(buf, NULL, 0);
+ attr.type = PERF_TYPE_TRACEPOINT;
+ attr.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_CALLCHAIN;
+ attr.sample_period = 1;
+ attr.wakeup_events = 1;
+ pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
+ 0 /* cpu 0 */, -1 /* group id */,
+ 0 /* flags */);
+ if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n",
+ pmu_fd, errno))
+ goto close_prog;
+
+ err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
+ if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n",
+ err, errno))
+ goto close_pmu;
+
+ err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
+ if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n",
+ err, errno))
+ goto disable_pmu;
+
+ /* find map fds */
+ control_map_fd = bpf_find_map(__func__, obj, "control_map");
+ if (CHECK(control_map_fd < 0, "bpf_find_map control_map",
+ "err %d errno %d\n", err, errno))
+ goto disable_pmu;
+
+ stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap");
+ if (CHECK(stackid_hmap_fd < 0, "bpf_find_map stackid_hmap",
+ "err %d errno %d\n", err, errno))
+ goto disable_pmu;
+
+ stackmap_fd = bpf_find_map(__func__, obj, "stackmap");
+ if (CHECK(stackmap_fd < 0, "bpf_find_map stackmap", "err %d errno %d\n",
+ err, errno))
+ goto disable_pmu;
+
+ stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap");
+ if (CHECK(stack_amap_fd < 0, "bpf_find_map stack_amap",
+ "err %d errno %d\n", err, errno))
+ goto disable_pmu;
+
+ assert(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null")
+ == 0);
+ assert(system("./urandom_read") == 0);
+ /* disable stack trace collection */
+ key = 0;
+ val = 1;
+ bpf_map_update_elem(control_map_fd, &key, &val, 0);
+
+ /* for every element in stackid_hmap, we can find a corresponding one
+ * in stackmap, and vise versa.
+ */
+ err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
+ if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
+ "err %d errno %d\n", err, errno))
+ goto disable_pmu;
+
+ err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
+ if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap",
+ "err %d errno %d\n", err, errno))
+ goto disable_pmu;
+
+ err = extract_build_id(buf, 256);
+
+ if (CHECK(err, "get build_id with readelf",
+ "err %d errno %d\n", err, errno))
+ goto disable_pmu;
+
+ err = bpf_map_get_next_key(stackmap_fd, NULL, &key);
+ if (CHECK(err, "get_next_key from stackmap",
+ "err %d, errno %d\n", err, errno))
+ goto disable_pmu;
+
+ do {
+ char build_id[64];
+
+ err = bpf_map_lookup_elem(stackmap_fd, &key, id_offs);
+ if (CHECK(err, "lookup_elem from stackmap",
+ "err %d, errno %d\n", err, errno))
+ goto disable_pmu;
+ for (i = 0; i < PERF_MAX_STACK_DEPTH; ++i)
+ if (id_offs[i].status == BPF_STACK_BUILD_ID_VALID &&
+ id_offs[i].offset != 0) {
+ for (j = 0; j < 20; ++j)
+ sprintf(build_id + 2 * j, "%02x",
+ id_offs[i].build_id[j] & 0xff);
+ if (strstr(buf, build_id) != NULL)
+ build_id_matches = 1;
+ }
+ previous_key = key;
+ } while (bpf_map_get_next_key(stackmap_fd, &previous_key, &key) == 0);
+
+ /* stack_map_get_build_id_offset() is racy and sometimes can return
+ * BPF_STACK_BUILD_ID_IP instead of BPF_STACK_BUILD_ID_VALID;
+ * try it one more time.
+ */
+ if (build_id_matches < 1 && retry--) {
+ ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE);
+ close(pmu_fd);
+ bpf_object__close(obj);
+ printf("%s:WARN:Didn't find expected build ID from the map, retrying\n",
+ __func__);
+ goto retry;
+ }
+
+ if (CHECK(build_id_matches < 1, "build id match",
+ "Didn't find expected build ID from the map\n"))
+ goto disable_pmu;
+
+ stack_trace_len = PERF_MAX_STACK_DEPTH
+ * sizeof(struct bpf_stack_build_id);
+ err = compare_stack_ips(stackmap_fd, stack_amap_fd, stack_trace_len);
+ CHECK(err, "compare_stack_ips stackmap vs. stack_amap",
+ "err %d errno %d\n", err, errno);
+
+disable_pmu:
+ ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE);
+
+close_pmu:
+ close(pmu_fd);
+
+close_prog:
+ bpf_object__close(obj);
+
+out:
+ return;
+}
+
+static void test_stacktrace_build_id_nmi(void)
+{
+ int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
+ const char *file = "./test_stacktrace_build_id.o";
+ int err, pmu_fd, prog_fd;
+ struct perf_event_attr attr = {
+ .sample_freq = 5000,
+ .freq = 1,
+ .type = PERF_TYPE_HARDWARE,
+ .config = PERF_COUNT_HW_CPU_CYCLES,
+ };
+ __u32 key, previous_key, val, duration = 0;
+ struct bpf_object *obj;
+ char buf[256];
+ int i, j;
+ struct bpf_stack_build_id id_offs[PERF_MAX_STACK_DEPTH];
+ int build_id_matches = 0;
+ int retry = 1;
+
+retry:
+ err = bpf_prog_load(file, BPF_PROG_TYPE_PERF_EVENT, &obj, &prog_fd);
+ if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
+ return;
+
+ pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
+ 0 /* cpu 0 */, -1 /* group id */,
+ 0 /* flags */);
+ if (CHECK(pmu_fd < 0, "perf_event_open",
+ "err %d errno %d. Does the test host support PERF_COUNT_HW_CPU_CYCLES?\n",
+ pmu_fd, errno))
+ goto close_prog;
+
+ err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
+ if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n",
+ err, errno))
+ goto close_pmu;
+
+ err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
+ if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n",
+ err, errno))
+ goto disable_pmu;
+
+ /* find map fds */
+ control_map_fd = bpf_find_map(__func__, obj, "control_map");
+ if (CHECK(control_map_fd < 0, "bpf_find_map control_map",
+ "err %d errno %d\n", err, errno))
+ goto disable_pmu;
+
+ stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap");
+ if (CHECK(stackid_hmap_fd < 0, "bpf_find_map stackid_hmap",
+ "err %d errno %d\n", err, errno))
+ goto disable_pmu;
+
+ stackmap_fd = bpf_find_map(__func__, obj, "stackmap");
+ if (CHECK(stackmap_fd < 0, "bpf_find_map stackmap", "err %d errno %d\n",
+ err, errno))
+ goto disable_pmu;
+
+ stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap");
+ if (CHECK(stack_amap_fd < 0, "bpf_find_map stack_amap",
+ "err %d errno %d\n", err, errno))
+ goto disable_pmu;
+
+ assert(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null")
+ == 0);
+ assert(system("taskset 0x1 ./urandom_read 100000") == 0);
+ /* disable stack trace collection */
+ key = 0;
+ val = 1;
+ bpf_map_update_elem(control_map_fd, &key, &val, 0);
+
+ /* for every element in stackid_hmap, we can find a corresponding one
+ * in stackmap, and vise versa.
+ */
+ err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
+ if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
+ "err %d errno %d\n", err, errno))
+ goto disable_pmu;
+
+ err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
+ if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap",
+ "err %d errno %d\n", err, errno))
+ goto disable_pmu;
+
+ err = extract_build_id(buf, 256);
+
+ if (CHECK(err, "get build_id with readelf",
+ "err %d errno %d\n", err, errno))
+ goto disable_pmu;
+
+ err = bpf_map_get_next_key(stackmap_fd, NULL, &key);
+ if (CHECK(err, "get_next_key from stackmap",
+ "err %d, errno %d\n", err, errno))
+ goto disable_pmu;
+
+ do {
+ char build_id[64];
+
+ err = bpf_map_lookup_elem(stackmap_fd, &key, id_offs);
+ if (CHECK(err, "lookup_elem from stackmap",
+ "err %d, errno %d\n", err, errno))
+ goto disable_pmu;
+ for (i = 0; i < PERF_MAX_STACK_DEPTH; ++i)
+ if (id_offs[i].status == BPF_STACK_BUILD_ID_VALID &&
+ id_offs[i].offset != 0) {
+ for (j = 0; j < 20; ++j)
+ sprintf(build_id + 2 * j, "%02x",
+ id_offs[i].build_id[j] & 0xff);
+ if (strstr(buf, build_id) != NULL)
+ build_id_matches = 1;
+ }
+ previous_key = key;
+ } while (bpf_map_get_next_key(stackmap_fd, &previous_key, &key) == 0);
+
+ /* stack_map_get_build_id_offset() is racy and sometimes can return
+ * BPF_STACK_BUILD_ID_IP instead of BPF_STACK_BUILD_ID_VALID;
+ * try it one more time.
+ */
+ if (build_id_matches < 1 && retry--) {
+ ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE);
+ close(pmu_fd);
+ bpf_object__close(obj);
+ printf("%s:WARN:Didn't find expected build ID from the map, retrying\n",
+ __func__);
+ goto retry;
+ }
+
+ if (CHECK(build_id_matches < 1, "build id match",
+ "Didn't find expected build ID from the map\n"))
+ goto disable_pmu;
+
+ /*
+ * We intentionally skip compare_stack_ips(). This is because we
+ * only support one in_nmi() ips-to-build_id translation per cpu
+ * at any time, thus stack_amap here will always fallback to
+ * BPF_STACK_BUILD_ID_IP;
+ */
+
+disable_pmu:
+ ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE);
+
+close_pmu:
+ close(pmu_fd);
+
+close_prog:
+ bpf_object__close(obj);
+}
+
+#define MAX_CNT_RAWTP 10ull
+#define MAX_STACK_RAWTP 100
+struct get_stack_trace_t {
+ int pid;
+ int kern_stack_size;
+ int user_stack_size;
+ int user_stack_buildid_size;
+ __u64 kern_stack[MAX_STACK_RAWTP];
+ __u64 user_stack[MAX_STACK_RAWTP];
+ struct bpf_stack_build_id user_stack_buildid[MAX_STACK_RAWTP];
+};
+
+static int get_stack_print_output(void *data, int size)
+{
+ bool good_kern_stack = false, good_user_stack = false;
+ const char *nonjit_func = "___bpf_prog_run";
+ struct get_stack_trace_t *e = data;
+ int i, num_stack;
+ static __u64 cnt;
+ struct ksym *ks;
+
+ cnt++;
+
+ if (size < sizeof(struct get_stack_trace_t)) {
+ __u64 *raw_data = data;
+ bool found = false;
+
+ num_stack = size / sizeof(__u64);
+ /* If jit is enabled, we do not have a good way to
+ * verify the sanity of the kernel stack. So we
+ * just assume it is good if the stack is not empty.
+ * This could be improved in the future.
+ */
+ if (jit_enabled) {
+ found = num_stack > 0;
+ } else {
+ for (i = 0; i < num_stack; i++) {
+ ks = ksym_search(raw_data[i]);
+ if (strcmp(ks->name, nonjit_func) == 0) {
+ found = true;
+ break;
+ }
+ }
+ }
+ if (found) {
+ good_kern_stack = true;
+ good_user_stack = true;
+ }
+ } else {
+ num_stack = e->kern_stack_size / sizeof(__u64);
+ if (jit_enabled) {
+ good_kern_stack = num_stack > 0;
+ } else {
+ for (i = 0; i < num_stack; i++) {
+ ks = ksym_search(e->kern_stack[i]);
+ if (strcmp(ks->name, nonjit_func) == 0) {
+ good_kern_stack = true;
+ break;
+ }
+ }
+ }
+ if (e->user_stack_size > 0 && e->user_stack_buildid_size > 0)
+ good_user_stack = true;
+ }
+ if (!good_kern_stack || !good_user_stack)
+ return LIBBPF_PERF_EVENT_ERROR;
+
+ if (cnt == MAX_CNT_RAWTP)
+ return LIBBPF_PERF_EVENT_DONE;
+
+ return LIBBPF_PERF_EVENT_CONT;
+}
+
+static void test_get_stack_raw_tp(void)
+{
+ const char *file = "./test_get_stack_rawtp.o";
+ int i, efd, err, prog_fd, pmu_fd, perfmap_fd;
+ struct perf_event_attr attr = {};
+ struct timespec tv = {0, 10};
+ __u32 key = 0, duration = 0;
+ struct bpf_object *obj;
+
+ err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
+ if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno))
+ return;
+
+ efd = bpf_raw_tracepoint_open("sys_enter", prog_fd);
+ if (CHECK(efd < 0, "raw_tp_open", "err %d errno %d\n", efd, errno))
+ goto close_prog;
+
+ perfmap_fd = bpf_find_map(__func__, obj, "perfmap");
+ if (CHECK(perfmap_fd < 0, "bpf_find_map", "err %d errno %d\n",
+ perfmap_fd, errno))
+ goto close_prog;
+
+ err = load_kallsyms();
+ if (CHECK(err < 0, "load_kallsyms", "err %d errno %d\n", err, errno))
+ goto close_prog;
+
+ attr.sample_type = PERF_SAMPLE_RAW;
+ attr.type = PERF_TYPE_SOFTWARE;
+ attr.config = PERF_COUNT_SW_BPF_OUTPUT;
+ pmu_fd = syscall(__NR_perf_event_open, &attr, getpid()/*pid*/, -1/*cpu*/,
+ -1/*group_fd*/, 0);
+ if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n", pmu_fd,
+ errno))
+ goto close_prog;
+
+ err = bpf_map_update_elem(perfmap_fd, &key, &pmu_fd, BPF_ANY);
+ if (CHECK(err < 0, "bpf_map_update_elem", "err %d errno %d\n", err,
+ errno))
+ goto close_prog;
+
+ err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
+ if (CHECK(err < 0, "ioctl PERF_EVENT_IOC_ENABLE", "err %d errno %d\n",
+ err, errno))
+ goto close_prog;
+
+ err = perf_event_mmap(pmu_fd);
+ if (CHECK(err < 0, "perf_event_mmap", "err %d errno %d\n", err, errno))
+ goto close_prog;
+
+ /* trigger some syscall action */
+ for (i = 0; i < MAX_CNT_RAWTP; i++)
+ nanosleep(&tv, NULL);
+
+ err = perf_event_poller(pmu_fd, get_stack_print_output);
+ if (CHECK(err < 0, "perf_event_poller", "err %d errno %d\n", err, errno))
+ goto close_prog;
+
+ goto close_prog_noerr;
+close_prog:
+ error_cnt++;
+close_prog_noerr:
+ bpf_object__close(obj);
+}
+
+static void test_task_fd_query_rawtp(void)
+{
+ const char *file = "./test_get_stack_rawtp.o";
+ __u64 probe_offset, probe_addr;
+ __u32 len, prog_id, fd_type;
+ struct bpf_object *obj;
+ int efd, err, prog_fd;
+ __u32 duration = 0;
+ char buf[256];
+
+ err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
+ if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno))
+ return;
+
+ efd = bpf_raw_tracepoint_open("sys_enter", prog_fd);
+ if (CHECK(efd < 0, "raw_tp_open", "err %d errno %d\n", efd, errno))
+ goto close_prog;
+
+ /* query (getpid(), efd) */
+ len = sizeof(buf);
+ err = bpf_task_fd_query(getpid(), efd, 0, buf, &len, &prog_id,
+ &fd_type, &probe_offset, &probe_addr);
+ if (CHECK(err < 0, "bpf_task_fd_query", "err %d errno %d\n", err,
+ errno))
+ goto close_prog;
+
+ err = fd_type == BPF_FD_TYPE_RAW_TRACEPOINT &&
+ strcmp(buf, "sys_enter") == 0;
+ if (CHECK(!err, "check_results", "fd_type %d tp_name %s\n",
+ fd_type, buf))
+ goto close_prog;
+
+ /* test zero len */
+ len = 0;
+ err = bpf_task_fd_query(getpid(), efd, 0, buf, &len, &prog_id,
+ &fd_type, &probe_offset, &probe_addr);
+ if (CHECK(err < 0, "bpf_task_fd_query (len = 0)", "err %d errno %d\n",
+ err, errno))
+ goto close_prog;
+ err = fd_type == BPF_FD_TYPE_RAW_TRACEPOINT &&
+ len == strlen("sys_enter");
+ if (CHECK(!err, "check_results", "fd_type %d len %u\n", fd_type, len))
+ goto close_prog;
+
+ /* test empty buffer */
+ len = sizeof(buf);
+ err = bpf_task_fd_query(getpid(), efd, 0, 0, &len, &prog_id,
+ &fd_type, &probe_offset, &probe_addr);
+ if (CHECK(err < 0, "bpf_task_fd_query (buf = 0)", "err %d errno %d\n",
+ err, errno))
+ goto close_prog;
+ err = fd_type == BPF_FD_TYPE_RAW_TRACEPOINT &&
+ len == strlen("sys_enter");
+ if (CHECK(!err, "check_results", "fd_type %d len %u\n", fd_type, len))
+ goto close_prog;
+
+ /* test smaller buffer */
+ len = 3;
+ err = bpf_task_fd_query(getpid(), efd, 0, buf, &len, &prog_id,
+ &fd_type, &probe_offset, &probe_addr);
+ if (CHECK(err >= 0 || errno != ENOSPC, "bpf_task_fd_query (len = 3)",
+ "err %d errno %d\n", err, errno))
+ goto close_prog;
+ err = fd_type == BPF_FD_TYPE_RAW_TRACEPOINT &&
+ len == strlen("sys_enter") &&
+ strcmp(buf, "sy") == 0;
+ if (CHECK(!err, "check_results", "fd_type %d len %u\n", fd_type, len))
+ goto close_prog;
+
+ goto close_prog_noerr;
+close_prog:
+ error_cnt++;
+close_prog_noerr:
+ bpf_object__close(obj);
+}
+
+static void test_task_fd_query_tp_core(const char *probe_name,
+ const char *tp_name)
+{
+ const char *file = "./test_tracepoint.o";
+ int err, bytes, efd, prog_fd, pmu_fd;
+ struct perf_event_attr attr = {};
+ __u64 probe_offset, probe_addr;
+ __u32 len, prog_id, fd_type;
+ struct bpf_object *obj;
+ __u32 duration = 0;
+ char buf[256];
+
+ err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
+ if (CHECK(err, "bpf_prog_load", "err %d errno %d\n", err, errno))
+ goto close_prog;
+
+ snprintf(buf, sizeof(buf),
+ "/sys/kernel/debug/tracing/events/%s/id", probe_name);
+ efd = open(buf, O_RDONLY, 0);
+ if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno))
+ goto close_prog;
+ bytes = read(efd, buf, sizeof(buf));
+ close(efd);
+ if (CHECK(bytes <= 0 || bytes >= sizeof(buf), "read",
+ "bytes %d errno %d\n", bytes, errno))
+ goto close_prog;
+
+ attr.config = strtol(buf, NULL, 0);
+ attr.type = PERF_TYPE_TRACEPOINT;
+ attr.sample_type = PERF_SAMPLE_RAW;
+ attr.sample_period = 1;
+ attr.wakeup_events = 1;
+ pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
+ 0 /* cpu 0 */, -1 /* group id */,
+ 0 /* flags */);
+ if (CHECK(err, "perf_event_open", "err %d errno %d\n", err, errno))
+ goto close_pmu;
+
+ err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
+ if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n", err,
+ errno))
+ goto close_pmu;
+
+ err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
+ if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n", err,
+ errno))
+ goto close_pmu;
+
+ /* query (getpid(), pmu_fd) */
+ len = sizeof(buf);
+ err = bpf_task_fd_query(getpid(), pmu_fd, 0, buf, &len, &prog_id,
+ &fd_type, &probe_offset, &probe_addr);
+ if (CHECK(err < 0, "bpf_task_fd_query", "err %d errno %d\n", err,
+ errno))
+ goto close_pmu;
+
+ err = (fd_type == BPF_FD_TYPE_TRACEPOINT) && !strcmp(buf, tp_name);
+ if (CHECK(!err, "check_results", "fd_type %d tp_name %s\n",
+ fd_type, buf))
+ goto close_pmu;
+
+ close(pmu_fd);
+ goto close_prog_noerr;
+
+close_pmu:
+ close(pmu_fd);
+close_prog:
+ error_cnt++;
+close_prog_noerr:
+ bpf_object__close(obj);
+}
+
+static void test_task_fd_query_tp(void)
+{
+ test_task_fd_query_tp_core("sched/sched_switch",
+ "sched_switch");
+ test_task_fd_query_tp_core("syscalls/sys_enter_read",
+ "sys_enter_read");
+}
+
+int main(void)
+{
+ jit_enabled = is_jit_enabled();
+
+ test_pkt_access();
+ test_xdp();
+ test_xdp_adjust_tail();
+ test_l4lb_all();
+ test_xdp_noinline();
+ test_tcp_estats();
+ test_bpf_obj_id();
+ test_pkt_md_access();
+ test_obj_name();
+ test_tp_attach_query();
+ test_stacktrace_map();
+ test_stacktrace_build_id();
+ test_stacktrace_build_id_nmi();
+ test_stacktrace_map_raw_tp();
+ test_get_stack_raw_tp();
+ test_task_fd_query_rawtp();
+ test_task_fd_query_tp();
+
+ printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt);
+ return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS;
+}
diff --git a/tools/testing/selftests/bpf/test_select_reuseport.c b/tools/testing/selftests/bpf/test_select_reuseport.c
new file mode 100644
index 000000000..b14d25bfa
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_select_reuseport.c
@@ -0,0 +1,700 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018 Facebook */
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <string.h>
+#include <errno.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <linux/bpf.h>
+#include <linux/err.h>
+#include <linux/types.h>
+#include <linux/if_ether.h>
+#include <sys/types.h>
+#include <sys/epoll.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+#include "bpf_rlimit.h"
+#include "bpf_util.h"
+#include "test_select_reuseport_common.h"
+
+#define MIN_TCPHDR_LEN 20
+#define UDPHDR_LEN 8
+
+#define TCP_SYNCOOKIE_SYSCTL "/proc/sys/net/ipv4/tcp_syncookies"
+#define TCP_FO_SYSCTL "/proc/sys/net/ipv4/tcp_fastopen"
+#define REUSEPORT_ARRAY_SIZE 32
+
+static int result_map, tmp_index_ovr_map, linum_map, data_check_map;
+static __u32 expected_results[NR_RESULTS];
+static int sk_fds[REUSEPORT_ARRAY_SIZE];
+static int reuseport_array, outer_map;
+static int select_by_skb_data_prog;
+static int saved_tcp_syncookie;
+static struct bpf_object *obj;
+static int saved_tcp_fo;
+static __u32 index_zero;
+static int epfd;
+
+static union sa46 {
+ struct sockaddr_in6 v6;
+ struct sockaddr_in v4;
+ sa_family_t family;
+} srv_sa;
+
+#define CHECK(condition, tag, format...) ({ \
+ int __ret = !!(condition); \
+ if (__ret) { \
+ printf("%s(%d):FAIL:%s ", __func__, __LINE__, tag); \
+ printf(format); \
+ exit(-1); \
+ } \
+})
+
+static void create_maps(void)
+{
+ struct bpf_create_map_attr attr = {};
+
+ /* Creating reuseport_array */
+ attr.name = "reuseport_array";
+ attr.map_type = BPF_MAP_TYPE_REUSEPORT_SOCKARRAY;
+ attr.key_size = sizeof(__u32);
+ attr.value_size = sizeof(__u32);
+ attr.max_entries = REUSEPORT_ARRAY_SIZE;
+
+ reuseport_array = bpf_create_map_xattr(&attr);
+ CHECK(reuseport_array == -1, "creating reuseport_array",
+ "reuseport_array:%d errno:%d\n", reuseport_array, errno);
+
+ /* Creating outer_map */
+ attr.name = "outer_map";
+ attr.map_type = BPF_MAP_TYPE_ARRAY_OF_MAPS;
+ attr.key_size = sizeof(__u32);
+ attr.value_size = sizeof(__u32);
+ attr.max_entries = 1;
+ attr.inner_map_fd = reuseport_array;
+ outer_map = bpf_create_map_xattr(&attr);
+ CHECK(outer_map == -1, "creating outer_map",
+ "outer_map:%d errno:%d\n", outer_map, errno);
+}
+
+static void prepare_bpf_obj(void)
+{
+ struct bpf_program *prog;
+ struct bpf_map *map;
+ int err;
+ struct bpf_object_open_attr attr = {
+ .file = "test_select_reuseport_kern.o",
+ .prog_type = BPF_PROG_TYPE_SK_REUSEPORT,
+ };
+
+ obj = bpf_object__open_xattr(&attr);
+ CHECK(IS_ERR_OR_NULL(obj), "open test_select_reuseport_kern.o",
+ "obj:%p PTR_ERR(obj):%ld\n", obj, PTR_ERR(obj));
+
+ prog = bpf_program__next(NULL, obj);
+ CHECK(!prog, "get first bpf_program", "!prog\n");
+ bpf_program__set_type(prog, attr.prog_type);
+
+ map = bpf_object__find_map_by_name(obj, "outer_map");
+ CHECK(!map, "find outer_map", "!map\n");
+ err = bpf_map__reuse_fd(map, outer_map);
+ CHECK(err, "reuse outer_map", "err:%d\n", err);
+
+ err = bpf_object__load(obj);
+ CHECK(err, "load bpf_object", "err:%d\n", err);
+
+ select_by_skb_data_prog = bpf_program__fd(prog);
+ CHECK(select_by_skb_data_prog == -1, "get prog fd",
+ "select_by_skb_data_prog:%d\n", select_by_skb_data_prog);
+
+ map = bpf_object__find_map_by_name(obj, "result_map");
+ CHECK(!map, "find result_map", "!map\n");
+ result_map = bpf_map__fd(map);
+ CHECK(result_map == -1, "get result_map fd",
+ "result_map:%d\n", result_map);
+
+ map = bpf_object__find_map_by_name(obj, "tmp_index_ovr_map");
+ CHECK(!map, "find tmp_index_ovr_map", "!map\n");
+ tmp_index_ovr_map = bpf_map__fd(map);
+ CHECK(tmp_index_ovr_map == -1, "get tmp_index_ovr_map fd",
+ "tmp_index_ovr_map:%d\n", tmp_index_ovr_map);
+
+ map = bpf_object__find_map_by_name(obj, "linum_map");
+ CHECK(!map, "find linum_map", "!map\n");
+ linum_map = bpf_map__fd(map);
+ CHECK(linum_map == -1, "get linum_map fd",
+ "linum_map:%d\n", linum_map);
+
+ map = bpf_object__find_map_by_name(obj, "data_check_map");
+ CHECK(!map, "find data_check_map", "!map\n");
+ data_check_map = bpf_map__fd(map);
+ CHECK(data_check_map == -1, "get data_check_map fd",
+ "data_check_map:%d\n", data_check_map);
+}
+
+static void sa46_init_loopback(union sa46 *sa, sa_family_t family)
+{
+ memset(sa, 0, sizeof(*sa));
+ sa->family = family;
+ if (sa->family == AF_INET6)
+ sa->v6.sin6_addr = in6addr_loopback;
+ else
+ sa->v4.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+}
+
+static void sa46_init_inany(union sa46 *sa, sa_family_t family)
+{
+ memset(sa, 0, sizeof(*sa));
+ sa->family = family;
+ if (sa->family == AF_INET6)
+ sa->v6.sin6_addr = in6addr_any;
+ else
+ sa->v4.sin_addr.s_addr = INADDR_ANY;
+}
+
+static int read_int_sysctl(const char *sysctl)
+{
+ char buf[16];
+ int fd, ret;
+
+ fd = open(sysctl, 0);
+ CHECK(fd == -1, "open(sysctl)", "sysctl:%s fd:%d errno:%d\n",
+ sysctl, fd, errno);
+
+ ret = read(fd, buf, sizeof(buf));
+ CHECK(ret <= 0, "read(sysctl)", "sysctl:%s ret:%d errno:%d\n",
+ sysctl, ret, errno);
+ close(fd);
+
+ return atoi(buf);
+}
+
+static void write_int_sysctl(const char *sysctl, int v)
+{
+ int fd, ret, size;
+ char buf[16];
+
+ fd = open(sysctl, O_RDWR);
+ CHECK(fd == -1, "open(sysctl)", "sysctl:%s fd:%d errno:%d\n",
+ sysctl, fd, errno);
+
+ size = snprintf(buf, sizeof(buf), "%d", v);
+ ret = write(fd, buf, size);
+ CHECK(ret != size, "write(sysctl)",
+ "sysctl:%s ret:%d size:%d errno:%d\n", sysctl, ret, size, errno);
+ close(fd);
+}
+
+static void restore_sysctls(void)
+{
+ write_int_sysctl(TCP_FO_SYSCTL, saved_tcp_fo);
+ write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, saved_tcp_syncookie);
+}
+
+static void enable_fastopen(void)
+{
+ int fo;
+
+ fo = read_int_sysctl(TCP_FO_SYSCTL);
+ write_int_sysctl(TCP_FO_SYSCTL, fo | 7);
+}
+
+static void enable_syncookie(void)
+{
+ write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, 2);
+}
+
+static void disable_syncookie(void)
+{
+ write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, 0);
+}
+
+static __u32 get_linum(void)
+{
+ __u32 linum;
+ int err;
+
+ err = bpf_map_lookup_elem(linum_map, &index_zero, &linum);
+ CHECK(err == -1, "lookup_elem(linum_map)", "err:%d errno:%d\n",
+ err, errno);
+
+ return linum;
+}
+
+static void check_data(int type, sa_family_t family, const struct cmd *cmd,
+ int cli_fd)
+{
+ struct data_check expected = {}, result;
+ union sa46 cli_sa;
+ socklen_t addrlen;
+ int err;
+
+ addrlen = sizeof(cli_sa);
+ err = getsockname(cli_fd, (struct sockaddr *)&cli_sa,
+ &addrlen);
+ CHECK(err == -1, "getsockname(cli_fd)", "err:%d errno:%d\n",
+ err, errno);
+
+ err = bpf_map_lookup_elem(data_check_map, &index_zero, &result);
+ CHECK(err == -1, "lookup_elem(data_check_map)", "err:%d errno:%d\n",
+ err, errno);
+
+ if (type == SOCK_STREAM) {
+ expected.len = MIN_TCPHDR_LEN;
+ expected.ip_protocol = IPPROTO_TCP;
+ } else {
+ expected.len = UDPHDR_LEN;
+ expected.ip_protocol = IPPROTO_UDP;
+ }
+
+ if (family == AF_INET6) {
+ expected.eth_protocol = htons(ETH_P_IPV6);
+ expected.bind_inany = !srv_sa.v6.sin6_addr.s6_addr32[3] &&
+ !srv_sa.v6.sin6_addr.s6_addr32[2] &&
+ !srv_sa.v6.sin6_addr.s6_addr32[1] &&
+ !srv_sa.v6.sin6_addr.s6_addr32[0];
+
+ memcpy(&expected.skb_addrs[0], cli_sa.v6.sin6_addr.s6_addr32,
+ sizeof(cli_sa.v6.sin6_addr));
+ memcpy(&expected.skb_addrs[4], &in6addr_loopback,
+ sizeof(in6addr_loopback));
+ expected.skb_ports[0] = cli_sa.v6.sin6_port;
+ expected.skb_ports[1] = srv_sa.v6.sin6_port;
+ } else {
+ expected.eth_protocol = htons(ETH_P_IP);
+ expected.bind_inany = !srv_sa.v4.sin_addr.s_addr;
+
+ expected.skb_addrs[0] = cli_sa.v4.sin_addr.s_addr;
+ expected.skb_addrs[1] = htonl(INADDR_LOOPBACK);
+ expected.skb_ports[0] = cli_sa.v4.sin_port;
+ expected.skb_ports[1] = srv_sa.v4.sin_port;
+ }
+
+ if (memcmp(&result, &expected, offsetof(struct data_check,
+ equal_check_end))) {
+ printf("unexpected data_check\n");
+ printf(" result: (0x%x, %u, %u)\n",
+ result.eth_protocol, result.ip_protocol,
+ result.bind_inany);
+ printf("expected: (0x%x, %u, %u)\n",
+ expected.eth_protocol, expected.ip_protocol,
+ expected.bind_inany);
+ CHECK(1, "data_check result != expected",
+ "bpf_prog_linum:%u\n", get_linum());
+ }
+
+ CHECK(!result.hash, "data_check result.hash empty",
+ "result.hash:%u", result.hash);
+
+ expected.len += cmd ? sizeof(*cmd) : 0;
+ if (type == SOCK_STREAM)
+ CHECK(expected.len > result.len, "expected.len > result.len",
+ "expected.len:%u result.len:%u bpf_prog_linum:%u\n",
+ expected.len, result.len, get_linum());
+ else
+ CHECK(expected.len != result.len, "expected.len != result.len",
+ "expected.len:%u result.len:%u bpf_prog_linum:%u\n",
+ expected.len, result.len, get_linum());
+}
+
+static void check_results(void)
+{
+ __u32 results[NR_RESULTS];
+ __u32 i, broken = 0;
+ int err;
+
+ for (i = 0; i < NR_RESULTS; i++) {
+ err = bpf_map_lookup_elem(result_map, &i, &results[i]);
+ CHECK(err == -1, "lookup_elem(result_map)",
+ "i:%u err:%d errno:%d\n", i, err, errno);
+ }
+
+ for (i = 0; i < NR_RESULTS; i++) {
+ if (results[i] != expected_results[i]) {
+ broken = i;
+ break;
+ }
+ }
+
+ if (i == NR_RESULTS)
+ return;
+
+ printf("unexpected result\n");
+ printf(" result: [");
+ printf("%u", results[0]);
+ for (i = 1; i < NR_RESULTS; i++)
+ printf(", %u", results[i]);
+ printf("]\n");
+
+ printf("expected: [");
+ printf("%u", expected_results[0]);
+ for (i = 1; i < NR_RESULTS; i++)
+ printf(", %u", expected_results[i]);
+ printf("]\n");
+
+ CHECK(expected_results[broken] != results[broken],
+ "unexpected result",
+ "expected_results[%u] != results[%u] bpf_prog_linum:%u\n",
+ broken, broken, get_linum());
+}
+
+static int send_data(int type, sa_family_t family, void *data, size_t len,
+ enum result expected)
+{
+ union sa46 cli_sa;
+ int fd, err;
+
+ fd = socket(family, type, 0);
+ CHECK(fd == -1, "socket()", "fd:%d errno:%d\n", fd, errno);
+
+ sa46_init_loopback(&cli_sa, family);
+ err = bind(fd, (struct sockaddr *)&cli_sa, sizeof(cli_sa));
+ CHECK(fd == -1, "bind(cli_sa)", "err:%d errno:%d\n", err, errno);
+
+ err = sendto(fd, data, len, MSG_FASTOPEN, (struct sockaddr *)&srv_sa,
+ sizeof(srv_sa));
+ CHECK(err != len && expected >= PASS,
+ "sendto()", "family:%u err:%d errno:%d expected:%d\n",
+ family, err, errno, expected);
+
+ return fd;
+}
+
+static void do_test(int type, sa_family_t family, struct cmd *cmd,
+ enum result expected)
+{
+ int nev, srv_fd, cli_fd;
+ struct epoll_event ev;
+ struct cmd rcv_cmd;
+ ssize_t nread;
+
+ cli_fd = send_data(type, family, cmd, cmd ? sizeof(*cmd) : 0,
+ expected);
+ nev = epoll_wait(epfd, &ev, 1, expected >= PASS ? 5 : 0);
+ CHECK((nev <= 0 && expected >= PASS) ||
+ (nev > 0 && expected < PASS),
+ "nev <> expected",
+ "nev:%d expected:%d type:%d family:%d data:(%d, %d)\n",
+ nev, expected, type, family,
+ cmd ? cmd->reuseport_index : -1,
+ cmd ? cmd->pass_on_failure : -1);
+ check_results();
+ check_data(type, family, cmd, cli_fd);
+
+ if (expected < PASS)
+ return;
+
+ CHECK(expected != PASS_ERR_SK_SELECT_REUSEPORT &&
+ cmd->reuseport_index != ev.data.u32,
+ "check cmd->reuseport_index",
+ "cmd:(%u, %u) ev.data.u32:%u\n",
+ cmd->pass_on_failure, cmd->reuseport_index, ev.data.u32);
+
+ srv_fd = sk_fds[ev.data.u32];
+ if (type == SOCK_STREAM) {
+ int new_fd = accept(srv_fd, NULL, 0);
+
+ CHECK(new_fd == -1, "accept(srv_fd)",
+ "ev.data.u32:%u new_fd:%d errno:%d\n",
+ ev.data.u32, new_fd, errno);
+
+ nread = recv(new_fd, &rcv_cmd, sizeof(rcv_cmd), MSG_DONTWAIT);
+ CHECK(nread != sizeof(rcv_cmd),
+ "recv(new_fd)",
+ "ev.data.u32:%u nread:%zd sizeof(rcv_cmd):%zu errno:%d\n",
+ ev.data.u32, nread, sizeof(rcv_cmd), errno);
+
+ close(new_fd);
+ } else {
+ nread = recv(srv_fd, &rcv_cmd, sizeof(rcv_cmd), MSG_DONTWAIT);
+ CHECK(nread != sizeof(rcv_cmd),
+ "recv(sk_fds)",
+ "ev.data.u32:%u nread:%zd sizeof(rcv_cmd):%zu errno:%d\n",
+ ev.data.u32, nread, sizeof(rcv_cmd), errno);
+ }
+
+ close(cli_fd);
+}
+
+static void test_err_inner_map(int type, sa_family_t family)
+{
+ struct cmd cmd = {
+ .reuseport_index = 0,
+ .pass_on_failure = 0,
+ };
+
+ printf("%s: ", __func__);
+ expected_results[DROP_ERR_INNER_MAP]++;
+ do_test(type, family, &cmd, DROP_ERR_INNER_MAP);
+ printf("OK\n");
+}
+
+static void test_err_skb_data(int type, sa_family_t family)
+{
+ printf("%s: ", __func__);
+ expected_results[DROP_ERR_SKB_DATA]++;
+ do_test(type, family, NULL, DROP_ERR_SKB_DATA);
+ printf("OK\n");
+}
+
+static void test_err_sk_select_port(int type, sa_family_t family)
+{
+ struct cmd cmd = {
+ .reuseport_index = REUSEPORT_ARRAY_SIZE,
+ .pass_on_failure = 0,
+ };
+
+ printf("%s: ", __func__);
+ expected_results[DROP_ERR_SK_SELECT_REUSEPORT]++;
+ do_test(type, family, &cmd, DROP_ERR_SK_SELECT_REUSEPORT);
+ printf("OK\n");
+}
+
+static void test_pass(int type, sa_family_t family)
+{
+ struct cmd cmd;
+ int i;
+
+ printf("%s: ", __func__);
+ cmd.pass_on_failure = 0;
+ for (i = 0; i < REUSEPORT_ARRAY_SIZE; i++) {
+ expected_results[PASS]++;
+ cmd.reuseport_index = i;
+ do_test(type, family, &cmd, PASS);
+ }
+ printf("OK\n");
+}
+
+static void test_syncookie(int type, sa_family_t family)
+{
+ int err, tmp_index = 1;
+ struct cmd cmd = {
+ .reuseport_index = 0,
+ .pass_on_failure = 0,
+ };
+
+ if (type != SOCK_STREAM)
+ return;
+
+ printf("%s: ", __func__);
+ /*
+ * +1 for TCP-SYN and
+ * +1 for the TCP-ACK (ack the syncookie)
+ */
+ expected_results[PASS] += 2;
+ enable_syncookie();
+ /*
+ * Simulate TCP-SYN and TCP-ACK are handled by two different sk:
+ * TCP-SYN: select sk_fds[tmp_index = 1] tmp_index is from the
+ * tmp_index_ovr_map
+ * TCP-ACK: select sk_fds[reuseport_index = 0] reuseport_index
+ * is from the cmd.reuseport_index
+ */
+ err = bpf_map_update_elem(tmp_index_ovr_map, &index_zero,
+ &tmp_index, BPF_ANY);
+ CHECK(err == -1, "update_elem(tmp_index_ovr_map, 0, 1)",
+ "err:%d errno:%d\n", err, errno);
+ do_test(type, family, &cmd, PASS);
+ err = bpf_map_lookup_elem(tmp_index_ovr_map, &index_zero,
+ &tmp_index);
+ CHECK(err == -1 || tmp_index != -1,
+ "lookup_elem(tmp_index_ovr_map)",
+ "err:%d errno:%d tmp_index:%d\n",
+ err, errno, tmp_index);
+ disable_syncookie();
+ printf("OK\n");
+}
+
+static void test_pass_on_err(int type, sa_family_t family)
+{
+ struct cmd cmd = {
+ .reuseport_index = REUSEPORT_ARRAY_SIZE,
+ .pass_on_failure = 1,
+ };
+
+ printf("%s: ", __func__);
+ expected_results[PASS_ERR_SK_SELECT_REUSEPORT] += 1;
+ do_test(type, family, &cmd, PASS_ERR_SK_SELECT_REUSEPORT);
+ printf("OK\n");
+}
+
+static void prepare_sk_fds(int type, sa_family_t family, bool inany)
+{
+ const int first = REUSEPORT_ARRAY_SIZE - 1;
+ int i, err, optval = 1;
+ struct epoll_event ev;
+ socklen_t addrlen;
+
+ if (inany)
+ sa46_init_inany(&srv_sa, family);
+ else
+ sa46_init_loopback(&srv_sa, family);
+ addrlen = sizeof(srv_sa);
+
+ /*
+ * The sk_fds[] is filled from the back such that the order
+ * is exactly opposite to the (struct sock_reuseport *)reuse->socks[].
+ */
+ for (i = first; i >= 0; i--) {
+ sk_fds[i] = socket(family, type, 0);
+ CHECK(sk_fds[i] == -1, "socket()", "sk_fds[%d]:%d errno:%d\n",
+ i, sk_fds[i], errno);
+ err = setsockopt(sk_fds[i], SOL_SOCKET, SO_REUSEPORT,
+ &optval, sizeof(optval));
+ CHECK(err == -1, "setsockopt(SO_REUSEPORT)",
+ "sk_fds[%d] err:%d errno:%d\n",
+ i, err, errno);
+
+ if (i == first) {
+ err = setsockopt(sk_fds[i], SOL_SOCKET,
+ SO_ATTACH_REUSEPORT_EBPF,
+ &select_by_skb_data_prog,
+ sizeof(select_by_skb_data_prog));
+ CHECK(err == -1, "setsockopt(SO_ATTACH_REUEPORT_EBPF)",
+ "err:%d errno:%d\n", err, errno);
+ }
+
+ err = bind(sk_fds[i], (struct sockaddr *)&srv_sa, addrlen);
+ CHECK(err == -1, "bind()", "sk_fds[%d] err:%d errno:%d\n",
+ i, err, errno);
+
+ if (type == SOCK_STREAM) {
+ err = listen(sk_fds[i], 10);
+ CHECK(err == -1, "listen()",
+ "sk_fds[%d] err:%d errno:%d\n",
+ i, err, errno);
+ }
+
+ err = bpf_map_update_elem(reuseport_array, &i, &sk_fds[i],
+ BPF_NOEXIST);
+ CHECK(err == -1, "update_elem(reuseport_array)",
+ "sk_fds[%d] err:%d errno:%d\n", i, err, errno);
+
+ if (i == first) {
+ socklen_t addrlen = sizeof(srv_sa);
+
+ err = getsockname(sk_fds[i], (struct sockaddr *)&srv_sa,
+ &addrlen);
+ CHECK(err == -1, "getsockname()",
+ "sk_fds[%d] err:%d errno:%d\n", i, err, errno);
+ }
+ }
+
+ epfd = epoll_create(1);
+ CHECK(epfd == -1, "epoll_create(1)",
+ "epfd:%d errno:%d\n", epfd, errno);
+
+ ev.events = EPOLLIN;
+ for (i = 0; i < REUSEPORT_ARRAY_SIZE; i++) {
+ ev.data.u32 = i;
+ err = epoll_ctl(epfd, EPOLL_CTL_ADD, sk_fds[i], &ev);
+ CHECK(err, "epoll_ctl(EPOLL_CTL_ADD)", "sk_fds[%d]\n", i);
+ }
+}
+
+static void setup_per_test(int type, unsigned short family, bool inany)
+{
+ int ovr = -1, err;
+
+ prepare_sk_fds(type, family, inany);
+ err = bpf_map_update_elem(tmp_index_ovr_map, &index_zero, &ovr,
+ BPF_ANY);
+ CHECK(err == -1, "update_elem(tmp_index_ovr_map, 0, -1)",
+ "err:%d errno:%d\n", err, errno);
+}
+
+static void cleanup_per_test(void)
+{
+ int i, err, zero = 0;
+
+ memset(expected_results, 0, sizeof(expected_results));
+
+ for (i = 0; i < NR_RESULTS; i++) {
+ err = bpf_map_update_elem(result_map, &i, &zero, BPF_ANY);
+ CHECK(err, "reset elem in result_map",
+ "i:%u err:%d errno:%d\n", i, err, errno);
+ }
+
+ err = bpf_map_update_elem(linum_map, &zero, &zero, BPF_ANY);
+ CHECK(err, "reset line number in linum_map", "err:%d errno:%d\n",
+ err, errno);
+
+ for (i = 0; i < REUSEPORT_ARRAY_SIZE; i++)
+ close(sk_fds[i]);
+ close(epfd);
+
+ err = bpf_map_delete_elem(outer_map, &index_zero);
+ CHECK(err == -1, "delete_elem(outer_map)",
+ "err:%d errno:%d\n", err, errno);
+}
+
+static void cleanup(void)
+{
+ close(outer_map);
+ close(reuseport_array);
+ bpf_object__close(obj);
+}
+
+static void test_all(void)
+{
+ /* Extra SOCK_STREAM to test bind_inany==true */
+ const int types[] = { SOCK_STREAM, SOCK_DGRAM, SOCK_STREAM };
+ const char * const type_strings[] = { "TCP", "UDP", "TCP" };
+ const char * const family_strings[] = { "IPv6", "IPv4" };
+ const unsigned short families[] = { AF_INET6, AF_INET };
+ const bool bind_inany[] = { false, false, true };
+ int t, f, err;
+
+ for (f = 0; f < ARRAY_SIZE(families); f++) {
+ unsigned short family = families[f];
+
+ for (t = 0; t < ARRAY_SIZE(types); t++) {
+ bool inany = bind_inany[t];
+ int type = types[t];
+
+ printf("######## %s/%s %s ########\n",
+ family_strings[f], type_strings[t],
+ inany ? " INANY " : "LOOPBACK");
+
+ setup_per_test(type, family, inany);
+
+ test_err_inner_map(type, family);
+
+ /* Install reuseport_array to the outer_map */
+ err = bpf_map_update_elem(outer_map, &index_zero,
+ &reuseport_array, BPF_ANY);
+ CHECK(err == -1, "update_elem(outer_map)",
+ "err:%d errno:%d\n", err, errno);
+
+ test_err_skb_data(type, family);
+ test_err_sk_select_port(type, family);
+ test_pass(type, family);
+ test_syncookie(type, family);
+ test_pass_on_err(type, family);
+
+ cleanup_per_test();
+ printf("\n");
+ }
+ }
+}
+
+int main(int argc, const char **argv)
+{
+ create_maps();
+ prepare_bpf_obj();
+ saved_tcp_fo = read_int_sysctl(TCP_FO_SYSCTL);
+ saved_tcp_syncookie = read_int_sysctl(TCP_SYNCOOKIE_SYSCTL);
+ enable_fastopen();
+ disable_syncookie();
+ atexit(restore_sysctls);
+
+ test_all();
+
+ cleanup();
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/test_select_reuseport_common.h b/tools/testing/selftests/bpf/test_select_reuseport_common.h
new file mode 100644
index 000000000..08eb2a9f1
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_select_reuseport_common.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018 Facebook */
+
+#ifndef __TEST_SELECT_REUSEPORT_COMMON_H
+#define __TEST_SELECT_REUSEPORT_COMMON_H
+
+#include <linux/types.h>
+
+enum result {
+ DROP_ERR_INNER_MAP,
+ DROP_ERR_SKB_DATA,
+ DROP_ERR_SK_SELECT_REUSEPORT,
+ DROP_MISC,
+ PASS,
+ PASS_ERR_SK_SELECT_REUSEPORT,
+ NR_RESULTS,
+};
+
+struct cmd {
+ __u32 reuseport_index;
+ __u32 pass_on_failure;
+};
+
+struct data_check {
+ __u32 ip_protocol;
+ __u32 skb_addrs[8];
+ __u16 skb_ports[2];
+ __u16 eth_protocol;
+ __u8 bind_inany;
+ __u8 equal_check_end[0];
+
+ __u32 len;
+ __u32 hash;
+};
+
+#endif
diff --git a/tools/testing/selftests/bpf/test_select_reuseport_kern.c b/tools/testing/selftests/bpf/test_select_reuseport_kern.c
new file mode 100644
index 000000000..5b54ec637
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_select_reuseport_kern.c
@@ -0,0 +1,180 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018 Facebook */
+
+#include <stdlib.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/bpf.h>
+#include <linux/types.h>
+#include <linux/if_ether.h>
+
+#include "bpf_endian.h"
+#include "bpf_helpers.h"
+#include "test_select_reuseport_common.h"
+
+int _version SEC("version") = 1;
+
+#ifndef offsetof
+#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
+#endif
+
+struct bpf_map_def SEC("maps") outer_map = {
+ .type = BPF_MAP_TYPE_ARRAY_OF_MAPS,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u32),
+ .max_entries = 1,
+};
+
+struct bpf_map_def SEC("maps") result_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u32),
+ .max_entries = NR_RESULTS,
+};
+
+struct bpf_map_def SEC("maps") tmp_index_ovr_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(int),
+ .max_entries = 1,
+};
+
+struct bpf_map_def SEC("maps") linum_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u32),
+ .max_entries = 1,
+};
+
+struct bpf_map_def SEC("maps") data_check_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct data_check),
+ .max_entries = 1,
+};
+
+#define GOTO_DONE(_result) ({ \
+ result = (_result); \
+ linum = __LINE__; \
+ goto done; \
+})
+
+SEC("select_by_skb_data")
+int _select_by_skb_data(struct sk_reuseport_md *reuse_md)
+{
+ __u32 linum, index = 0, flags = 0, index_zero = 0;
+ __u32 *result_cnt, *linum_value;
+ struct data_check data_check = {};
+ struct cmd *cmd, cmd_copy;
+ void *data, *data_end;
+ void *reuseport_array;
+ enum result result;
+ int *index_ovr;
+ int err;
+
+ data = reuse_md->data;
+ data_end = reuse_md->data_end;
+ data_check.len = reuse_md->len;
+ data_check.eth_protocol = reuse_md->eth_protocol;
+ data_check.ip_protocol = reuse_md->ip_protocol;
+ data_check.hash = reuse_md->hash;
+ data_check.bind_inany = reuse_md->bind_inany;
+ if (data_check.eth_protocol == bpf_htons(ETH_P_IP)) {
+ if (bpf_skb_load_bytes_relative(reuse_md,
+ offsetof(struct iphdr, saddr),
+ data_check.skb_addrs, 8,
+ BPF_HDR_START_NET))
+ GOTO_DONE(DROP_MISC);
+ } else {
+ if (bpf_skb_load_bytes_relative(reuse_md,
+ offsetof(struct ipv6hdr, saddr),
+ data_check.skb_addrs, 32,
+ BPF_HDR_START_NET))
+ GOTO_DONE(DROP_MISC);
+ }
+
+ /*
+ * The ip_protocol could be a compile time decision
+ * if the bpf_prog.o is dedicated to either TCP or
+ * UDP.
+ *
+ * Otherwise, reuse_md->ip_protocol or
+ * the protocol field in the iphdr can be used.
+ */
+ if (data_check.ip_protocol == IPPROTO_TCP) {
+ struct tcphdr *th = data;
+
+ if (th + 1 > data_end)
+ GOTO_DONE(DROP_MISC);
+
+ data_check.skb_ports[0] = th->source;
+ data_check.skb_ports[1] = th->dest;
+
+ if ((th->doff << 2) + sizeof(*cmd) > data_check.len)
+ GOTO_DONE(DROP_ERR_SKB_DATA);
+ if (bpf_skb_load_bytes(reuse_md, th->doff << 2, &cmd_copy,
+ sizeof(cmd_copy)))
+ GOTO_DONE(DROP_MISC);
+ cmd = &cmd_copy;
+ } else if (data_check.ip_protocol == IPPROTO_UDP) {
+ struct udphdr *uh = data;
+
+ if (uh + 1 > data_end)
+ GOTO_DONE(DROP_MISC);
+
+ data_check.skb_ports[0] = uh->source;
+ data_check.skb_ports[1] = uh->dest;
+
+ if (sizeof(struct udphdr) + sizeof(*cmd) > data_check.len)
+ GOTO_DONE(DROP_ERR_SKB_DATA);
+ if (data + sizeof(struct udphdr) + sizeof(*cmd) > data_end) {
+ if (bpf_skb_load_bytes(reuse_md, sizeof(struct udphdr),
+ &cmd_copy, sizeof(cmd_copy)))
+ GOTO_DONE(DROP_MISC);
+ cmd = &cmd_copy;
+ } else {
+ cmd = data + sizeof(struct udphdr);
+ }
+ } else {
+ GOTO_DONE(DROP_MISC);
+ }
+
+ reuseport_array = bpf_map_lookup_elem(&outer_map, &index_zero);
+ if (!reuseport_array)
+ GOTO_DONE(DROP_ERR_INNER_MAP);
+
+ index = cmd->reuseport_index;
+ index_ovr = bpf_map_lookup_elem(&tmp_index_ovr_map, &index_zero);
+ if (!index_ovr)
+ GOTO_DONE(DROP_MISC);
+
+ if (*index_ovr != -1) {
+ index = *index_ovr;
+ *index_ovr = -1;
+ }
+ err = bpf_sk_select_reuseport(reuse_md, reuseport_array, &index,
+ flags);
+ if (!err)
+ GOTO_DONE(PASS);
+
+ if (cmd->pass_on_failure)
+ GOTO_DONE(PASS_ERR_SK_SELECT_REUSEPORT);
+ else
+ GOTO_DONE(DROP_ERR_SK_SELECT_REUSEPORT);
+
+done:
+ result_cnt = bpf_map_lookup_elem(&result_map, &result);
+ if (!result_cnt)
+ return SK_DROP;
+
+ bpf_map_update_elem(&linum_map, &index_zero, &linum, BPF_ANY);
+ bpf_map_update_elem(&data_check_map, &index_zero, &data_check, BPF_ANY);
+
+ (*result_cnt)++;
+ return result < PASS ? SK_DROP : SK_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_skb_cgroup_id.sh b/tools/testing/selftests/bpf/test_skb_cgroup_id.sh
new file mode 100755
index 000000000..42544a969
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_skb_cgroup_id.sh
@@ -0,0 +1,62 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2018 Facebook
+
+set -eu
+
+wait_for_ip()
+{
+ local _i
+ echo -n "Wait for testing link-local IP to become available "
+ for _i in $(seq ${MAX_PING_TRIES}); do
+ echo -n "."
+ if ping -6 -q -c 1 -W 1 ff02::1%${TEST_IF} >/dev/null 2>&1; then
+ echo " OK"
+ return
+ fi
+ sleep 1
+ done
+ echo 1>&2 "ERROR: Timeout waiting for test IP to become available."
+ exit 1
+}
+
+setup()
+{
+ # Create testing interfaces not to interfere with current environment.
+ ip link add dev ${TEST_IF} type veth peer name ${TEST_IF_PEER}
+ ip link set ${TEST_IF} up
+ ip link set ${TEST_IF_PEER} up
+
+ wait_for_ip
+
+ tc qdisc add dev ${TEST_IF} clsact
+ tc filter add dev ${TEST_IF} egress bpf obj ${BPF_PROG_OBJ} \
+ sec ${BPF_PROG_SECTION} da
+
+ BPF_PROG_ID=$(tc filter show dev ${TEST_IF} egress | \
+ awk '/ id / {sub(/.* id /, "", $0); print($1)}')
+}
+
+cleanup()
+{
+ ip link del ${TEST_IF} 2>/dev/null || :
+ ip link del ${TEST_IF_PEER} 2>/dev/null || :
+}
+
+main()
+{
+ trap cleanup EXIT 2 3 6 15
+ setup
+ ${PROG} ${TEST_IF} ${BPF_PROG_ID}
+}
+
+DIR=$(dirname $0)
+TEST_IF="test_cgid_1"
+TEST_IF_PEER="test_cgid_2"
+MAX_PING_TRIES=5
+BPF_PROG_OBJ="${DIR}/test_skb_cgroup_id_kern.o"
+BPF_PROG_SECTION="cgroup_id_logger"
+BPF_PROG_ID=0
+PROG="${DIR}/test_skb_cgroup_id_user"
+
+main
diff --git a/tools/testing/selftests/bpf/test_skb_cgroup_id_kern.c b/tools/testing/selftests/bpf/test_skb_cgroup_id_kern.c
new file mode 100644
index 000000000..68cf9829f
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_skb_cgroup_id_kern.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <linux/bpf.h>
+#include <linux/pkt_cls.h>
+
+#include <string.h>
+
+#include "bpf_helpers.h"
+
+#define NUM_CGROUP_LEVELS 4
+
+struct bpf_map_def SEC("maps") cgroup_ids = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u64),
+ .max_entries = NUM_CGROUP_LEVELS,
+};
+
+static __always_inline void log_nth_level(struct __sk_buff *skb, __u32 level)
+{
+ __u64 id;
+
+ /* [1] &level passed to external function that may change it, it's
+ * incompatible with loop unroll.
+ */
+ id = bpf_skb_ancestor_cgroup_id(skb, level);
+ bpf_map_update_elem(&cgroup_ids, &level, &id, 0);
+}
+
+SEC("cgroup_id_logger")
+int log_cgroup_id(struct __sk_buff *skb)
+{
+ /* Loop unroll can't be used here due to [1]. Unrolling manually.
+ * Number of calls should be in sync with NUM_CGROUP_LEVELS.
+ */
+ log_nth_level(skb, 0);
+ log_nth_level(skb, 1);
+ log_nth_level(skb, 2);
+ log_nth_level(skb, 3);
+
+ return TC_ACT_OK;
+}
+
+int _version SEC("version") = 1;
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c b/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c
new file mode 100644
index 000000000..c121cc59f
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c
@@ -0,0 +1,187 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <arpa/inet.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "bpf_rlimit.h"
+#include "cgroup_helpers.h"
+
+#define CGROUP_PATH "/skb_cgroup_test"
+#define NUM_CGROUP_LEVELS 4
+
+/* RFC 4291, Section 2.7.1 */
+#define LINKLOCAL_MULTICAST "ff02::1"
+
+static int mk_dst_addr(const char *ip, const char *iface,
+ struct sockaddr_in6 *dst)
+{
+ memset(dst, 0, sizeof(*dst));
+
+ dst->sin6_family = AF_INET6;
+ dst->sin6_port = htons(1025);
+
+ if (inet_pton(AF_INET6, ip, &dst->sin6_addr) != 1) {
+ log_err("Invalid IPv6: %s", ip);
+ return -1;
+ }
+
+ dst->sin6_scope_id = if_nametoindex(iface);
+ if (!dst->sin6_scope_id) {
+ log_err("Failed to get index of iface: %s", iface);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int send_packet(const char *iface)
+{
+ struct sockaddr_in6 dst;
+ char msg[] = "msg";
+ int err = 0;
+ int fd = -1;
+
+ if (mk_dst_addr(LINKLOCAL_MULTICAST, iface, &dst))
+ goto err;
+
+ fd = socket(AF_INET6, SOCK_DGRAM, 0);
+ if (fd == -1) {
+ log_err("Failed to create UDP socket");
+ goto err;
+ }
+
+ if (sendto(fd, &msg, sizeof(msg), 0, (const struct sockaddr *)&dst,
+ sizeof(dst)) == -1) {
+ log_err("Failed to send datagram");
+ goto err;
+ }
+
+ goto out;
+err:
+ err = -1;
+out:
+ if (fd >= 0)
+ close(fd);
+ return err;
+}
+
+int get_map_fd_by_prog_id(int prog_id)
+{
+ struct bpf_prog_info info = {};
+ __u32 info_len = sizeof(info);
+ __u32 map_ids[1];
+ int prog_fd = -1;
+ int map_fd = -1;
+
+ prog_fd = bpf_prog_get_fd_by_id(prog_id);
+ if (prog_fd < 0) {
+ log_err("Failed to get fd by prog id %d", prog_id);
+ goto err;
+ }
+
+ info.nr_map_ids = 1;
+ info.map_ids = (__u64) (unsigned long) map_ids;
+
+ if (bpf_obj_get_info_by_fd(prog_fd, &info, &info_len)) {
+ log_err("Failed to get info by prog fd %d", prog_fd);
+ goto err;
+ }
+
+ if (!info.nr_map_ids) {
+ log_err("No maps found for prog fd %d", prog_fd);
+ goto err;
+ }
+
+ map_fd = bpf_map_get_fd_by_id(map_ids[0]);
+ if (map_fd < 0)
+ log_err("Failed to get fd by map id %d", map_ids[0]);
+err:
+ if (prog_fd >= 0)
+ close(prog_fd);
+ return map_fd;
+}
+
+int check_ancestor_cgroup_ids(int prog_id)
+{
+ __u64 actual_ids[NUM_CGROUP_LEVELS], expected_ids[NUM_CGROUP_LEVELS];
+ __u32 level;
+ int err = 0;
+ int map_fd;
+
+ expected_ids[0] = 0x100000001; /* root cgroup */
+ expected_ids[1] = get_cgroup_id("");
+ expected_ids[2] = get_cgroup_id(CGROUP_PATH);
+ expected_ids[3] = 0; /* non-existent cgroup */
+
+ map_fd = get_map_fd_by_prog_id(prog_id);
+ if (map_fd < 0)
+ goto err;
+
+ for (level = 0; level < NUM_CGROUP_LEVELS; ++level) {
+ if (bpf_map_lookup_elem(map_fd, &level, &actual_ids[level])) {
+ log_err("Failed to lookup key %d", level);
+ goto err;
+ }
+ if (actual_ids[level] != expected_ids[level]) {
+ log_err("%llx (actual) != %llx (expected), level: %u\n",
+ actual_ids[level], expected_ids[level], level);
+ goto err;
+ }
+ }
+
+ goto out;
+err:
+ err = -1;
+out:
+ if (map_fd >= 0)
+ close(map_fd);
+ return err;
+}
+
+int main(int argc, char **argv)
+{
+ int cgfd = -1;
+ int err = 0;
+
+ if (argc < 3) {
+ fprintf(stderr, "Usage: %s iface prog_id\n", argv[0]);
+ exit(EXIT_FAILURE);
+ }
+
+ if (setup_cgroup_environment())
+ goto err;
+
+ cgfd = create_and_get_cgroup(CGROUP_PATH);
+ if (!cgfd)
+ goto err;
+
+ if (join_cgroup(CGROUP_PATH))
+ goto err;
+
+ if (send_packet(argv[1]))
+ goto err;
+
+ if (check_ancestor_cgroup_ids(atoi(argv[2])))
+ goto err;
+
+ goto out;
+err:
+ err = -1;
+out:
+ close(cgfd);
+ cleanup_cgroup_environment();
+ printf("[%s]\n", err ? "FAIL" : "PASS");
+ return err;
+}
diff --git a/tools/testing/selftests/bpf/test_sock.c b/tools/testing/selftests/bpf/test_sock.c
new file mode 100644
index 000000000..e95671220
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_sock.c
@@ -0,0 +1,480 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <stdio.h>
+#include <unistd.h>
+
+#include <arpa/inet.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#include <linux/filter.h>
+
+#include <bpf/bpf.h>
+
+#include "cgroup_helpers.h"
+#include "bpf_endian.h"
+#include "bpf_rlimit.h"
+#include "bpf_util.h"
+
+#define CG_PATH "/foo"
+#define MAX_INSNS 512
+
+char bpf_log_buf[BPF_LOG_BUF_SIZE];
+
+struct sock_test {
+ const char *descr;
+ /* BPF prog properties */
+ struct bpf_insn insns[MAX_INSNS];
+ enum bpf_attach_type expected_attach_type;
+ enum bpf_attach_type attach_type;
+ /* Socket properties */
+ int domain;
+ int type;
+ /* Endpoint to bind() to */
+ const char *ip;
+ unsigned short port;
+ /* Expected test result */
+ enum {
+ LOAD_REJECT,
+ ATTACH_REJECT,
+ BIND_REJECT,
+ SUCCESS,
+ } result;
+};
+
+static struct sock_test tests[] = {
+ {
+ "bind4 load with invalid access: src_ip6",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock, src_ip6[0])),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ BPF_CGROUP_INET4_POST_BIND,
+ BPF_CGROUP_INET4_POST_BIND,
+ 0,
+ 0,
+ NULL,
+ 0,
+ LOAD_REJECT,
+ },
+ {
+ "bind4 load with invalid access: mark",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock, mark)),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ BPF_CGROUP_INET4_POST_BIND,
+ BPF_CGROUP_INET4_POST_BIND,
+ 0,
+ 0,
+ NULL,
+ 0,
+ LOAD_REJECT,
+ },
+ {
+ "bind6 load with invalid access: src_ip4",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock, src_ip4)),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ BPF_CGROUP_INET6_POST_BIND,
+ BPF_CGROUP_INET6_POST_BIND,
+ 0,
+ 0,
+ NULL,
+ 0,
+ LOAD_REJECT,
+ },
+ {
+ "sock_create load with invalid access: src_port",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock, src_port)),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ BPF_CGROUP_INET_SOCK_CREATE,
+ BPF_CGROUP_INET_SOCK_CREATE,
+ 0,
+ 0,
+ NULL,
+ 0,
+ LOAD_REJECT,
+ },
+ {
+ "sock_create load w/o expected_attach_type (compat mode)",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ 0,
+ BPF_CGROUP_INET_SOCK_CREATE,
+ AF_INET,
+ SOCK_STREAM,
+ "127.0.0.1",
+ 8097,
+ SUCCESS,
+ },
+ {
+ "sock_create load w/ expected_attach_type",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ BPF_CGROUP_INET_SOCK_CREATE,
+ BPF_CGROUP_INET_SOCK_CREATE,
+ AF_INET,
+ SOCK_STREAM,
+ "127.0.0.1",
+ 8097,
+ SUCCESS,
+ },
+ {
+ "attach type mismatch bind4 vs bind6",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ BPF_CGROUP_INET4_POST_BIND,
+ BPF_CGROUP_INET6_POST_BIND,
+ 0,
+ 0,
+ NULL,
+ 0,
+ ATTACH_REJECT,
+ },
+ {
+ "attach type mismatch bind6 vs bind4",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ BPF_CGROUP_INET6_POST_BIND,
+ BPF_CGROUP_INET4_POST_BIND,
+ 0,
+ 0,
+ NULL,
+ 0,
+ ATTACH_REJECT,
+ },
+ {
+ "attach type mismatch default vs bind4",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ 0,
+ BPF_CGROUP_INET4_POST_BIND,
+ 0,
+ 0,
+ NULL,
+ 0,
+ ATTACH_REJECT,
+ },
+ {
+ "attach type mismatch bind6 vs sock_create",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ BPF_CGROUP_INET6_POST_BIND,
+ BPF_CGROUP_INET_SOCK_CREATE,
+ 0,
+ 0,
+ NULL,
+ 0,
+ ATTACH_REJECT,
+ },
+ {
+ "bind4 reject all",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ BPF_CGROUP_INET4_POST_BIND,
+ BPF_CGROUP_INET4_POST_BIND,
+ AF_INET,
+ SOCK_STREAM,
+ "0.0.0.0",
+ 0,
+ BIND_REJECT,
+ },
+ {
+ "bind6 reject all",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ BPF_CGROUP_INET6_POST_BIND,
+ BPF_CGROUP_INET6_POST_BIND,
+ AF_INET6,
+ SOCK_STREAM,
+ "::",
+ 0,
+ BIND_REJECT,
+ },
+ {
+ "bind6 deny specific IP & port",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+
+ /* if (ip == expected && port == expected) */
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock, src_ip6[3])),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7,
+ __bpf_constant_ntohl(0x00000001), 4),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock, src_port)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x2001, 2),
+
+ /* return DENY; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_A(1),
+
+ /* else return ALLOW; */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ BPF_CGROUP_INET6_POST_BIND,
+ BPF_CGROUP_INET6_POST_BIND,
+ AF_INET6,
+ SOCK_STREAM,
+ "::1",
+ 8193,
+ BIND_REJECT,
+ },
+ {
+ "bind4 allow specific IP & port",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+
+ /* if (ip == expected && port == expected) */
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock, src_ip4)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7,
+ __bpf_constant_ntohl(0x7F000001), 4),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock, src_port)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x1002, 2),
+
+ /* return ALLOW; */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_JMP_A(1),
+
+ /* else return DENY; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ BPF_CGROUP_INET4_POST_BIND,
+ BPF_CGROUP_INET4_POST_BIND,
+ AF_INET,
+ SOCK_STREAM,
+ "127.0.0.1",
+ 4098,
+ SUCCESS,
+ },
+ {
+ "bind4 allow all",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ BPF_CGROUP_INET4_POST_BIND,
+ BPF_CGROUP_INET4_POST_BIND,
+ AF_INET,
+ SOCK_STREAM,
+ "0.0.0.0",
+ 0,
+ SUCCESS,
+ },
+ {
+ "bind6 allow all",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ BPF_CGROUP_INET6_POST_BIND,
+ BPF_CGROUP_INET6_POST_BIND,
+ AF_INET6,
+ SOCK_STREAM,
+ "::",
+ 0,
+ SUCCESS,
+ },
+};
+
+static size_t probe_prog_length(const struct bpf_insn *fp)
+{
+ size_t len;
+
+ for (len = MAX_INSNS - 1; len > 0; --len)
+ if (fp[len].code != 0 || fp[len].imm != 0)
+ break;
+ return len + 1;
+}
+
+static int load_sock_prog(const struct bpf_insn *prog,
+ enum bpf_attach_type attach_type)
+{
+ struct bpf_load_program_attr attr;
+
+ memset(&attr, 0, sizeof(struct bpf_load_program_attr));
+ attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK;
+ attr.expected_attach_type = attach_type;
+ attr.insns = prog;
+ attr.insns_cnt = probe_prog_length(attr.insns);
+ attr.license = "GPL";
+
+ return bpf_load_program_xattr(&attr, bpf_log_buf, BPF_LOG_BUF_SIZE);
+}
+
+static int attach_sock_prog(int cgfd, int progfd,
+ enum bpf_attach_type attach_type)
+{
+ return bpf_prog_attach(progfd, cgfd, attach_type, BPF_F_ALLOW_OVERRIDE);
+}
+
+static int bind_sock(int domain, int type, const char *ip, unsigned short port)
+{
+ struct sockaddr_storage addr;
+ struct sockaddr_in6 *addr6;
+ struct sockaddr_in *addr4;
+ int sockfd = -1;
+ socklen_t len;
+ int err = 0;
+
+ sockfd = socket(domain, type, 0);
+ if (sockfd < 0)
+ goto err;
+
+ memset(&addr, 0, sizeof(addr));
+
+ if (domain == AF_INET) {
+ len = sizeof(struct sockaddr_in);
+ addr4 = (struct sockaddr_in *)&addr;
+ addr4->sin_family = domain;
+ addr4->sin_port = htons(port);
+ if (inet_pton(domain, ip, (void *)&addr4->sin_addr) != 1)
+ goto err;
+ } else if (domain == AF_INET6) {
+ len = sizeof(struct sockaddr_in6);
+ addr6 = (struct sockaddr_in6 *)&addr;
+ addr6->sin6_family = domain;
+ addr6->sin6_port = htons(port);
+ if (inet_pton(domain, ip, (void *)&addr6->sin6_addr) != 1)
+ goto err;
+ } else {
+ goto err;
+ }
+
+ if (bind(sockfd, (const struct sockaddr *)&addr, len) == -1)
+ goto err;
+
+ goto out;
+err:
+ err = -1;
+out:
+ close(sockfd);
+ return err;
+}
+
+static int run_test_case(int cgfd, const struct sock_test *test)
+{
+ int progfd = -1;
+ int err = 0;
+
+ printf("Test case: %s .. ", test->descr);
+ progfd = load_sock_prog(test->insns, test->expected_attach_type);
+ if (progfd < 0) {
+ if (test->result == LOAD_REJECT)
+ goto out;
+ else
+ goto err;
+ }
+
+ if (attach_sock_prog(cgfd, progfd, test->attach_type) == -1) {
+ if (test->result == ATTACH_REJECT)
+ goto out;
+ else
+ goto err;
+ }
+
+ if (bind_sock(test->domain, test->type, test->ip, test->port) == -1) {
+ /* sys_bind() may fail for different reasons, errno has to be
+ * checked to confirm that BPF program rejected it.
+ */
+ if (test->result == BIND_REJECT && errno == EPERM)
+ goto out;
+ else
+ goto err;
+ }
+
+
+ if (test->result != SUCCESS)
+ goto err;
+
+ goto out;
+err:
+ err = -1;
+out:
+ /* Detaching w/o checking return code: best effort attempt. */
+ if (progfd != -1)
+ bpf_prog_detach(cgfd, test->attach_type);
+ close(progfd);
+ printf("[%s]\n", err ? "FAIL" : "PASS");
+ return err;
+}
+
+static int run_tests(int cgfd)
+{
+ int passes = 0;
+ int fails = 0;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(tests); ++i) {
+ if (run_test_case(cgfd, &tests[i]))
+ ++fails;
+ else
+ ++passes;
+ }
+ printf("Summary: %d PASSED, %d FAILED\n", passes, fails);
+ return fails ? -1 : 0;
+}
+
+int main(int argc, char **argv)
+{
+ int cgfd = -1;
+ int err = 0;
+
+ if (setup_cgroup_environment())
+ goto err;
+
+ cgfd = create_and_get_cgroup(CG_PATH);
+ if (!cgfd)
+ goto err;
+
+ if (join_cgroup(CG_PATH))
+ goto err;
+
+ if (run_tests(cgfd))
+ goto err;
+
+ goto out;
+err:
+ err = -1;
+out:
+ close(cgfd);
+ cleanup_cgroup_environment();
+ return err;
+}
diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c
new file mode 100644
index 000000000..e38f1cb70
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_sock_addr.c
@@ -0,0 +1,1441 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <arpa/inet.h>
+#include <netinet/in.h>
+#include <sys/types.h>
+#include <sys/select.h>
+#include <sys/socket.h>
+
+#include <linux/filter.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "cgroup_helpers.h"
+#include "bpf_rlimit.h"
+#include "bpf_util.h"
+
+#ifndef ENOTSUPP
+# define ENOTSUPP 524
+#endif
+
+#define CG_PATH "/foo"
+#define CONNECT4_PROG_PATH "./connect4_prog.o"
+#define CONNECT6_PROG_PATH "./connect6_prog.o"
+#define SENDMSG4_PROG_PATH "./sendmsg4_prog.o"
+#define SENDMSG6_PROG_PATH "./sendmsg6_prog.o"
+
+#define SERV4_IP "192.168.1.254"
+#define SERV4_REWRITE_IP "127.0.0.1"
+#define SRC4_IP "172.16.0.1"
+#define SRC4_REWRITE_IP "127.0.0.4"
+#define SERV4_PORT 4040
+#define SERV4_REWRITE_PORT 4444
+
+#define SERV6_IP "face:b00c:1234:5678::abcd"
+#define SERV6_REWRITE_IP "::1"
+#define SERV6_V4MAPPED_IP "::ffff:192.168.0.4"
+#define SRC6_IP "::1"
+#define SRC6_REWRITE_IP "::6"
+#define WILDCARD6_IP "::"
+#define SERV6_PORT 6060
+#define SERV6_REWRITE_PORT 6666
+
+#define INET_NTOP_BUF 40
+
+struct sock_addr_test;
+
+typedef int (*load_fn)(const struct sock_addr_test *test);
+typedef int (*info_fn)(int, struct sockaddr *, socklen_t *);
+
+char bpf_log_buf[BPF_LOG_BUF_SIZE];
+
+struct sock_addr_test {
+ const char *descr;
+ /* BPF prog properties */
+ load_fn loadfn;
+ enum bpf_attach_type expected_attach_type;
+ enum bpf_attach_type attach_type;
+ /* Socket properties */
+ int domain;
+ int type;
+ /* IP:port pairs for BPF prog to override */
+ const char *requested_ip;
+ unsigned short requested_port;
+ const char *expected_ip;
+ unsigned short expected_port;
+ const char *expected_src_ip;
+ /* Expected test result */
+ enum {
+ LOAD_REJECT,
+ ATTACH_REJECT,
+ SYSCALL_EPERM,
+ SYSCALL_ENOTSUPP,
+ SUCCESS,
+ } expected_result;
+};
+
+static int bind4_prog_load(const struct sock_addr_test *test);
+static int bind6_prog_load(const struct sock_addr_test *test);
+static int connect4_prog_load(const struct sock_addr_test *test);
+static int connect6_prog_load(const struct sock_addr_test *test);
+static int sendmsg_allow_prog_load(const struct sock_addr_test *test);
+static int sendmsg_deny_prog_load(const struct sock_addr_test *test);
+static int sendmsg4_rw_asm_prog_load(const struct sock_addr_test *test);
+static int sendmsg4_rw_c_prog_load(const struct sock_addr_test *test);
+static int sendmsg6_rw_asm_prog_load(const struct sock_addr_test *test);
+static int sendmsg6_rw_c_prog_load(const struct sock_addr_test *test);
+static int sendmsg6_rw_v4mapped_prog_load(const struct sock_addr_test *test);
+static int sendmsg6_rw_wildcard_prog_load(const struct sock_addr_test *test);
+
+static struct sock_addr_test tests[] = {
+ /* bind */
+ {
+ "bind4: load prog with wrong expected attach type",
+ bind4_prog_load,
+ BPF_CGROUP_INET6_BIND,
+ BPF_CGROUP_INET4_BIND,
+ AF_INET,
+ SOCK_STREAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ LOAD_REJECT,
+ },
+ {
+ "bind4: attach prog with wrong attach type",
+ bind4_prog_load,
+ BPF_CGROUP_INET4_BIND,
+ BPF_CGROUP_INET6_BIND,
+ AF_INET,
+ SOCK_STREAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ ATTACH_REJECT,
+ },
+ {
+ "bind4: rewrite IP & TCP port in",
+ bind4_prog_load,
+ BPF_CGROUP_INET4_BIND,
+ BPF_CGROUP_INET4_BIND,
+ AF_INET,
+ SOCK_STREAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ "bind4: rewrite IP & UDP port in",
+ bind4_prog_load,
+ BPF_CGROUP_INET4_BIND,
+ BPF_CGROUP_INET4_BIND,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ "bind6: load prog with wrong expected attach type",
+ bind6_prog_load,
+ BPF_CGROUP_INET4_BIND,
+ BPF_CGROUP_INET6_BIND,
+ AF_INET6,
+ SOCK_STREAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ LOAD_REJECT,
+ },
+ {
+ "bind6: attach prog with wrong attach type",
+ bind6_prog_load,
+ BPF_CGROUP_INET6_BIND,
+ BPF_CGROUP_INET4_BIND,
+ AF_INET,
+ SOCK_STREAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ ATTACH_REJECT,
+ },
+ {
+ "bind6: rewrite IP & TCP port in",
+ bind6_prog_load,
+ BPF_CGROUP_INET6_BIND,
+ BPF_CGROUP_INET6_BIND,
+ AF_INET6,
+ SOCK_STREAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ "bind6: rewrite IP & UDP port in",
+ bind6_prog_load,
+ BPF_CGROUP_INET6_BIND,
+ BPF_CGROUP_INET6_BIND,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ NULL,
+ SUCCESS,
+ },
+
+ /* connect */
+ {
+ "connect4: load prog with wrong expected attach type",
+ connect4_prog_load,
+ BPF_CGROUP_INET6_CONNECT,
+ BPF_CGROUP_INET4_CONNECT,
+ AF_INET,
+ SOCK_STREAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ LOAD_REJECT,
+ },
+ {
+ "connect4: attach prog with wrong attach type",
+ connect4_prog_load,
+ BPF_CGROUP_INET4_CONNECT,
+ BPF_CGROUP_INET6_CONNECT,
+ AF_INET,
+ SOCK_STREAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ ATTACH_REJECT,
+ },
+ {
+ "connect4: rewrite IP & TCP port",
+ connect4_prog_load,
+ BPF_CGROUP_INET4_CONNECT,
+ BPF_CGROUP_INET4_CONNECT,
+ AF_INET,
+ SOCK_STREAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SRC4_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ "connect4: rewrite IP & UDP port",
+ connect4_prog_load,
+ BPF_CGROUP_INET4_CONNECT,
+ BPF_CGROUP_INET4_CONNECT,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SRC4_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ "connect6: load prog with wrong expected attach type",
+ connect6_prog_load,
+ BPF_CGROUP_INET4_CONNECT,
+ BPF_CGROUP_INET6_CONNECT,
+ AF_INET6,
+ SOCK_STREAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ LOAD_REJECT,
+ },
+ {
+ "connect6: attach prog with wrong attach type",
+ connect6_prog_load,
+ BPF_CGROUP_INET6_CONNECT,
+ BPF_CGROUP_INET4_CONNECT,
+ AF_INET,
+ SOCK_STREAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ ATTACH_REJECT,
+ },
+ {
+ "connect6: rewrite IP & TCP port",
+ connect6_prog_load,
+ BPF_CGROUP_INET6_CONNECT,
+ BPF_CGROUP_INET6_CONNECT,
+ AF_INET6,
+ SOCK_STREAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ "connect6: rewrite IP & UDP port",
+ connect6_prog_load,
+ BPF_CGROUP_INET6_CONNECT,
+ BPF_CGROUP_INET6_CONNECT,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SUCCESS,
+ },
+
+ /* sendmsg */
+ {
+ "sendmsg4: load prog with wrong expected attach type",
+ sendmsg4_rw_asm_prog_load,
+ BPF_CGROUP_UDP6_SENDMSG,
+ BPF_CGROUP_UDP4_SENDMSG,
+ AF_INET,
+ SOCK_DGRAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ LOAD_REJECT,
+ },
+ {
+ "sendmsg4: attach prog with wrong attach type",
+ sendmsg4_rw_asm_prog_load,
+ BPF_CGROUP_UDP4_SENDMSG,
+ BPF_CGROUP_UDP6_SENDMSG,
+ AF_INET,
+ SOCK_DGRAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ ATTACH_REJECT,
+ },
+ {
+ "sendmsg4: rewrite IP & port (asm)",
+ sendmsg4_rw_asm_prog_load,
+ BPF_CGROUP_UDP4_SENDMSG,
+ BPF_CGROUP_UDP4_SENDMSG,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SRC4_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ "sendmsg4: rewrite IP & port (C)",
+ sendmsg4_rw_c_prog_load,
+ BPF_CGROUP_UDP4_SENDMSG,
+ BPF_CGROUP_UDP4_SENDMSG,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SRC4_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ "sendmsg4: deny call",
+ sendmsg_deny_prog_load,
+ BPF_CGROUP_UDP4_SENDMSG,
+ BPF_CGROUP_UDP4_SENDMSG,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SRC4_REWRITE_IP,
+ SYSCALL_EPERM,
+ },
+ {
+ "sendmsg6: load prog with wrong expected attach type",
+ sendmsg6_rw_asm_prog_load,
+ BPF_CGROUP_UDP4_SENDMSG,
+ BPF_CGROUP_UDP6_SENDMSG,
+ AF_INET6,
+ SOCK_DGRAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ LOAD_REJECT,
+ },
+ {
+ "sendmsg6: attach prog with wrong attach type",
+ sendmsg6_rw_asm_prog_load,
+ BPF_CGROUP_UDP6_SENDMSG,
+ BPF_CGROUP_UDP4_SENDMSG,
+ AF_INET6,
+ SOCK_DGRAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ ATTACH_REJECT,
+ },
+ {
+ "sendmsg6: rewrite IP & port (asm)",
+ sendmsg6_rw_asm_prog_load,
+ BPF_CGROUP_UDP6_SENDMSG,
+ BPF_CGROUP_UDP6_SENDMSG,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ "sendmsg6: rewrite IP & port (C)",
+ sendmsg6_rw_c_prog_load,
+ BPF_CGROUP_UDP6_SENDMSG,
+ BPF_CGROUP_UDP6_SENDMSG,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ "sendmsg6: IPv4-mapped IPv6",
+ sendmsg6_rw_v4mapped_prog_load,
+ BPF_CGROUP_UDP6_SENDMSG,
+ BPF_CGROUP_UDP6_SENDMSG,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SYSCALL_ENOTSUPP,
+ },
+ {
+ "sendmsg6: set dst IP = [::] (BSD'ism)",
+ sendmsg6_rw_wildcard_prog_load,
+ BPF_CGROUP_UDP6_SENDMSG,
+ BPF_CGROUP_UDP6_SENDMSG,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ "sendmsg6: preserve dst IP = [::] (BSD'ism)",
+ sendmsg_allow_prog_load,
+ BPF_CGROUP_UDP6_SENDMSG,
+ BPF_CGROUP_UDP6_SENDMSG,
+ AF_INET6,
+ SOCK_DGRAM,
+ WILDCARD6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_PORT,
+ SRC6_IP,
+ SUCCESS,
+ },
+ {
+ "sendmsg6: deny call",
+ sendmsg_deny_prog_load,
+ BPF_CGROUP_UDP6_SENDMSG,
+ BPF_CGROUP_UDP6_SENDMSG,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SYSCALL_EPERM,
+ },
+};
+
+static int mk_sockaddr(int domain, const char *ip, unsigned short port,
+ struct sockaddr *addr, socklen_t addr_len)
+{
+ struct sockaddr_in6 *addr6;
+ struct sockaddr_in *addr4;
+
+ if (domain != AF_INET && domain != AF_INET6) {
+ log_err("Unsupported address family");
+ return -1;
+ }
+
+ memset(addr, 0, addr_len);
+
+ if (domain == AF_INET) {
+ if (addr_len < sizeof(struct sockaddr_in))
+ return -1;
+ addr4 = (struct sockaddr_in *)addr;
+ addr4->sin_family = domain;
+ addr4->sin_port = htons(port);
+ if (inet_pton(domain, ip, (void *)&addr4->sin_addr) != 1) {
+ log_err("Invalid IPv4: %s", ip);
+ return -1;
+ }
+ } else if (domain == AF_INET6) {
+ if (addr_len < sizeof(struct sockaddr_in6))
+ return -1;
+ addr6 = (struct sockaddr_in6 *)addr;
+ addr6->sin6_family = domain;
+ addr6->sin6_port = htons(port);
+ if (inet_pton(domain, ip, (void *)&addr6->sin6_addr) != 1) {
+ log_err("Invalid IPv6: %s", ip);
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static int load_insns(const struct sock_addr_test *test,
+ const struct bpf_insn *insns, size_t insns_cnt)
+{
+ struct bpf_load_program_attr load_attr;
+ int ret;
+
+ memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
+ load_attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
+ load_attr.expected_attach_type = test->expected_attach_type;
+ load_attr.insns = insns;
+ load_attr.insns_cnt = insns_cnt;
+ load_attr.license = "GPL";
+
+ ret = bpf_load_program_xattr(&load_attr, bpf_log_buf, BPF_LOG_BUF_SIZE);
+ if (ret < 0 && test->expected_result != LOAD_REJECT) {
+ log_err(">>> Loading program error.\n"
+ ">>> Verifier output:\n%s\n-------\n", bpf_log_buf);
+ }
+
+ return ret;
+}
+
+/* [1] These testing programs try to read different context fields, including
+ * narrow loads of different sizes from user_ip4 and user_ip6, and write to
+ * those allowed to be overridden.
+ *
+ * [2] BPF_LD_IMM64 & BPF_JMP_REG are used below whenever there is a need to
+ * compare a register with unsigned 32bit integer. BPF_JMP_IMM can't be used
+ * in such cases since it accepts only _signed_ 32bit integer as IMM
+ * argument. Also note that BPF_LD_IMM64 contains 2 instructions what matters
+ * to count jumps properly.
+ */
+
+static int bind4_prog_load(const struct sock_addr_test *test)
+{
+ union {
+ uint8_t u4_addr8[4];
+ uint16_t u4_addr16[2];
+ uint32_t u4_addr32;
+ } ip4;
+ struct sockaddr_in addr4_rw;
+
+ if (inet_pton(AF_INET, SERV4_IP, (void *)&ip4) != 1) {
+ log_err("Invalid IPv4: %s", SERV4_IP);
+ return -1;
+ }
+
+ if (mk_sockaddr(AF_INET, SERV4_REWRITE_IP, SERV4_REWRITE_PORT,
+ (struct sockaddr *)&addr4_rw, sizeof(addr4_rw)) == -1)
+ return -1;
+
+ /* See [1]. */
+ struct bpf_insn insns[] = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+
+ /* if (sk.family == AF_INET && */
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock_addr, family)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET, 16),
+
+ /* (sk.type == SOCK_DGRAM || sk.type == SOCK_STREAM) && */
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock_addr, type)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_DGRAM, 1),
+ BPF_JMP_A(1),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_STREAM, 12),
+
+ /* 1st_byte_of_user_ip4 == expected && */
+ BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock_addr, user_ip4)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[0], 10),
+
+ /* 1st_half_of_user_ip4 == expected && */
+ BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock_addr, user_ip4)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr16[0], 8),
+
+ /* whole_user_ip4 == expected) { */
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock_addr, user_ip4)),
+ BPF_LD_IMM64(BPF_REG_8, ip4.u4_addr32), /* See [2]. */
+ BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_8, 4),
+
+ /* user_ip4 = addr4_rw.sin_addr */
+ BPF_MOV32_IMM(BPF_REG_7, addr4_rw.sin_addr.s_addr),
+ BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
+ offsetof(struct bpf_sock_addr, user_ip4)),
+
+ /* user_port = addr4_rw.sin_port */
+ BPF_MOV32_IMM(BPF_REG_7, addr4_rw.sin_port),
+ BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
+ offsetof(struct bpf_sock_addr, user_port)),
+ /* } */
+
+ /* return 1 */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ };
+
+ return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn));
+}
+
+static int bind6_prog_load(const struct sock_addr_test *test)
+{
+ struct sockaddr_in6 addr6_rw;
+ struct in6_addr ip6;
+
+ if (inet_pton(AF_INET6, SERV6_IP, (void *)&ip6) != 1) {
+ log_err("Invalid IPv6: %s", SERV6_IP);
+ return -1;
+ }
+
+ if (mk_sockaddr(AF_INET6, SERV6_REWRITE_IP, SERV6_REWRITE_PORT,
+ (struct sockaddr *)&addr6_rw, sizeof(addr6_rw)) == -1)
+ return -1;
+
+ /* See [1]. */
+ struct bpf_insn insns[] = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+
+ /* if (sk.family == AF_INET6 && */
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock_addr, family)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET6, 18),
+
+ /* 5th_byte_of_user_ip6 == expected && */
+ BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock_addr, user_ip6[1])),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip6.s6_addr[4], 16),
+
+ /* 3rd_half_of_user_ip6 == expected && */
+ BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock_addr, user_ip6[1])),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip6.s6_addr16[2], 14),
+
+ /* last_word_of_user_ip6 == expected) { */
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock_addr, user_ip6[3])),
+ BPF_LD_IMM64(BPF_REG_8, ip6.s6_addr32[3]), /* See [2]. */
+ BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_8, 10),
+
+
+#define STORE_IPV6_WORD(N) \
+ BPF_MOV32_IMM(BPF_REG_7, addr6_rw.sin6_addr.s6_addr32[N]), \
+ BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7, \
+ offsetof(struct bpf_sock_addr, user_ip6[N]))
+
+ /* user_ip6 = addr6_rw.sin6_addr */
+ STORE_IPV6_WORD(0),
+ STORE_IPV6_WORD(1),
+ STORE_IPV6_WORD(2),
+ STORE_IPV6_WORD(3),
+
+ /* user_port = addr6_rw.sin6_port */
+ BPF_MOV32_IMM(BPF_REG_7, addr6_rw.sin6_port),
+ BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
+ offsetof(struct bpf_sock_addr, user_port)),
+
+ /* } */
+
+ /* return 1 */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ };
+
+ return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn));
+}
+
+static int load_path(const struct sock_addr_test *test, const char *path)
+{
+ struct bpf_prog_load_attr attr;
+ struct bpf_object *obj;
+ int prog_fd;
+
+ memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
+ attr.file = path;
+ attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
+ attr.expected_attach_type = test->expected_attach_type;
+
+ if (bpf_prog_load_xattr(&attr, &obj, &prog_fd)) {
+ if (test->expected_result != LOAD_REJECT)
+ log_err(">>> Loading program (%s) error.\n", path);
+ return -1;
+ }
+
+ return prog_fd;
+}
+
+static int connect4_prog_load(const struct sock_addr_test *test)
+{
+ return load_path(test, CONNECT4_PROG_PATH);
+}
+
+static int connect6_prog_load(const struct sock_addr_test *test)
+{
+ return load_path(test, CONNECT6_PROG_PATH);
+}
+
+static int sendmsg_ret_only_prog_load(const struct sock_addr_test *test,
+ int32_t rc)
+{
+ struct bpf_insn insns[] = {
+ /* return rc */
+ BPF_MOV64_IMM(BPF_REG_0, rc),
+ BPF_EXIT_INSN(),
+ };
+ return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn));
+}
+
+static int sendmsg_allow_prog_load(const struct sock_addr_test *test)
+{
+ return sendmsg_ret_only_prog_load(test, /*rc*/ 1);
+}
+
+static int sendmsg_deny_prog_load(const struct sock_addr_test *test)
+{
+ return sendmsg_ret_only_prog_load(test, /*rc*/ 0);
+}
+
+static int sendmsg4_rw_asm_prog_load(const struct sock_addr_test *test)
+{
+ struct sockaddr_in dst4_rw_addr;
+ struct in_addr src4_rw_ip;
+
+ if (inet_pton(AF_INET, SRC4_REWRITE_IP, (void *)&src4_rw_ip) != 1) {
+ log_err("Invalid IPv4: %s", SRC4_REWRITE_IP);
+ return -1;
+ }
+
+ if (mk_sockaddr(AF_INET, SERV4_REWRITE_IP, SERV4_REWRITE_PORT,
+ (struct sockaddr *)&dst4_rw_addr,
+ sizeof(dst4_rw_addr)) == -1)
+ return -1;
+
+ struct bpf_insn insns[] = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+
+ /* if (sk.family == AF_INET && */
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock_addr, family)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET, 8),
+
+ /* sk.type == SOCK_DGRAM) { */
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock_addr, type)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_DGRAM, 6),
+
+ /* msg_src_ip4 = src4_rw_ip */
+ BPF_MOV32_IMM(BPF_REG_7, src4_rw_ip.s_addr),
+ BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
+ offsetof(struct bpf_sock_addr, msg_src_ip4)),
+
+ /* user_ip4 = dst4_rw_addr.sin_addr */
+ BPF_MOV32_IMM(BPF_REG_7, dst4_rw_addr.sin_addr.s_addr),
+ BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
+ offsetof(struct bpf_sock_addr, user_ip4)),
+
+ /* user_port = dst4_rw_addr.sin_port */
+ BPF_MOV32_IMM(BPF_REG_7, dst4_rw_addr.sin_port),
+ BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
+ offsetof(struct bpf_sock_addr, user_port)),
+ /* } */
+
+ /* return 1 */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ };
+
+ return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn));
+}
+
+static int sendmsg4_rw_c_prog_load(const struct sock_addr_test *test)
+{
+ return load_path(test, SENDMSG4_PROG_PATH);
+}
+
+static int sendmsg6_rw_dst_asm_prog_load(const struct sock_addr_test *test,
+ const char *rw_dst_ip)
+{
+ struct sockaddr_in6 dst6_rw_addr;
+ struct in6_addr src6_rw_ip;
+
+ if (inet_pton(AF_INET6, SRC6_REWRITE_IP, (void *)&src6_rw_ip) != 1) {
+ log_err("Invalid IPv6: %s", SRC6_REWRITE_IP);
+ return -1;
+ }
+
+ if (mk_sockaddr(AF_INET6, rw_dst_ip, SERV6_REWRITE_PORT,
+ (struct sockaddr *)&dst6_rw_addr,
+ sizeof(dst6_rw_addr)) == -1)
+ return -1;
+
+ struct bpf_insn insns[] = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+
+ /* if (sk.family == AF_INET6) { */
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock_addr, family)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET6, 18),
+
+#define STORE_IPV6_WORD_N(DST, SRC, N) \
+ BPF_MOV32_IMM(BPF_REG_7, SRC[N]), \
+ BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7, \
+ offsetof(struct bpf_sock_addr, DST[N]))
+
+#define STORE_IPV6(DST, SRC) \
+ STORE_IPV6_WORD_N(DST, SRC, 0), \
+ STORE_IPV6_WORD_N(DST, SRC, 1), \
+ STORE_IPV6_WORD_N(DST, SRC, 2), \
+ STORE_IPV6_WORD_N(DST, SRC, 3)
+
+ STORE_IPV6(msg_src_ip6, src6_rw_ip.s6_addr32),
+ STORE_IPV6(user_ip6, dst6_rw_addr.sin6_addr.s6_addr32),
+
+ /* user_port = dst6_rw_addr.sin6_port */
+ BPF_MOV32_IMM(BPF_REG_7, dst6_rw_addr.sin6_port),
+ BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
+ offsetof(struct bpf_sock_addr, user_port)),
+
+ /* } */
+
+ /* return 1 */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ };
+
+ return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn));
+}
+
+static int sendmsg6_rw_asm_prog_load(const struct sock_addr_test *test)
+{
+ return sendmsg6_rw_dst_asm_prog_load(test, SERV6_REWRITE_IP);
+}
+
+static int sendmsg6_rw_v4mapped_prog_load(const struct sock_addr_test *test)
+{
+ return sendmsg6_rw_dst_asm_prog_load(test, SERV6_V4MAPPED_IP);
+}
+
+static int sendmsg6_rw_wildcard_prog_load(const struct sock_addr_test *test)
+{
+ return sendmsg6_rw_dst_asm_prog_load(test, WILDCARD6_IP);
+}
+
+static int sendmsg6_rw_c_prog_load(const struct sock_addr_test *test)
+{
+ return load_path(test, SENDMSG6_PROG_PATH);
+}
+
+static int cmp_addr(const struct sockaddr_storage *addr1,
+ const struct sockaddr_storage *addr2, int cmp_port)
+{
+ const struct sockaddr_in *four1, *four2;
+ const struct sockaddr_in6 *six1, *six2;
+
+ if (addr1->ss_family != addr2->ss_family)
+ return -1;
+
+ if (addr1->ss_family == AF_INET) {
+ four1 = (const struct sockaddr_in *)addr1;
+ four2 = (const struct sockaddr_in *)addr2;
+ return !((four1->sin_port == four2->sin_port || !cmp_port) &&
+ four1->sin_addr.s_addr == four2->sin_addr.s_addr);
+ } else if (addr1->ss_family == AF_INET6) {
+ six1 = (const struct sockaddr_in6 *)addr1;
+ six2 = (const struct sockaddr_in6 *)addr2;
+ return !((six1->sin6_port == six2->sin6_port || !cmp_port) &&
+ !memcmp(&six1->sin6_addr, &six2->sin6_addr,
+ sizeof(struct in6_addr)));
+ }
+
+ return -1;
+}
+
+static int cmp_sock_addr(info_fn fn, int sock1,
+ const struct sockaddr_storage *addr2, int cmp_port)
+{
+ struct sockaddr_storage addr1;
+ socklen_t len1 = sizeof(addr1);
+
+ memset(&addr1, 0, len1);
+ if (fn(sock1, (struct sockaddr *)&addr1, (socklen_t *)&len1) != 0)
+ return -1;
+
+ return cmp_addr(&addr1, addr2, cmp_port);
+}
+
+static int cmp_local_ip(int sock1, const struct sockaddr_storage *addr2)
+{
+ return cmp_sock_addr(getsockname, sock1, addr2, /*cmp_port*/ 0);
+}
+
+static int cmp_local_addr(int sock1, const struct sockaddr_storage *addr2)
+{
+ return cmp_sock_addr(getsockname, sock1, addr2, /*cmp_port*/ 1);
+}
+
+static int cmp_peer_addr(int sock1, const struct sockaddr_storage *addr2)
+{
+ return cmp_sock_addr(getpeername, sock1, addr2, /*cmp_port*/ 1);
+}
+
+static int start_server(int type, const struct sockaddr_storage *addr,
+ socklen_t addr_len)
+{
+ int fd;
+
+ fd = socket(addr->ss_family, type, 0);
+ if (fd == -1) {
+ log_err("Failed to create server socket");
+ goto out;
+ }
+
+ if (bind(fd, (const struct sockaddr *)addr, addr_len) == -1) {
+ log_err("Failed to bind server socket");
+ goto close_out;
+ }
+
+ if (type == SOCK_STREAM) {
+ if (listen(fd, 128) == -1) {
+ log_err("Failed to listen on server socket");
+ goto close_out;
+ }
+ }
+
+ goto out;
+close_out:
+ close(fd);
+ fd = -1;
+out:
+ return fd;
+}
+
+static int connect_to_server(int type, const struct sockaddr_storage *addr,
+ socklen_t addr_len)
+{
+ int domain;
+ int fd = -1;
+
+ domain = addr->ss_family;
+
+ if (domain != AF_INET && domain != AF_INET6) {
+ log_err("Unsupported address family");
+ goto err;
+ }
+
+ fd = socket(domain, type, 0);
+ if (fd == -1) {
+ log_err("Failed to create client socket");
+ goto err;
+ }
+
+ if (connect(fd, (const struct sockaddr *)addr, addr_len) == -1) {
+ log_err("Fail to connect to server");
+ goto err;
+ }
+
+ goto out;
+err:
+ close(fd);
+ fd = -1;
+out:
+ return fd;
+}
+
+int init_pktinfo(int domain, struct cmsghdr *cmsg)
+{
+ struct in6_pktinfo *pktinfo6;
+ struct in_pktinfo *pktinfo4;
+
+ if (domain == AF_INET) {
+ cmsg->cmsg_level = SOL_IP;
+ cmsg->cmsg_type = IP_PKTINFO;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo));
+ pktinfo4 = (struct in_pktinfo *)CMSG_DATA(cmsg);
+ memset(pktinfo4, 0, sizeof(struct in_pktinfo));
+ if (inet_pton(domain, SRC4_IP,
+ (void *)&pktinfo4->ipi_spec_dst) != 1)
+ return -1;
+ } else if (domain == AF_INET6) {
+ cmsg->cmsg_level = SOL_IPV6;
+ cmsg->cmsg_type = IPV6_PKTINFO;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
+ pktinfo6 = (struct in6_pktinfo *)CMSG_DATA(cmsg);
+ memset(pktinfo6, 0, sizeof(struct in6_pktinfo));
+ if (inet_pton(domain, SRC6_IP,
+ (void *)&pktinfo6->ipi6_addr) != 1)
+ return -1;
+ } else {
+ return -1;
+ }
+
+ return 0;
+}
+
+static int sendmsg_to_server(int type, const struct sockaddr_storage *addr,
+ socklen_t addr_len, int set_cmsg, int flags,
+ int *syscall_err)
+{
+ union {
+ char buf[CMSG_SPACE(sizeof(struct in6_pktinfo))];
+ struct cmsghdr align;
+ } control6;
+ union {
+ char buf[CMSG_SPACE(sizeof(struct in_pktinfo))];
+ struct cmsghdr align;
+ } control4;
+ struct msghdr hdr;
+ struct iovec iov;
+ char data = 'a';
+ int domain;
+ int fd = -1;
+
+ domain = addr->ss_family;
+
+ if (domain != AF_INET && domain != AF_INET6) {
+ log_err("Unsupported address family");
+ goto err;
+ }
+
+ fd = socket(domain, type, 0);
+ if (fd == -1) {
+ log_err("Failed to create client socket");
+ goto err;
+ }
+
+ memset(&iov, 0, sizeof(iov));
+ iov.iov_base = &data;
+ iov.iov_len = sizeof(data);
+
+ memset(&hdr, 0, sizeof(hdr));
+ hdr.msg_name = (void *)addr;
+ hdr.msg_namelen = addr_len;
+ hdr.msg_iov = &iov;
+ hdr.msg_iovlen = 1;
+
+ if (set_cmsg) {
+ if (domain == AF_INET) {
+ hdr.msg_control = &control4;
+ hdr.msg_controllen = sizeof(control4.buf);
+ } else if (domain == AF_INET6) {
+ hdr.msg_control = &control6;
+ hdr.msg_controllen = sizeof(control6.buf);
+ }
+ if (init_pktinfo(domain, CMSG_FIRSTHDR(&hdr))) {
+ log_err("Fail to init pktinfo");
+ goto err;
+ }
+ }
+
+ if (sendmsg(fd, &hdr, flags) != sizeof(data)) {
+ log_err("Fail to send message to server");
+ *syscall_err = errno;
+ goto err;
+ }
+
+ goto out;
+err:
+ close(fd);
+ fd = -1;
+out:
+ return fd;
+}
+
+static int fastconnect_to_server(const struct sockaddr_storage *addr,
+ socklen_t addr_len)
+{
+ int sendmsg_err;
+
+ return sendmsg_to_server(SOCK_STREAM, addr, addr_len, /*set_cmsg*/0,
+ MSG_FASTOPEN, &sendmsg_err);
+}
+
+static int recvmsg_from_client(int sockfd, struct sockaddr_storage *src_addr)
+{
+ struct timeval tv;
+ struct msghdr hdr;
+ struct iovec iov;
+ char data[64];
+ fd_set rfds;
+
+ FD_ZERO(&rfds);
+ FD_SET(sockfd, &rfds);
+
+ tv.tv_sec = 2;
+ tv.tv_usec = 0;
+
+ if (select(sockfd + 1, &rfds, NULL, NULL, &tv) <= 0 ||
+ !FD_ISSET(sockfd, &rfds))
+ return -1;
+
+ memset(&iov, 0, sizeof(iov));
+ iov.iov_base = data;
+ iov.iov_len = sizeof(data);
+
+ memset(&hdr, 0, sizeof(hdr));
+ hdr.msg_name = src_addr;
+ hdr.msg_namelen = sizeof(struct sockaddr_storage);
+ hdr.msg_iov = &iov;
+ hdr.msg_iovlen = 1;
+
+ return recvmsg(sockfd, &hdr, 0);
+}
+
+static int init_addrs(const struct sock_addr_test *test,
+ struct sockaddr_storage *requested_addr,
+ struct sockaddr_storage *expected_addr,
+ struct sockaddr_storage *expected_src_addr)
+{
+ socklen_t addr_len = sizeof(struct sockaddr_storage);
+
+ if (mk_sockaddr(test->domain, test->expected_ip, test->expected_port,
+ (struct sockaddr *)expected_addr, addr_len) == -1)
+ goto err;
+
+ if (mk_sockaddr(test->domain, test->requested_ip, test->requested_port,
+ (struct sockaddr *)requested_addr, addr_len) == -1)
+ goto err;
+
+ if (test->expected_src_ip &&
+ mk_sockaddr(test->domain, test->expected_src_ip, 0,
+ (struct sockaddr *)expected_src_addr, addr_len) == -1)
+ goto err;
+
+ return 0;
+err:
+ return -1;
+}
+
+static int run_bind_test_case(const struct sock_addr_test *test)
+{
+ socklen_t addr_len = sizeof(struct sockaddr_storage);
+ struct sockaddr_storage requested_addr;
+ struct sockaddr_storage expected_addr;
+ int clientfd = -1;
+ int servfd = -1;
+ int err = 0;
+
+ if (init_addrs(test, &requested_addr, &expected_addr, NULL))
+ goto err;
+
+ servfd = start_server(test->type, &requested_addr, addr_len);
+ if (servfd == -1)
+ goto err;
+
+ if (cmp_local_addr(servfd, &expected_addr))
+ goto err;
+
+ /* Try to connect to server just in case */
+ clientfd = connect_to_server(test->type, &expected_addr, addr_len);
+ if (clientfd == -1)
+ goto err;
+
+ goto out;
+err:
+ err = -1;
+out:
+ close(clientfd);
+ close(servfd);
+ return err;
+}
+
+static int run_connect_test_case(const struct sock_addr_test *test)
+{
+ socklen_t addr_len = sizeof(struct sockaddr_storage);
+ struct sockaddr_storage expected_src_addr;
+ struct sockaddr_storage requested_addr;
+ struct sockaddr_storage expected_addr;
+ int clientfd = -1;
+ int servfd = -1;
+ int err = 0;
+
+ if (init_addrs(test, &requested_addr, &expected_addr,
+ &expected_src_addr))
+ goto err;
+
+ /* Prepare server to connect to */
+ servfd = start_server(test->type, &expected_addr, addr_len);
+ if (servfd == -1)
+ goto err;
+
+ clientfd = connect_to_server(test->type, &requested_addr, addr_len);
+ if (clientfd == -1)
+ goto err;
+
+ /* Make sure src and dst addrs were overridden properly */
+ if (cmp_peer_addr(clientfd, &expected_addr))
+ goto err;
+
+ if (cmp_local_ip(clientfd, &expected_src_addr))
+ goto err;
+
+ if (test->type == SOCK_STREAM) {
+ /* Test TCP Fast Open scenario */
+ clientfd = fastconnect_to_server(&requested_addr, addr_len);
+ if (clientfd == -1)
+ goto err;
+
+ /* Make sure src and dst addrs were overridden properly */
+ if (cmp_peer_addr(clientfd, &expected_addr))
+ goto err;
+
+ if (cmp_local_ip(clientfd, &expected_src_addr))
+ goto err;
+ }
+
+ goto out;
+err:
+ err = -1;
+out:
+ close(clientfd);
+ close(servfd);
+ return err;
+}
+
+static int run_sendmsg_test_case(const struct sock_addr_test *test)
+{
+ socklen_t addr_len = sizeof(struct sockaddr_storage);
+ struct sockaddr_storage expected_src_addr;
+ struct sockaddr_storage requested_addr;
+ struct sockaddr_storage expected_addr;
+ struct sockaddr_storage real_src_addr;
+ int clientfd = -1;
+ int servfd = -1;
+ int set_cmsg;
+ int err = 0;
+
+ if (test->type != SOCK_DGRAM)
+ goto err;
+
+ if (init_addrs(test, &requested_addr, &expected_addr,
+ &expected_src_addr))
+ goto err;
+
+ /* Prepare server to sendmsg to */
+ servfd = start_server(test->type, &expected_addr, addr_len);
+ if (servfd == -1)
+ goto err;
+
+ for (set_cmsg = 0; set_cmsg <= 1; ++set_cmsg) {
+ if (clientfd >= 0)
+ close(clientfd);
+
+ clientfd = sendmsg_to_server(test->type, &requested_addr,
+ addr_len, set_cmsg, /*flags*/0,
+ &err);
+ if (err)
+ goto out;
+ else if (clientfd == -1)
+ goto err;
+
+ /* Try to receive message on server instead of using
+ * getpeername(2) on client socket, to check that client's
+ * destination address was rewritten properly, since
+ * getpeername(2) doesn't work with unconnected datagram
+ * sockets.
+ *
+ * Get source address from recvmsg(2) as well to make sure
+ * source was rewritten properly: getsockname(2) can't be used
+ * since socket is unconnected and source defined for one
+ * specific packet may differ from the one used by default and
+ * returned by getsockname(2).
+ */
+ if (recvmsg_from_client(servfd, &real_src_addr) == -1)
+ goto err;
+
+ if (cmp_addr(&real_src_addr, &expected_src_addr, /*cmp_port*/0))
+ goto err;
+ }
+
+ goto out;
+err:
+ err = -1;
+out:
+ close(clientfd);
+ close(servfd);
+ return err;
+}
+
+static int run_test_case(int cgfd, const struct sock_addr_test *test)
+{
+ int progfd = -1;
+ int err = 0;
+
+ printf("Test case: %s .. ", test->descr);
+
+ progfd = test->loadfn(test);
+ if (test->expected_result == LOAD_REJECT && progfd < 0)
+ goto out;
+ else if (test->expected_result == LOAD_REJECT || progfd < 0)
+ goto err;
+
+ err = bpf_prog_attach(progfd, cgfd, test->attach_type,
+ BPF_F_ALLOW_OVERRIDE);
+ if (test->expected_result == ATTACH_REJECT && err) {
+ err = 0; /* error was expected, reset it */
+ goto out;
+ } else if (test->expected_result == ATTACH_REJECT || err) {
+ goto err;
+ }
+
+ switch (test->attach_type) {
+ case BPF_CGROUP_INET4_BIND:
+ case BPF_CGROUP_INET6_BIND:
+ err = run_bind_test_case(test);
+ break;
+ case BPF_CGROUP_INET4_CONNECT:
+ case BPF_CGROUP_INET6_CONNECT:
+ err = run_connect_test_case(test);
+ break;
+ case BPF_CGROUP_UDP4_SENDMSG:
+ case BPF_CGROUP_UDP6_SENDMSG:
+ err = run_sendmsg_test_case(test);
+ break;
+ default:
+ goto err;
+ }
+
+ if (test->expected_result == SYSCALL_EPERM && err == EPERM) {
+ err = 0; /* error was expected, reset it */
+ goto out;
+ }
+
+ if (test->expected_result == SYSCALL_ENOTSUPP && err == ENOTSUPP) {
+ err = 0; /* error was expected, reset it */
+ goto out;
+ }
+
+ if (err || test->expected_result != SUCCESS)
+ goto err;
+
+ goto out;
+err:
+ err = -1;
+out:
+ /* Detaching w/o checking return code: best effort attempt. */
+ if (progfd != -1)
+ bpf_prog_detach(cgfd, test->attach_type);
+ close(progfd);
+ printf("[%s]\n", err ? "FAIL" : "PASS");
+ return err;
+}
+
+static int run_tests(int cgfd)
+{
+ int passes = 0;
+ int fails = 0;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(tests); ++i) {
+ if (run_test_case(cgfd, &tests[i]))
+ ++fails;
+ else
+ ++passes;
+ }
+ printf("Summary: %d PASSED, %d FAILED\n", passes, fails);
+ return fails ? -1 : 0;
+}
+
+int main(int argc, char **argv)
+{
+ int cgfd = -1;
+ int err = 0;
+
+ if (argc < 2) {
+ fprintf(stderr,
+ "%s has to be run via %s.sh. Skip direct run.\n",
+ argv[0], argv[0]);
+ exit(err);
+ }
+
+ if (setup_cgroup_environment())
+ goto err;
+
+ cgfd = create_and_get_cgroup(CG_PATH);
+ if (!cgfd)
+ goto err;
+
+ if (join_cgroup(CG_PATH))
+ goto err;
+
+ if (run_tests(cgfd))
+ goto err;
+
+ goto out;
+err:
+ err = -1;
+out:
+ close(cgfd);
+ cleanup_cgroup_environment();
+ return err;
+}
diff --git a/tools/testing/selftests/bpf/test_sock_addr.sh b/tools/testing/selftests/bpf/test_sock_addr.sh
new file mode 100755
index 000000000..9832a875a
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_sock_addr.sh
@@ -0,0 +1,57 @@
+#!/bin/sh
+
+set -eu
+
+ping_once()
+{
+ ping -${1} -q -c 1 -W 1 ${2%%/*} >/dev/null 2>&1
+}
+
+wait_for_ip()
+{
+ local _i
+ echo -n "Wait for testing IPv4/IPv6 to become available "
+ for _i in $(seq ${MAX_PING_TRIES}); do
+ echo -n "."
+ if ping_once 4 ${TEST_IPv4} && ping_once 6 ${TEST_IPv6}; then
+ echo " OK"
+ return
+ fi
+ done
+ echo 1>&2 "ERROR: Timeout waiting for test IP to become available."
+ exit 1
+}
+
+setup()
+{
+ # Create testing interfaces not to interfere with current environment.
+ ip link add dev ${TEST_IF} type veth peer name ${TEST_IF_PEER}
+ ip link set ${TEST_IF} up
+ ip link set ${TEST_IF_PEER} up
+
+ ip -4 addr add ${TEST_IPv4} dev ${TEST_IF}
+ ip -6 addr add ${TEST_IPv6} dev ${TEST_IF}
+ wait_for_ip
+}
+
+cleanup()
+{
+ ip link del ${TEST_IF} 2>/dev/null || :
+ ip link del ${TEST_IF_PEER} 2>/dev/null || :
+}
+
+main()
+{
+ trap cleanup EXIT 2 3 6 15
+ setup
+ ./test_sock_addr setup_done
+}
+
+BASENAME=$(basename $0 .sh)
+TEST_IF="${BASENAME}1"
+TEST_IF_PEER="${BASENAME}2"
+TEST_IPv4="127.0.0.4/8"
+TEST_IPv6="::6/128"
+MAX_PING_TRIES=5
+
+main
diff --git a/tools/testing/selftests/bpf/test_socket_cookie.c b/tools/testing/selftests/bpf/test_socket_cookie.c
new file mode 100644
index 000000000..68e108e46
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_socket_cookie.c
@@ -0,0 +1,225 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <string.h>
+#include <unistd.h>
+
+#include <arpa/inet.h>
+#include <netinet/in.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "bpf_rlimit.h"
+#include "cgroup_helpers.h"
+
+#define CG_PATH "/foo"
+#define SOCKET_COOKIE_PROG "./socket_cookie_prog.o"
+
+static int start_server(void)
+{
+ struct sockaddr_in6 addr;
+ int fd;
+
+ fd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (fd == -1) {
+ log_err("Failed to create server socket");
+ goto out;
+ }
+
+ memset(&addr, 0, sizeof(addr));
+ addr.sin6_family = AF_INET6;
+ addr.sin6_addr = in6addr_loopback;
+ addr.sin6_port = 0;
+
+ if (bind(fd, (const struct sockaddr *)&addr, sizeof(addr)) == -1) {
+ log_err("Failed to bind server socket");
+ goto close_out;
+ }
+
+ if (listen(fd, 128) == -1) {
+ log_err("Failed to listen on server socket");
+ goto close_out;
+ }
+
+ goto out;
+
+close_out:
+ close(fd);
+ fd = -1;
+out:
+ return fd;
+}
+
+static int connect_to_server(int server_fd)
+{
+ struct sockaddr_storage addr;
+ socklen_t len = sizeof(addr);
+ int fd;
+
+ fd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (fd == -1) {
+ log_err("Failed to create client socket");
+ goto out;
+ }
+
+ if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) {
+ log_err("Failed to get server addr");
+ goto close_out;
+ }
+
+ if (connect(fd, (const struct sockaddr *)&addr, len) == -1) {
+ log_err("Fail to connect to server");
+ goto close_out;
+ }
+
+ goto out;
+
+close_out:
+ close(fd);
+ fd = -1;
+out:
+ return fd;
+}
+
+static int validate_map(struct bpf_map *map, int client_fd)
+{
+ __u32 cookie_expected_value;
+ struct sockaddr_in6 addr;
+ socklen_t len = sizeof(addr);
+ __u32 cookie_value;
+ __u64 cookie_key;
+ int err = 0;
+ int map_fd;
+
+ if (!map) {
+ log_err("Map not found in BPF object");
+ goto err;
+ }
+
+ map_fd = bpf_map__fd(map);
+
+ err = bpf_map_get_next_key(map_fd, NULL, &cookie_key);
+ if (err) {
+ log_err("Can't get cookie key from map");
+ goto out;
+ }
+
+ err = bpf_map_lookup_elem(map_fd, &cookie_key, &cookie_value);
+ if (err) {
+ log_err("Can't get cookie value from map");
+ goto out;
+ }
+
+ err = getsockname(client_fd, (struct sockaddr *)&addr, &len);
+ if (err) {
+ log_err("Can't get client local addr");
+ goto out;
+ }
+
+ cookie_expected_value = (ntohs(addr.sin6_port) << 8) | 0xFF;
+ if (cookie_value != cookie_expected_value) {
+ log_err("Unexpected value in map: %x != %x", cookie_value,
+ cookie_expected_value);
+ goto err;
+ }
+
+ goto out;
+err:
+ err = -1;
+out:
+ return err;
+}
+
+static int run_test(int cgfd)
+{
+ enum bpf_attach_type attach_type;
+ struct bpf_prog_load_attr attr;
+ struct bpf_program *prog;
+ struct bpf_object *pobj;
+ const char *prog_name;
+ int server_fd = -1;
+ int client_fd = -1;
+ int prog_fd = -1;
+ int err = 0;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.file = SOCKET_COOKIE_PROG;
+ attr.prog_type = BPF_PROG_TYPE_UNSPEC;
+
+ err = bpf_prog_load_xattr(&attr, &pobj, &prog_fd);
+ if (err) {
+ log_err("Failed to load %s", attr.file);
+ goto out;
+ }
+
+ bpf_object__for_each_program(prog, pobj) {
+ prog_name = bpf_program__title(prog, /*needs_copy*/ false);
+
+ if (strcmp(prog_name, "cgroup/connect6") == 0) {
+ attach_type = BPF_CGROUP_INET6_CONNECT;
+ } else if (strcmp(prog_name, "sockops") == 0) {
+ attach_type = BPF_CGROUP_SOCK_OPS;
+ } else {
+ log_err("Unexpected prog: %s", prog_name);
+ goto err;
+ }
+
+ err = bpf_prog_attach(bpf_program__fd(prog), cgfd, attach_type,
+ BPF_F_ALLOW_OVERRIDE);
+ if (err) {
+ log_err("Failed to attach prog %s", prog_name);
+ goto out;
+ }
+ }
+
+ server_fd = start_server();
+ if (server_fd == -1)
+ goto err;
+
+ client_fd = connect_to_server(server_fd);
+ if (client_fd == -1)
+ goto err;
+
+ if (validate_map(bpf_map__next(NULL, pobj), client_fd))
+ goto err;
+
+ goto out;
+err:
+ err = -1;
+out:
+ close(client_fd);
+ close(server_fd);
+ bpf_object__close(pobj);
+ printf("%s\n", err ? "FAILED" : "PASSED");
+ return err;
+}
+
+int main(int argc, char **argv)
+{
+ int cgfd = -1;
+ int err = 0;
+
+ if (setup_cgroup_environment())
+ goto err;
+
+ cgfd = create_and_get_cgroup(CG_PATH);
+ if (!cgfd)
+ goto err;
+
+ if (join_cgroup(CG_PATH))
+ goto err;
+
+ if (run_test(cgfd))
+ goto err;
+
+ goto out;
+err:
+ err = -1;
+out:
+ close(cgfd);
+ cleanup_cgroup_environment();
+ return err;
+}
diff --git a/tools/testing/selftests/bpf/test_sockhash_kern.c b/tools/testing/selftests/bpf/test_sockhash_kern.c
new file mode 100644
index 000000000..e67559164
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_sockhash_kern.c
@@ -0,0 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
+#undef SOCKMAP
+#define TEST_MAP_TYPE BPF_MAP_TYPE_SOCKHASH
+#include "./test_sockmap_kern.h"
diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
new file mode 100644
index 000000000..a7fc91bb9
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -0,0 +1,1527 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017-2018 Covalent IO, Inc. http://covalent.io
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <sys/select.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <unistd.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/ioctl.h>
+#include <stdbool.h>
+#include <signal.h>
+#include <fcntl.h>
+#include <sys/wait.h>
+#include <time.h>
+#include <sched.h>
+
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/types.h>
+#include <sys/sendfile.h>
+
+#include <linux/netlink.h>
+#include <linux/socket.h>
+#include <linux/sock_diag.h>
+#include <linux/bpf.h>
+#include <linux/if_link.h>
+#include <assert.h>
+#include <libgen.h>
+
+#include <getopt.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "bpf_util.h"
+#include "bpf_rlimit.h"
+#include "cgroup_helpers.h"
+
+int running;
+static void running_handler(int a);
+
+/* randomly selected ports for testing on lo */
+#define S1_PORT 10000
+#define S2_PORT 10001
+
+#define BPF_SOCKMAP_FILENAME "test_sockmap_kern.o"
+#define BPF_SOCKHASH_FILENAME "test_sockhash_kern.o"
+#define CG_PATH "/sockmap"
+
+/* global sockets */
+int s1, s2, c1, c2, p1, p2;
+int test_cnt;
+int passed;
+int failed;
+int map_fd[8];
+struct bpf_map *maps[8];
+int prog_fd[11];
+
+int txmsg_pass;
+int txmsg_noisy;
+int txmsg_redir;
+int txmsg_redir_noisy;
+int txmsg_drop;
+int txmsg_apply;
+int txmsg_cork;
+int txmsg_start;
+int txmsg_end;
+int txmsg_ingress;
+int txmsg_skb;
+
+static const struct option long_options[] = {
+ {"help", no_argument, NULL, 'h' },
+ {"cgroup", required_argument, NULL, 'c' },
+ {"rate", required_argument, NULL, 'r' },
+ {"verbose", no_argument, NULL, 'v' },
+ {"iov_count", required_argument, NULL, 'i' },
+ {"length", required_argument, NULL, 'l' },
+ {"test", required_argument, NULL, 't' },
+ {"data_test", no_argument, NULL, 'd' },
+ {"txmsg", no_argument, &txmsg_pass, 1 },
+ {"txmsg_noisy", no_argument, &txmsg_noisy, 1 },
+ {"txmsg_redir", no_argument, &txmsg_redir, 1 },
+ {"txmsg_redir_noisy", no_argument, &txmsg_redir_noisy, 1},
+ {"txmsg_drop", no_argument, &txmsg_drop, 1 },
+ {"txmsg_apply", required_argument, NULL, 'a'},
+ {"txmsg_cork", required_argument, NULL, 'k'},
+ {"txmsg_start", required_argument, NULL, 's'},
+ {"txmsg_end", required_argument, NULL, 'e'},
+ {"txmsg_ingress", no_argument, &txmsg_ingress, 1 },
+ {"txmsg_skb", no_argument, &txmsg_skb, 1 },
+ {0, 0, NULL, 0 }
+};
+
+static void usage(char *argv[])
+{
+ int i;
+
+ printf(" Usage: %s --cgroup <cgroup_path>\n", argv[0]);
+ printf(" options:\n");
+ for (i = 0; long_options[i].name != 0; i++) {
+ printf(" --%-12s", long_options[i].name);
+ if (long_options[i].flag != NULL)
+ printf(" flag (internal value:%d)\n",
+ *long_options[i].flag);
+ else
+ printf(" -%c\n", long_options[i].val);
+ }
+ printf("\n");
+}
+
+static int sockmap_init_sockets(int verbose)
+{
+ int i, err, one = 1;
+ struct sockaddr_in addr;
+ int *fds[4] = {&s1, &s2, &c1, &c2};
+
+ s1 = s2 = p1 = p2 = c1 = c2 = 0;
+
+ /* Init sockets */
+ for (i = 0; i < 4; i++) {
+ *fds[i] = socket(AF_INET, SOCK_STREAM, 0);
+ if (*fds[i] < 0) {
+ perror("socket s1 failed()");
+ return errno;
+ }
+ }
+
+ /* Allow reuse */
+ for (i = 0; i < 2; i++) {
+ err = setsockopt(*fds[i], SOL_SOCKET, SO_REUSEADDR,
+ (char *)&one, sizeof(one));
+ if (err) {
+ perror("setsockopt failed()");
+ return errno;
+ }
+ }
+
+ /* Non-blocking sockets */
+ for (i = 0; i < 2; i++) {
+ err = ioctl(*fds[i], FIONBIO, (char *)&one);
+ if (err < 0) {
+ perror("ioctl s1 failed()");
+ return errno;
+ }
+ }
+
+ /* Bind server sockets */
+ memset(&addr, 0, sizeof(struct sockaddr_in));
+ addr.sin_family = AF_INET;
+ addr.sin_addr.s_addr = inet_addr("127.0.0.1");
+
+ addr.sin_port = htons(S1_PORT);
+ err = bind(s1, (struct sockaddr *)&addr, sizeof(addr));
+ if (err < 0) {
+ perror("bind s1 failed()\n");
+ return errno;
+ }
+
+ addr.sin_port = htons(S2_PORT);
+ err = bind(s2, (struct sockaddr *)&addr, sizeof(addr));
+ if (err < 0) {
+ perror("bind s2 failed()\n");
+ return errno;
+ }
+
+ /* Listen server sockets */
+ addr.sin_port = htons(S1_PORT);
+ err = listen(s1, 32);
+ if (err < 0) {
+ perror("listen s1 failed()\n");
+ return errno;
+ }
+
+ addr.sin_port = htons(S2_PORT);
+ err = listen(s2, 32);
+ if (err < 0) {
+ perror("listen s1 failed()\n");
+ return errno;
+ }
+
+ /* Initiate Connect */
+ addr.sin_port = htons(S1_PORT);
+ err = connect(c1, (struct sockaddr *)&addr, sizeof(addr));
+ if (err < 0 && errno != EINPROGRESS) {
+ perror("connect c1 failed()\n");
+ return errno;
+ }
+
+ addr.sin_port = htons(S2_PORT);
+ err = connect(c2, (struct sockaddr *)&addr, sizeof(addr));
+ if (err < 0 && errno != EINPROGRESS) {
+ perror("connect c2 failed()\n");
+ return errno;
+ } else if (err < 0) {
+ err = 0;
+ }
+
+ /* Accept Connecrtions */
+ p1 = accept(s1, NULL, NULL);
+ if (p1 < 0) {
+ perror("accept s1 failed()\n");
+ return errno;
+ }
+
+ p2 = accept(s2, NULL, NULL);
+ if (p2 < 0) {
+ perror("accept s1 failed()\n");
+ return errno;
+ }
+
+ if (verbose) {
+ printf("connected sockets: c1 <-> p1, c2 <-> p2\n");
+ printf("cgroups binding: c1(%i) <-> s1(%i) - - - c2(%i) <-> s2(%i)\n",
+ c1, s1, c2, s2);
+ }
+ return 0;
+}
+
+struct msg_stats {
+ size_t bytes_sent;
+ size_t bytes_recvd;
+ struct timespec start;
+ struct timespec end;
+};
+
+struct sockmap_options {
+ int verbose;
+ bool base;
+ bool sendpage;
+ bool data_test;
+ bool drop_expected;
+ int iov_count;
+ int iov_length;
+ int rate;
+};
+
+static int msg_loop_sendpage(int fd, int iov_length, int cnt,
+ struct msg_stats *s,
+ struct sockmap_options *opt)
+{
+ bool drop = opt->drop_expected;
+ unsigned char k = 0;
+ FILE *file;
+ int i, fp;
+
+ file = fopen(".sendpage_tst.tmp", "w+");
+ if (!file) {
+ perror("create file for sendpage");
+ return 1;
+ }
+ for (i = 0; i < iov_length * cnt; i++, k++)
+ fwrite(&k, sizeof(char), 1, file);
+ fflush(file);
+ fseek(file, 0, SEEK_SET);
+ fclose(file);
+
+ fp = open(".sendpage_tst.tmp", O_RDONLY);
+ if (fp < 0) {
+ perror("reopen file for sendpage");
+ return 1;
+ }
+
+ clock_gettime(CLOCK_MONOTONIC, &s->start);
+ for (i = 0; i < cnt; i++) {
+ int sent = sendfile(fd, fp, NULL, iov_length);
+
+ if (!drop && sent < 0) {
+ perror("send loop error:");
+ close(fp);
+ return sent;
+ } else if (drop && sent >= 0) {
+ printf("sendpage loop error expected: %i\n", sent);
+ close(fp);
+ return -EIO;
+ }
+
+ if (sent > 0)
+ s->bytes_sent += sent;
+ }
+ clock_gettime(CLOCK_MONOTONIC, &s->end);
+ close(fp);
+ return 0;
+}
+
+static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
+ struct msg_stats *s, bool tx,
+ struct sockmap_options *opt)
+{
+ struct msghdr msg = {0};
+ int err, i, flags = MSG_NOSIGNAL;
+ struct iovec *iov;
+ unsigned char k;
+ bool data_test = opt->data_test;
+ bool drop = opt->drop_expected;
+
+ iov = calloc(iov_count, sizeof(struct iovec));
+ if (!iov)
+ return errno;
+
+ k = 0;
+ for (i = 0; i < iov_count; i++) {
+ unsigned char *d = calloc(iov_length, sizeof(char));
+
+ if (!d) {
+ fprintf(stderr, "iov_count %i/%i OOM\n", i, iov_count);
+ goto out_errno;
+ }
+ iov[i].iov_base = d;
+ iov[i].iov_len = iov_length;
+
+ if (data_test && tx) {
+ int j;
+
+ for (j = 0; j < iov_length; j++)
+ d[j] = k++;
+ }
+ }
+
+ msg.msg_iov = iov;
+ msg.msg_iovlen = iov_count;
+ k = 0;
+
+ if (tx) {
+ clock_gettime(CLOCK_MONOTONIC, &s->start);
+ for (i = 0; i < cnt; i++) {
+ int sent = sendmsg(fd, &msg, flags);
+
+ if (!drop && sent < 0) {
+ perror("send loop error:");
+ goto out_errno;
+ } else if (drop && sent >= 0) {
+ printf("send loop error expected: %i\n", sent);
+ errno = -EIO;
+ goto out_errno;
+ }
+ if (sent > 0)
+ s->bytes_sent += sent;
+ }
+ clock_gettime(CLOCK_MONOTONIC, &s->end);
+ } else {
+ int slct, recv, max_fd = fd;
+ int fd_flags = O_NONBLOCK;
+ struct timeval timeout;
+ float total_bytes;
+ int bytes_cnt = 0;
+ int chunk_sz;
+ fd_set w;
+
+ if (opt->sendpage)
+ chunk_sz = iov_length * cnt;
+ else
+ chunk_sz = iov_length * iov_count;
+
+ fcntl(fd, fd_flags);
+ total_bytes = (float)iov_count * (float)iov_length * (float)cnt;
+ err = clock_gettime(CLOCK_MONOTONIC, &s->start);
+ if (err < 0)
+ perror("recv start time: ");
+ while (s->bytes_recvd < total_bytes) {
+ if (txmsg_cork) {
+ timeout.tv_sec = 0;
+ timeout.tv_usec = 300000;
+ } else {
+ timeout.tv_sec = 1;
+ timeout.tv_usec = 0;
+ }
+
+ /* FD sets */
+ FD_ZERO(&w);
+ FD_SET(fd, &w);
+
+ slct = select(max_fd + 1, &w, NULL, NULL, &timeout);
+ if (slct == -1) {
+ perror("select()");
+ clock_gettime(CLOCK_MONOTONIC, &s->end);
+ goto out_errno;
+ } else if (!slct) {
+ if (opt->verbose)
+ fprintf(stderr, "unexpected timeout\n");
+ errno = -EIO;
+ clock_gettime(CLOCK_MONOTONIC, &s->end);
+ goto out_errno;
+ }
+
+ recv = recvmsg(fd, &msg, flags);
+ if (recv < 0) {
+ if (errno != EWOULDBLOCK) {
+ clock_gettime(CLOCK_MONOTONIC, &s->end);
+ perror("recv failed()\n");
+ goto out_errno;
+ }
+ }
+
+ s->bytes_recvd += recv;
+
+ if (data_test) {
+ int j;
+
+ for (i = 0; i < msg.msg_iovlen; i++) {
+ unsigned char *d = iov[i].iov_base;
+
+ for (j = 0;
+ j < iov[i].iov_len && recv; j++) {
+ if (d[j] != k++) {
+ errno = -EIO;
+ fprintf(stderr,
+ "detected data corruption @iov[%i]:%i %02x != %02x, %02x ?= %02x\n",
+ i, j, d[j], k - 1, d[j+1], k);
+ goto out_errno;
+ }
+ bytes_cnt++;
+ if (bytes_cnt == chunk_sz) {
+ k = 0;
+ bytes_cnt = 0;
+ }
+ recv--;
+ }
+ }
+ }
+ }
+ clock_gettime(CLOCK_MONOTONIC, &s->end);
+ }
+
+ for (i = 0; i < iov_count; i++)
+ free(iov[i].iov_base);
+ free(iov);
+ return 0;
+out_errno:
+ for (i = 0; i < iov_count; i++)
+ free(iov[i].iov_base);
+ free(iov);
+ return errno;
+}
+
+static float giga = 1000000000;
+
+static inline float sentBps(struct msg_stats s)
+{
+ return s.bytes_sent / (s.end.tv_sec - s.start.tv_sec);
+}
+
+static inline float recvdBps(struct msg_stats s)
+{
+ return s.bytes_recvd / (s.end.tv_sec - s.start.tv_sec);
+}
+
+static int sendmsg_test(struct sockmap_options *opt)
+{
+ float sent_Bps = 0, recvd_Bps = 0;
+ int rx_fd, txpid, rxpid, err = 0;
+ struct msg_stats s = {0};
+ int iov_count = opt->iov_count;
+ int iov_buf = opt->iov_length;
+ int rx_status, tx_status;
+ int cnt = opt->rate;
+
+ errno = 0;
+
+ if (opt->base)
+ rx_fd = p1;
+ else
+ rx_fd = p2;
+
+ rxpid = fork();
+ if (rxpid == 0) {
+ if (opt->drop_expected)
+ exit(0);
+
+ if (opt->sendpage)
+ iov_count = 1;
+ err = msg_loop(rx_fd, iov_count, iov_buf,
+ cnt, &s, false, opt);
+ if (err && opt->verbose)
+ fprintf(stderr,
+ "msg_loop_rx: iov_count %i iov_buf %i cnt %i err %i\n",
+ iov_count, iov_buf, cnt, err);
+ shutdown(p2, SHUT_RDWR);
+ shutdown(p1, SHUT_RDWR);
+ if (s.end.tv_sec - s.start.tv_sec) {
+ sent_Bps = sentBps(s);
+ recvd_Bps = recvdBps(s);
+ }
+ if (opt->verbose)
+ fprintf(stdout,
+ "rx_sendmsg: TX: %zuB %fB/s %fGB/s RX: %zuB %fB/s %fGB/s\n",
+ s.bytes_sent, sent_Bps, sent_Bps/giga,
+ s.bytes_recvd, recvd_Bps, recvd_Bps/giga);
+ if (err && txmsg_cork)
+ err = 0;
+ exit(err ? 1 : 0);
+ } else if (rxpid == -1) {
+ perror("msg_loop_rx: ");
+ return errno;
+ }
+
+ txpid = fork();
+ if (txpid == 0) {
+ if (opt->sendpage)
+ err = msg_loop_sendpage(c1, iov_buf, cnt, &s, opt);
+ else
+ err = msg_loop(c1, iov_count, iov_buf,
+ cnt, &s, true, opt);
+
+ if (err)
+ fprintf(stderr,
+ "msg_loop_tx: iov_count %i iov_buf %i cnt %i err %i\n",
+ iov_count, iov_buf, cnt, err);
+ shutdown(c1, SHUT_RDWR);
+ if (s.end.tv_sec - s.start.tv_sec) {
+ sent_Bps = sentBps(s);
+ recvd_Bps = recvdBps(s);
+ }
+ if (opt->verbose)
+ fprintf(stdout,
+ "tx_sendmsg: TX: %zuB %fB/s %f GB/s RX: %zuB %fB/s %fGB/s\n",
+ s.bytes_sent, sent_Bps, sent_Bps/giga,
+ s.bytes_recvd, recvd_Bps, recvd_Bps/giga);
+ exit(err ? 1 : 0);
+ } else if (txpid == -1) {
+ perror("msg_loop_tx: ");
+ return errno;
+ }
+
+ assert(waitpid(rxpid, &rx_status, 0) == rxpid);
+ assert(waitpid(txpid, &tx_status, 0) == txpid);
+ if (WIFEXITED(rx_status)) {
+ err = WEXITSTATUS(rx_status);
+ if (err) {
+ fprintf(stderr, "rx thread exited with err %d. ", err);
+ goto out;
+ }
+ }
+ if (WIFEXITED(tx_status)) {
+ err = WEXITSTATUS(tx_status);
+ if (err)
+ fprintf(stderr, "tx thread exited with err %d. ", err);
+ }
+out:
+ return err;
+}
+
+static int forever_ping_pong(int rate, struct sockmap_options *opt)
+{
+ struct timeval timeout;
+ char buf[1024] = {0};
+ int sc;
+
+ timeout.tv_sec = 10;
+ timeout.tv_usec = 0;
+
+ /* Ping/Pong data from client to server */
+ sc = send(c1, buf, sizeof(buf), 0);
+ if (sc < 0) {
+ perror("send failed()\n");
+ return sc;
+ }
+
+ do {
+ int s, rc, i, max_fd = p2;
+ fd_set w;
+
+ /* FD sets */
+ FD_ZERO(&w);
+ FD_SET(c1, &w);
+ FD_SET(c2, &w);
+ FD_SET(p1, &w);
+ FD_SET(p2, &w);
+
+ s = select(max_fd + 1, &w, NULL, NULL, &timeout);
+ if (s == -1) {
+ perror("select()");
+ break;
+ } else if (!s) {
+ fprintf(stderr, "unexpected timeout\n");
+ break;
+ }
+
+ for (i = 0; i <= max_fd && s > 0; ++i) {
+ if (!FD_ISSET(i, &w))
+ continue;
+
+ s--;
+
+ rc = recv(i, buf, sizeof(buf), 0);
+ if (rc < 0) {
+ if (errno != EWOULDBLOCK) {
+ perror("recv failed()\n");
+ return rc;
+ }
+ }
+
+ if (rc == 0) {
+ close(i);
+ break;
+ }
+
+ sc = send(i, buf, rc, 0);
+ if (sc < 0) {
+ perror("send failed()\n");
+ return sc;
+ }
+ }
+
+ if (rate)
+ sleep(rate);
+
+ if (opt->verbose) {
+ printf(".");
+ fflush(stdout);
+
+ }
+ } while (running);
+
+ return 0;
+}
+
+enum {
+ PING_PONG,
+ SENDMSG,
+ BASE,
+ BASE_SENDPAGE,
+ SENDPAGE,
+};
+
+static int run_options(struct sockmap_options *options, int cg_fd, int test)
+{
+ int i, key, next_key, err, tx_prog_fd = -1, zero = 0;
+
+ /* If base test skip BPF setup */
+ if (test == BASE || test == BASE_SENDPAGE)
+ goto run;
+
+ /* Attach programs to sockmap */
+ err = bpf_prog_attach(prog_fd[0], map_fd[0],
+ BPF_SK_SKB_STREAM_PARSER, 0);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_prog_attach (sockmap %i->%i): %d (%s)\n",
+ prog_fd[0], map_fd[0], err, strerror(errno));
+ return err;
+ }
+
+ err = bpf_prog_attach(prog_fd[1], map_fd[0],
+ BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (err) {
+ fprintf(stderr, "ERROR: bpf_prog_attach (sockmap): %d (%s)\n",
+ err, strerror(errno));
+ return err;
+ }
+
+ /* Attach to cgroups */
+ err = bpf_prog_attach(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS, 0);
+ if (err) {
+ fprintf(stderr, "ERROR: bpf_prog_attach (groups): %d (%s)\n",
+ err, strerror(errno));
+ return err;
+ }
+
+run:
+ err = sockmap_init_sockets(options->verbose);
+ if (err) {
+ fprintf(stderr, "ERROR: test socket failed: %d\n", err);
+ goto out;
+ }
+
+ /* Attach txmsg program to sockmap */
+ if (txmsg_pass)
+ tx_prog_fd = prog_fd[3];
+ else if (txmsg_noisy)
+ tx_prog_fd = prog_fd[4];
+ else if (txmsg_redir)
+ tx_prog_fd = prog_fd[5];
+ else if (txmsg_redir_noisy)
+ tx_prog_fd = prog_fd[6];
+ else if (txmsg_drop)
+ tx_prog_fd = prog_fd[9];
+ /* apply and cork must be last */
+ else if (txmsg_apply)
+ tx_prog_fd = prog_fd[7];
+ else if (txmsg_cork)
+ tx_prog_fd = prog_fd[8];
+ else
+ tx_prog_fd = 0;
+
+ if (tx_prog_fd) {
+ int redir_fd, i = 0;
+
+ err = bpf_prog_attach(tx_prog_fd,
+ map_fd[1], BPF_SK_MSG_VERDICT, 0);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_prog_attach (txmsg): %d (%s)\n",
+ err, strerror(errno));
+ goto out;
+ }
+
+ err = bpf_map_update_elem(map_fd[1], &i, &c1, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (txmsg): %d (%s\n",
+ err, strerror(errno));
+ goto out;
+ }
+
+ if (txmsg_redir || txmsg_redir_noisy)
+ redir_fd = c2;
+ else
+ redir_fd = c1;
+
+ err = bpf_map_update_elem(map_fd[2], &i, &redir_fd, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (txmsg): %d (%s\n",
+ err, strerror(errno));
+ goto out;
+ }
+
+ if (txmsg_apply) {
+ err = bpf_map_update_elem(map_fd[3],
+ &i, &txmsg_apply, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (apply_bytes): %d (%s\n",
+ err, strerror(errno));
+ goto out;
+ }
+ }
+
+ if (txmsg_cork) {
+ err = bpf_map_update_elem(map_fd[4],
+ &i, &txmsg_cork, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (cork_bytes): %d (%s\n",
+ err, strerror(errno));
+ goto out;
+ }
+ }
+
+ if (txmsg_start) {
+ err = bpf_map_update_elem(map_fd[5],
+ &i, &txmsg_start, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (txmsg_start): %d (%s)\n",
+ err, strerror(errno));
+ goto out;
+ }
+ }
+
+ if (txmsg_end) {
+ i = 1;
+ err = bpf_map_update_elem(map_fd[5],
+ &i, &txmsg_end, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (txmsg_end): %d (%s)\n",
+ err, strerror(errno));
+ goto out;
+ }
+ }
+
+ if (txmsg_ingress) {
+ int in = BPF_F_INGRESS;
+
+ i = 0;
+ err = bpf_map_update_elem(map_fd[6], &i, &in, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
+ err, strerror(errno));
+ }
+ i = 1;
+ err = bpf_map_update_elem(map_fd[1], &i, &p1, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (p1 txmsg): %d (%s)\n",
+ err, strerror(errno));
+ }
+ err = bpf_map_update_elem(map_fd[2], &i, &p1, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (p1 redir): %d (%s)\n",
+ err, strerror(errno));
+ }
+
+ i = 2;
+ err = bpf_map_update_elem(map_fd[2], &i, &p2, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (p2 txmsg): %d (%s)\n",
+ err, strerror(errno));
+ }
+ }
+
+ if (txmsg_skb) {
+ int skb_fd = (test == SENDMSG || test == SENDPAGE) ?
+ p2 : p1;
+ int ingress = BPF_F_INGRESS;
+
+ i = 0;
+ err = bpf_map_update_elem(map_fd[7],
+ &i, &ingress, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
+ err, strerror(errno));
+ }
+
+ i = 3;
+ err = bpf_map_update_elem(map_fd[0],
+ &i, &skb_fd, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (c1 sockmap): %d (%s)\n",
+ err, strerror(errno));
+ }
+ }
+ }
+
+ if (txmsg_drop)
+ options->drop_expected = true;
+
+ if (test == PING_PONG)
+ err = forever_ping_pong(options->rate, options);
+ else if (test == SENDMSG) {
+ options->base = false;
+ options->sendpage = false;
+ err = sendmsg_test(options);
+ } else if (test == SENDPAGE) {
+ options->base = false;
+ options->sendpage = true;
+ err = sendmsg_test(options);
+ } else if (test == BASE) {
+ options->base = true;
+ options->sendpage = false;
+ err = sendmsg_test(options);
+ } else if (test == BASE_SENDPAGE) {
+ options->base = true;
+ options->sendpage = true;
+ err = sendmsg_test(options);
+ } else
+ fprintf(stderr, "unknown test\n");
+out:
+ /* Detatch and zero all the maps */
+ bpf_prog_detach2(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS);
+ bpf_prog_detach2(prog_fd[0], map_fd[0], BPF_SK_SKB_STREAM_PARSER);
+ bpf_prog_detach2(prog_fd[1], map_fd[0], BPF_SK_SKB_STREAM_VERDICT);
+ if (tx_prog_fd >= 0)
+ bpf_prog_detach2(tx_prog_fd, map_fd[1], BPF_SK_MSG_VERDICT);
+
+ for (i = 0; i < 8; i++) {
+ key = next_key = 0;
+ bpf_map_update_elem(map_fd[i], &key, &zero, BPF_ANY);
+ while (bpf_map_get_next_key(map_fd[i], &key, &next_key) == 0) {
+ bpf_map_update_elem(map_fd[i], &key, &zero, BPF_ANY);
+ key = next_key;
+ }
+ }
+
+ close(s1);
+ close(s2);
+ close(p1);
+ close(p2);
+ close(c1);
+ close(c2);
+ return err;
+}
+
+static char *test_to_str(int test)
+{
+ switch (test) {
+ case SENDMSG:
+ return "sendmsg";
+ case SENDPAGE:
+ return "sendpage";
+ }
+ return "unknown";
+}
+
+#define OPTSTRING 60
+static void test_options(char *options)
+{
+ char tstr[OPTSTRING];
+
+ memset(options, 0, OPTSTRING);
+
+ if (txmsg_pass)
+ strncat(options, "pass,", OPTSTRING);
+ if (txmsg_noisy)
+ strncat(options, "pass_noisy,", OPTSTRING);
+ if (txmsg_redir)
+ strncat(options, "redir,", OPTSTRING);
+ if (txmsg_redir_noisy)
+ strncat(options, "redir_noisy,", OPTSTRING);
+ if (txmsg_drop)
+ strncat(options, "drop,", OPTSTRING);
+ if (txmsg_apply) {
+ snprintf(tstr, OPTSTRING, "apply %d,", txmsg_apply);
+ strncat(options, tstr, OPTSTRING);
+ }
+ if (txmsg_cork) {
+ snprintf(tstr, OPTSTRING, "cork %d,", txmsg_cork);
+ strncat(options, tstr, OPTSTRING);
+ }
+ if (txmsg_start) {
+ snprintf(tstr, OPTSTRING, "start %d,", txmsg_start);
+ strncat(options, tstr, OPTSTRING);
+ }
+ if (txmsg_end) {
+ snprintf(tstr, OPTSTRING, "end %d,", txmsg_end);
+ strncat(options, tstr, OPTSTRING);
+ }
+ if (txmsg_ingress)
+ strncat(options, "ingress,", OPTSTRING);
+ if (txmsg_skb)
+ strncat(options, "skb,", OPTSTRING);
+}
+
+static int __test_exec(int cgrp, int test, struct sockmap_options *opt)
+{
+ char *options = calloc(OPTSTRING, sizeof(char));
+ int err;
+
+ if (test == SENDPAGE)
+ opt->sendpage = true;
+ else
+ opt->sendpage = false;
+
+ if (txmsg_drop)
+ opt->drop_expected = true;
+ else
+ opt->drop_expected = false;
+
+ test_options(options);
+
+ fprintf(stdout,
+ "[TEST %i]: (%i, %i, %i, %s, %s): ",
+ test_cnt, opt->rate, opt->iov_count, opt->iov_length,
+ test_to_str(test), options);
+ fflush(stdout);
+ err = run_options(opt, cgrp, test);
+ fprintf(stdout, "%s\n", !err ? "PASS" : "FAILED");
+ test_cnt++;
+ !err ? passed++ : failed++;
+ free(options);
+ return err;
+}
+
+static int test_exec(int cgrp, struct sockmap_options *opt)
+{
+ int err = __test_exec(cgrp, SENDMSG, opt);
+
+ if (err)
+ goto out;
+
+ err = __test_exec(cgrp, SENDPAGE, opt);
+out:
+ return err;
+}
+
+static int test_loop(int cgrp)
+{
+ struct sockmap_options opt;
+
+ int err, i, l, r;
+
+ opt.verbose = 0;
+ opt.base = false;
+ opt.sendpage = false;
+ opt.data_test = false;
+ opt.drop_expected = false;
+ opt.iov_count = 0;
+ opt.iov_length = 0;
+ opt.rate = 0;
+
+ r = 1;
+ for (i = 1; i < 100; i += 33) {
+ for (l = 1; l < 100; l += 33) {
+ opt.rate = r;
+ opt.iov_count = i;
+ opt.iov_length = l;
+ err = test_exec(cgrp, &opt);
+ if (err)
+ goto out;
+ }
+ }
+ sched_yield();
+out:
+ return err;
+}
+
+static int test_txmsg(int cgrp)
+{
+ int err;
+
+ txmsg_pass = txmsg_noisy = txmsg_redir_noisy = txmsg_drop = 0;
+ txmsg_apply = txmsg_cork = 0;
+ txmsg_ingress = txmsg_skb = 0;
+
+ txmsg_pass = 1;
+ err = test_loop(cgrp);
+ txmsg_pass = 0;
+ if (err)
+ goto out;
+
+ txmsg_redir = 1;
+ err = test_loop(cgrp);
+ txmsg_redir = 0;
+ if (err)
+ goto out;
+
+ txmsg_drop = 1;
+ err = test_loop(cgrp);
+ txmsg_drop = 0;
+ if (err)
+ goto out;
+
+ txmsg_redir = 1;
+ txmsg_ingress = 1;
+ err = test_loop(cgrp);
+ txmsg_redir = 0;
+ txmsg_ingress = 0;
+ if (err)
+ goto out;
+out:
+ txmsg_pass = 0;
+ txmsg_redir = 0;
+ txmsg_drop = 0;
+ return err;
+}
+
+static int test_send(struct sockmap_options *opt, int cgrp)
+{
+ int err;
+
+ opt->iov_length = 1;
+ opt->iov_count = 1;
+ opt->rate = 1;
+ err = test_exec(cgrp, opt);
+ if (err)
+ goto out;
+
+ opt->iov_length = 1;
+ opt->iov_count = 1024;
+ opt->rate = 1;
+ err = test_exec(cgrp, opt);
+ if (err)
+ goto out;
+
+ opt->iov_length = 1024;
+ opt->iov_count = 1;
+ opt->rate = 1;
+ err = test_exec(cgrp, opt);
+ if (err)
+ goto out;
+
+ opt->iov_length = 1;
+ opt->iov_count = 1;
+ opt->rate = 512;
+ err = test_exec(cgrp, opt);
+ if (err)
+ goto out;
+
+ opt->iov_length = 256;
+ opt->iov_count = 1024;
+ opt->rate = 2;
+ err = test_exec(cgrp, opt);
+ if (err)
+ goto out;
+
+ opt->rate = 100;
+ opt->iov_count = 1;
+ opt->iov_length = 5;
+ err = test_exec(cgrp, opt);
+ if (err)
+ goto out;
+out:
+ sched_yield();
+ return err;
+}
+
+static int test_mixed(int cgrp)
+{
+ struct sockmap_options opt = {0};
+ int err;
+
+ txmsg_pass = txmsg_noisy = txmsg_redir_noisy = txmsg_drop = 0;
+ txmsg_apply = txmsg_cork = 0;
+ txmsg_start = txmsg_end = 0;
+ /* Test small and large iov_count values with pass/redir/apply/cork */
+ txmsg_pass = 1;
+ txmsg_redir = 0;
+ txmsg_apply = 1;
+ txmsg_cork = 0;
+ err = test_send(&opt, cgrp);
+ if (err)
+ goto out;
+
+ txmsg_pass = 1;
+ txmsg_redir = 0;
+ txmsg_apply = 0;
+ txmsg_cork = 1;
+ err = test_send(&opt, cgrp);
+ if (err)
+ goto out;
+
+ txmsg_pass = 1;
+ txmsg_redir = 0;
+ txmsg_apply = 1;
+ txmsg_cork = 1;
+ err = test_send(&opt, cgrp);
+ if (err)
+ goto out;
+
+ txmsg_pass = 1;
+ txmsg_redir = 0;
+ txmsg_apply = 1024;
+ txmsg_cork = 0;
+ err = test_send(&opt, cgrp);
+ if (err)
+ goto out;
+
+ txmsg_pass = 1;
+ txmsg_redir = 0;
+ txmsg_apply = 0;
+ txmsg_cork = 1024;
+ err = test_send(&opt, cgrp);
+ if (err)
+ goto out;
+
+ txmsg_pass = 1;
+ txmsg_redir = 0;
+ txmsg_apply = 1024;
+ txmsg_cork = 1024;
+ err = test_send(&opt, cgrp);
+ if (err)
+ goto out;
+
+ txmsg_pass = 1;
+ txmsg_redir = 0;
+ txmsg_cork = 4096;
+ txmsg_apply = 4096;
+ err = test_send(&opt, cgrp);
+ if (err)
+ goto out;
+
+ txmsg_pass = 0;
+ txmsg_redir = 1;
+ txmsg_apply = 1;
+ txmsg_cork = 0;
+ err = test_send(&opt, cgrp);
+ if (err)
+ goto out;
+
+ txmsg_pass = 0;
+ txmsg_redir = 1;
+ txmsg_apply = 0;
+ txmsg_cork = 1;
+ err = test_send(&opt, cgrp);
+ if (err)
+ goto out;
+
+ txmsg_pass = 0;
+ txmsg_redir = 1;
+ txmsg_apply = 1024;
+ txmsg_cork = 0;
+ err = test_send(&opt, cgrp);
+ if (err)
+ goto out;
+
+ txmsg_pass = 0;
+ txmsg_redir = 1;
+ txmsg_apply = 0;
+ txmsg_cork = 1024;
+ err = test_send(&opt, cgrp);
+ if (err)
+ goto out;
+
+ txmsg_pass = 0;
+ txmsg_redir = 1;
+ txmsg_apply = 1024;
+ txmsg_cork = 1024;
+ err = test_send(&opt, cgrp);
+ if (err)
+ goto out;
+
+ txmsg_pass = 0;
+ txmsg_redir = 1;
+ txmsg_cork = 4096;
+ txmsg_apply = 4096;
+ err = test_send(&opt, cgrp);
+ if (err)
+ goto out;
+out:
+ return err;
+}
+
+static int test_start_end(int cgrp)
+{
+ struct sockmap_options opt = {0};
+ int err, i;
+
+ /* Test basic start/end with lots of iov_count and iov_lengths */
+ txmsg_start = 1;
+ txmsg_end = 2;
+ err = test_txmsg(cgrp);
+ if (err)
+ goto out;
+
+ /* Test start/end with cork */
+ opt.rate = 16;
+ opt.iov_count = 1;
+ opt.iov_length = 100;
+ txmsg_cork = 1600;
+
+ for (i = 99; i <= 1600; i += 500) {
+ txmsg_start = 0;
+ txmsg_end = i;
+ err = test_exec(cgrp, &opt);
+ if (err)
+ goto out;
+ }
+
+ /* Test start/end with cork but pull data in middle */
+ for (i = 199; i <= 1600; i += 500) {
+ txmsg_start = 100;
+ txmsg_end = i;
+ err = test_exec(cgrp, &opt);
+ if (err)
+ goto out;
+ }
+
+ /* Test start/end with cork pulling last sg entry */
+ txmsg_start = 1500;
+ txmsg_end = 1600;
+ err = test_exec(cgrp, &opt);
+ if (err)
+ goto out;
+
+ /* Test start/end pull of single byte in last page */
+ txmsg_start = 1111;
+ txmsg_end = 1112;
+ err = test_exec(cgrp, &opt);
+ if (err)
+ goto out;
+
+ /* Test start/end with end < start */
+ txmsg_start = 1111;
+ txmsg_end = 0;
+ err = test_exec(cgrp, &opt);
+ if (err)
+ goto out;
+
+ /* Test start/end with end > data */
+ txmsg_start = 0;
+ txmsg_end = 1601;
+ err = test_exec(cgrp, &opt);
+ if (err)
+ goto out;
+
+ /* Test start/end with start > data */
+ txmsg_start = 1601;
+ txmsg_end = 1600;
+ err = test_exec(cgrp, &opt);
+
+out:
+ txmsg_start = 0;
+ txmsg_end = 0;
+ sched_yield();
+ return err;
+}
+
+char *map_names[] = {
+ "sock_map",
+ "sock_map_txmsg",
+ "sock_map_redir",
+ "sock_apply_bytes",
+ "sock_cork_bytes",
+ "sock_pull_bytes",
+ "sock_redir_flags",
+ "sock_skb_opts",
+};
+
+int prog_attach_type[] = {
+ BPF_SK_SKB_STREAM_PARSER,
+ BPF_SK_SKB_STREAM_VERDICT,
+ BPF_CGROUP_SOCK_OPS,
+ BPF_SK_MSG_VERDICT,
+ BPF_SK_MSG_VERDICT,
+ BPF_SK_MSG_VERDICT,
+ BPF_SK_MSG_VERDICT,
+ BPF_SK_MSG_VERDICT,
+ BPF_SK_MSG_VERDICT,
+ BPF_SK_MSG_VERDICT,
+};
+
+int prog_type[] = {
+ BPF_PROG_TYPE_SK_SKB,
+ BPF_PROG_TYPE_SK_SKB,
+ BPF_PROG_TYPE_SOCK_OPS,
+ BPF_PROG_TYPE_SK_MSG,
+ BPF_PROG_TYPE_SK_MSG,
+ BPF_PROG_TYPE_SK_MSG,
+ BPF_PROG_TYPE_SK_MSG,
+ BPF_PROG_TYPE_SK_MSG,
+ BPF_PROG_TYPE_SK_MSG,
+ BPF_PROG_TYPE_SK_MSG,
+};
+
+static int populate_progs(char *bpf_file)
+{
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ int i = 0;
+ long err;
+
+ obj = bpf_object__open(bpf_file);
+ err = libbpf_get_error(obj);
+ if (err) {
+ char err_buf[256];
+
+ libbpf_strerror(err, err_buf, sizeof(err_buf));
+ printf("Unable to load eBPF objects in file '%s' : %s\n",
+ bpf_file, err_buf);
+ return -1;
+ }
+
+ bpf_object__for_each_program(prog, obj) {
+ bpf_program__set_type(prog, prog_type[i]);
+ bpf_program__set_expected_attach_type(prog,
+ prog_attach_type[i]);
+ i++;
+ }
+
+ i = bpf_object__load(obj);
+ i = 0;
+ bpf_object__for_each_program(prog, obj) {
+ prog_fd[i] = bpf_program__fd(prog);
+ i++;
+ }
+
+ for (i = 0; i < sizeof(map_fd)/sizeof(int); i++) {
+ maps[i] = bpf_object__find_map_by_name(obj, map_names[i]);
+ map_fd[i] = bpf_map__fd(maps[i]);
+ if (map_fd[i] < 0) {
+ fprintf(stderr, "load_bpf_file: (%i) %s\n",
+ map_fd[i], strerror(errno));
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static int __test_suite(char *bpf_file)
+{
+ int cg_fd, err;
+
+ err = populate_progs(bpf_file);
+ if (err < 0) {
+ fprintf(stderr, "ERROR: (%i) load bpf failed\n", err);
+ return err;
+ }
+
+ if (setup_cgroup_environment()) {
+ fprintf(stderr, "ERROR: cgroup env failed\n");
+ return -EINVAL;
+ }
+
+ cg_fd = create_and_get_cgroup(CG_PATH);
+ if (cg_fd < 0) {
+ fprintf(stderr,
+ "ERROR: (%i) open cg path failed: %s\n",
+ cg_fd, optarg);
+ return cg_fd;
+ }
+
+ if (join_cgroup(CG_PATH)) {
+ fprintf(stderr, "ERROR: failed to join cgroup\n");
+ return -EINVAL;
+ }
+
+ /* Tests basic commands and APIs with range of iov values */
+ txmsg_start = txmsg_end = 0;
+ err = test_txmsg(cg_fd);
+ if (err)
+ goto out;
+
+ /* Tests interesting combinations of APIs used together */
+ err = test_mixed(cg_fd);
+ if (err)
+ goto out;
+
+ /* Tests pull_data API using start/end API */
+ err = test_start_end(cg_fd);
+ if (err)
+ goto out;
+
+out:
+ printf("Summary: %i PASSED %i FAILED\n", passed, failed);
+ cleanup_cgroup_environment();
+ close(cg_fd);
+ return err;
+}
+
+static int test_suite(void)
+{
+ int err;
+
+ err = __test_suite(BPF_SOCKMAP_FILENAME);
+ if (err)
+ goto out;
+ err = __test_suite(BPF_SOCKHASH_FILENAME);
+out:
+ return err;
+}
+
+int main(int argc, char **argv)
+{
+ int iov_count = 1, length = 1024, rate = 1;
+ struct sockmap_options options = {0};
+ int opt, longindex, err, cg_fd = 0;
+ char *bpf_file = BPF_SOCKMAP_FILENAME;
+ int test = PING_PONG;
+
+ if (argc < 2)
+ return test_suite();
+
+ while ((opt = getopt_long(argc, argv, ":dhvc:r:i:l:t:",
+ long_options, &longindex)) != -1) {
+ switch (opt) {
+ case 's':
+ txmsg_start = atoi(optarg);
+ break;
+ case 'e':
+ txmsg_end = atoi(optarg);
+ break;
+ case 'a':
+ txmsg_apply = atoi(optarg);
+ break;
+ case 'k':
+ txmsg_cork = atoi(optarg);
+ break;
+ case 'c':
+ cg_fd = open(optarg, O_DIRECTORY, O_RDONLY);
+ if (cg_fd < 0) {
+ fprintf(stderr,
+ "ERROR: (%i) open cg path failed: %s\n",
+ cg_fd, optarg);
+ return cg_fd;
+ }
+ break;
+ case 'r':
+ rate = atoi(optarg);
+ break;
+ case 'v':
+ options.verbose = 1;
+ break;
+ case 'i':
+ iov_count = atoi(optarg);
+ break;
+ case 'l':
+ length = atoi(optarg);
+ break;
+ case 'd':
+ options.data_test = true;
+ break;
+ case 't':
+ if (strcmp(optarg, "ping") == 0) {
+ test = PING_PONG;
+ } else if (strcmp(optarg, "sendmsg") == 0) {
+ test = SENDMSG;
+ } else if (strcmp(optarg, "base") == 0) {
+ test = BASE;
+ } else if (strcmp(optarg, "base_sendpage") == 0) {
+ test = BASE_SENDPAGE;
+ } else if (strcmp(optarg, "sendpage") == 0) {
+ test = SENDPAGE;
+ } else {
+ usage(argv);
+ return -1;
+ }
+ break;
+ case 0:
+ break;
+ case 'h':
+ default:
+ usage(argv);
+ return -1;
+ }
+ }
+
+ if (!cg_fd) {
+ fprintf(stderr, "%s requires cgroup option: --cgroup <path>\n",
+ argv[0]);
+ return -1;
+ }
+
+ err = populate_progs(bpf_file);
+ if (err) {
+ fprintf(stderr, "populate program: (%s) %s\n",
+ bpf_file, strerror(errno));
+ return 1;
+ }
+ running = 1;
+
+ /* catch SIGINT */
+ signal(SIGINT, running_handler);
+
+ options.iov_count = iov_count;
+ options.iov_length = length;
+ options.rate = rate;
+
+ err = run_options(&options, cg_fd, test);
+ close(cg_fd);
+ return err;
+}
+
+void running_handler(int a)
+{
+ running = 0;
+}
diff --git a/tools/testing/selftests/bpf/test_sockmap_kern.c b/tools/testing/selftests/bpf/test_sockmap_kern.c
new file mode 100644
index 000000000..677b2ed1c
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_sockmap_kern.c
@@ -0,0 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
+#define SOCKMAP
+#define TEST_MAP_TYPE BPF_MAP_TYPE_SOCKMAP
+#include "./test_sockmap_kern.h"
diff --git a/tools/testing/selftests/bpf/test_sockmap_kern.h b/tools/testing/selftests/bpf/test_sockmap_kern.h
new file mode 100644
index 000000000..8e8e41780
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_sockmap_kern.h
@@ -0,0 +1,363 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2017-2018 Covalent IO, Inc. http://covalent.io */
+#include <stddef.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/in.h>
+#include <linux/udp.h>
+#include <linux/tcp.h>
+#include <linux/pkt_cls.h>
+#include <sys/socket.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+/* Sockmap sample program connects a client and a backend together
+ * using cgroups.
+ *
+ * client:X <---> frontend:80 client:X <---> backend:80
+ *
+ * For simplicity we hard code values here and bind 1:1. The hard
+ * coded values are part of the setup in sockmap.sh script that
+ * is associated with this BPF program.
+ *
+ * The bpf_printk is verbose and prints information as connections
+ * are established and verdicts are decided.
+ */
+
+#define bpf_printk(fmt, ...) \
+({ \
+ char ____fmt[] = fmt; \
+ bpf_trace_printk(____fmt, sizeof(____fmt), \
+ ##__VA_ARGS__); \
+})
+
+struct bpf_map_def SEC("maps") sock_map = {
+ .type = TEST_MAP_TYPE,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 20,
+};
+
+struct bpf_map_def SEC("maps") sock_map_txmsg = {
+ .type = TEST_MAP_TYPE,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 20,
+};
+
+struct bpf_map_def SEC("maps") sock_map_redir = {
+ .type = TEST_MAP_TYPE,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 20,
+};
+
+struct bpf_map_def SEC("maps") sock_apply_bytes = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 1
+};
+
+struct bpf_map_def SEC("maps") sock_cork_bytes = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 1
+};
+
+struct bpf_map_def SEC("maps") sock_pull_bytes = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 2
+};
+
+struct bpf_map_def SEC("maps") sock_redir_flags = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 1
+};
+
+struct bpf_map_def SEC("maps") sock_skb_opts = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 1
+};
+
+SEC("sk_skb1")
+int bpf_prog1(struct __sk_buff *skb)
+{
+ return skb->len;
+}
+
+SEC("sk_skb2")
+int bpf_prog2(struct __sk_buff *skb)
+{
+ __u32 lport = skb->local_port;
+ __u32 rport = skb->remote_port;
+ int len, *f, ret, zero = 0;
+ __u64 flags = 0;
+
+ if (lport == 10000)
+ ret = 10;
+ else
+ ret = 1;
+
+ len = (__u32)skb->data_end - (__u32)skb->data;
+ f = bpf_map_lookup_elem(&sock_skb_opts, &zero);
+ if (f && *f) {
+ ret = 3;
+ flags = *f;
+ }
+
+ bpf_printk("sk_skb2: redirect(%iB) flags=%i\n",
+ len, flags);
+#ifdef SOCKMAP
+ return bpf_sk_redirect_map(skb, &sock_map, ret, flags);
+#else
+ return bpf_sk_redirect_hash(skb, &sock_map, &ret, flags);
+#endif
+
+}
+
+SEC("sockops")
+int bpf_sockmap(struct bpf_sock_ops *skops)
+{
+ __u32 lport, rport;
+ int op, err = 0, index, key, ret;
+
+
+ op = (int) skops->op;
+
+ switch (op) {
+ case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
+ lport = skops->local_port;
+ rport = skops->remote_port;
+
+ if (lport == 10000) {
+ ret = 1;
+#ifdef SOCKMAP
+ err = bpf_sock_map_update(skops, &sock_map, &ret,
+ BPF_NOEXIST);
+#else
+ err = bpf_sock_hash_update(skops, &sock_map, &ret,
+ BPF_NOEXIST);
+#endif
+ bpf_printk("passive(%i -> %i) map ctx update err: %d\n",
+ lport, bpf_ntohl(rport), err);
+ }
+ break;
+ case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
+ lport = skops->local_port;
+ rport = skops->remote_port;
+
+ if (bpf_ntohl(rport) == 10001) {
+ ret = 10;
+#ifdef SOCKMAP
+ err = bpf_sock_map_update(skops, &sock_map, &ret,
+ BPF_NOEXIST);
+#else
+ err = bpf_sock_hash_update(skops, &sock_map, &ret,
+ BPF_NOEXIST);
+#endif
+ bpf_printk("active(%i -> %i) map ctx update err: %d\n",
+ lport, bpf_ntohl(rport), err);
+ }
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+SEC("sk_msg1")
+int bpf_prog4(struct sk_msg_md *msg)
+{
+ int *bytes, zero = 0, one = 1;
+ int *start, *end;
+
+ bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+ if (bytes)
+ bpf_msg_apply_bytes(msg, *bytes);
+ bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+ if (bytes)
+ bpf_msg_cork_bytes(msg, *bytes);
+ start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
+ end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
+ if (start && end)
+ bpf_msg_pull_data(msg, *start, *end, 0);
+ return SK_PASS;
+}
+
+SEC("sk_msg2")
+int bpf_prog5(struct sk_msg_md *msg)
+{
+ int err1 = -1, err2 = -1, zero = 0, one = 1;
+ int *bytes, *start, *end, len1, len2;
+
+ bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+ if (bytes)
+ err1 = bpf_msg_apply_bytes(msg, *bytes);
+ bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+ if (bytes)
+ err2 = bpf_msg_cork_bytes(msg, *bytes);
+ len1 = (__u64)msg->data_end - (__u64)msg->data;
+ start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
+ end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
+ if (start && end) {
+ int err;
+
+ bpf_printk("sk_msg2: pull(%i:%i)\n",
+ start ? *start : 0, end ? *end : 0);
+ err = bpf_msg_pull_data(msg, *start, *end, 0);
+ if (err)
+ bpf_printk("sk_msg2: pull_data err %i\n",
+ err);
+ len2 = (__u64)msg->data_end - (__u64)msg->data;
+ bpf_printk("sk_msg2: length update %i->%i\n",
+ len1, len2);
+ }
+ bpf_printk("sk_msg2: data length %i err1 %i err2 %i\n",
+ len1, err1, err2);
+ return SK_PASS;
+}
+
+SEC("sk_msg3")
+int bpf_prog6(struct sk_msg_md *msg)
+{
+ int *bytes, zero = 0, one = 1, key = 0;
+ int *start, *end, *f;
+ __u64 flags = 0;
+
+ bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+ if (bytes)
+ bpf_msg_apply_bytes(msg, *bytes);
+ bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+ if (bytes)
+ bpf_msg_cork_bytes(msg, *bytes);
+ start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
+ end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
+ if (start && end)
+ bpf_msg_pull_data(msg, *start, *end, 0);
+ f = bpf_map_lookup_elem(&sock_redir_flags, &zero);
+ if (f && *f) {
+ key = 2;
+ flags = *f;
+ }
+#ifdef SOCKMAP
+ return bpf_msg_redirect_map(msg, &sock_map_redir, key, flags);
+#else
+ return bpf_msg_redirect_hash(msg, &sock_map_redir, &key, flags);
+#endif
+}
+
+SEC("sk_msg4")
+int bpf_prog7(struct sk_msg_md *msg)
+{
+ int err1 = 0, err2 = 0, zero = 0, one = 1, key = 0;
+ int *f, *bytes, *start, *end, len1, len2;
+ __u64 flags = 0;
+
+ int err;
+ bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+ if (bytes)
+ err1 = bpf_msg_apply_bytes(msg, *bytes);
+ bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+ if (bytes)
+ err2 = bpf_msg_cork_bytes(msg, *bytes);
+ len1 = (__u64)msg->data_end - (__u64)msg->data;
+ start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
+ end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
+ if (start && end) {
+
+ bpf_printk("sk_msg2: pull(%i:%i)\n",
+ start ? *start : 0, end ? *end : 0);
+ err = bpf_msg_pull_data(msg, *start, *end, 0);
+ if (err)
+ bpf_printk("sk_msg2: pull_data err %i\n",
+ err);
+ len2 = (__u64)msg->data_end - (__u64)msg->data;
+ bpf_printk("sk_msg2: length update %i->%i\n",
+ len1, len2);
+ }
+ f = bpf_map_lookup_elem(&sock_redir_flags, &zero);
+ if (f && *f) {
+ key = 2;
+ flags = *f;
+ }
+ bpf_printk("sk_msg3: redirect(%iB) flags=%i err=%i\n",
+ len1, flags, err1 ? err1 : err2);
+#ifdef SOCKMAP
+ err = bpf_msg_redirect_map(msg, &sock_map_redir, key, flags);
+#else
+ err = bpf_msg_redirect_hash(msg, &sock_map_redir, &key, flags);
+#endif
+ bpf_printk("sk_msg3: err %i\n", err);
+ return err;
+}
+
+SEC("sk_msg5")
+int bpf_prog8(struct sk_msg_md *msg)
+{
+ void *data_end = (void *)(long) msg->data_end;
+ void *data = (void *)(long) msg->data;
+ int ret = 0, *bytes, zero = 0;
+
+ bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+ if (bytes) {
+ ret = bpf_msg_apply_bytes(msg, *bytes);
+ if (ret)
+ return SK_DROP;
+ } else {
+ return SK_DROP;
+ }
+ return SK_PASS;
+}
+SEC("sk_msg6")
+int bpf_prog9(struct sk_msg_md *msg)
+{
+ void *data_end = (void *)(long) msg->data_end;
+ void *data = (void *)(long) msg->data;
+ int ret = 0, *bytes, zero = 0;
+
+ bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+ if (bytes) {
+ if (((__u64)data_end - (__u64)data) >= *bytes)
+ return SK_PASS;
+ ret = bpf_msg_cork_bytes(msg, *bytes);
+ if (ret)
+ return SK_DROP;
+ }
+ return SK_PASS;
+}
+
+SEC("sk_msg7")
+int bpf_prog10(struct sk_msg_md *msg)
+{
+ int *bytes, zero = 0, one = 1;
+ int *start, *end;
+
+ bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+ if (bytes)
+ bpf_msg_apply_bytes(msg, *bytes);
+ bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+ if (bytes)
+ bpf_msg_cork_bytes(msg, *bytes);
+ start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
+ end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
+ if (start && end)
+ bpf_msg_pull_data(msg, *start, *end, 0);
+
+ return SK_DROP;
+}
+
+int _version SEC("version") = 1;
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_stacktrace_build_id.c b/tools/testing/selftests/bpf/test_stacktrace_build_id.c
new file mode 100644
index 000000000..d86c281e9
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_stacktrace_build_id.c
@@ -0,0 +1,76 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+#ifndef PERF_MAX_STACK_DEPTH
+#define PERF_MAX_STACK_DEPTH 127
+#endif
+
+struct bpf_map_def SEC("maps") control_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u32),
+ .max_entries = 1,
+};
+
+struct bpf_map_def SEC("maps") stackid_hmap = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u32),
+ .max_entries = 16384,
+};
+
+struct bpf_map_def SEC("maps") stackmap = {
+ .type = BPF_MAP_TYPE_STACK_TRACE,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct bpf_stack_build_id)
+ * PERF_MAX_STACK_DEPTH,
+ .max_entries = 128,
+ .map_flags = BPF_F_STACK_BUILD_ID,
+};
+
+struct bpf_map_def SEC("maps") stack_amap = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct bpf_stack_build_id)
+ * PERF_MAX_STACK_DEPTH,
+ .max_entries = 128,
+};
+
+/* taken from /sys/kernel/debug/tracing/events/random/urandom_read/format */
+struct random_urandom_args {
+ unsigned long long pad;
+ int got_bits;
+ int pool_left;
+ int input_left;
+};
+
+SEC("tracepoint/random/urandom_read")
+int oncpu(struct random_urandom_args *args)
+{
+ __u32 max_len = sizeof(struct bpf_stack_build_id)
+ * PERF_MAX_STACK_DEPTH;
+ __u32 key = 0, val = 0, *value_p;
+ void *stack_p;
+
+ value_p = bpf_map_lookup_elem(&control_map, &key);
+ if (value_p && *value_p)
+ return 0; /* skip if non-zero *value_p */
+
+ /* The size of stackmap and stackid_hmap should be the same */
+ key = bpf_get_stackid(args, &stackmap, BPF_F_USER_STACK);
+ if ((int)key >= 0) {
+ bpf_map_update_elem(&stackid_hmap, &key, &val, 0);
+ stack_p = bpf_map_lookup_elem(&stack_amap, &key);
+ if (stack_p)
+ bpf_get_stack(args, stack_p, max_len,
+ BPF_F_USER_STACK | BPF_F_USER_BUILD_ID);
+ }
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
diff --git a/tools/testing/selftests/bpf/test_stacktrace_map.c b/tools/testing/selftests/bpf/test_stacktrace_map.c
new file mode 100644
index 000000000..af111af7c
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_stacktrace_map.c
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+#ifndef PERF_MAX_STACK_DEPTH
+#define PERF_MAX_STACK_DEPTH 127
+#endif
+
+struct bpf_map_def SEC("maps") control_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u32),
+ .max_entries = 1,
+};
+
+struct bpf_map_def SEC("maps") stackid_hmap = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u32),
+ .max_entries = 16384,
+};
+
+struct bpf_map_def SEC("maps") stackmap = {
+ .type = BPF_MAP_TYPE_STACK_TRACE,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u64) * PERF_MAX_STACK_DEPTH,
+ .max_entries = 16384,
+};
+
+struct bpf_map_def SEC("maps") stack_amap = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u64) * PERF_MAX_STACK_DEPTH,
+ .max_entries = 16384,
+};
+
+/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
+struct sched_switch_args {
+ unsigned long long pad;
+ char prev_comm[16];
+ int prev_pid;
+ int prev_prio;
+ long long prev_state;
+ char next_comm[16];
+ int next_pid;
+ int next_prio;
+};
+
+SEC("tracepoint/sched/sched_switch")
+int oncpu(struct sched_switch_args *ctx)
+{
+ __u32 max_len = PERF_MAX_STACK_DEPTH * sizeof(__u64);
+ __u32 key = 0, val = 0, *value_p;
+ void *stack_p;
+
+ value_p = bpf_map_lookup_elem(&control_map, &key);
+ if (value_p && *value_p)
+ return 0; /* skip if non-zero *value_p */
+
+ /* The size of stackmap and stackid_hmap should be the same */
+ key = bpf_get_stackid(ctx, &stackmap, 0);
+ if ((int)key >= 0) {
+ bpf_map_update_elem(&stackid_hmap, &key, &val, 0);
+ stack_p = bpf_map_lookup_elem(&stack_amap, &key);
+ if (stack_p)
+ bpf_get_stack(ctx, stack_p, max_len, 0);
+ }
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
diff --git a/tools/testing/selftests/bpf/test_tag.c b/tools/testing/selftests/bpf/test_tag.c
new file mode 100644
index 000000000..6272c784c
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tag.c
@@ -0,0 +1,202 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <time.h>
+#include <errno.h>
+#include <unistd.h>
+#include <string.h>
+#include <sched.h>
+#include <limits.h>
+#include <assert.h>
+
+#include <sys/socket.h>
+
+#include <linux/filter.h>
+#include <linux/bpf.h>
+#include <linux/if_alg.h>
+
+#include <bpf/bpf.h>
+
+#include "../../../include/linux/filter.h"
+#include "bpf_rlimit.h"
+
+static struct bpf_insn prog[BPF_MAXINSNS];
+
+static void bpf_gen_imm_prog(unsigned int insns, int fd_map)
+{
+ int i;
+
+ srand(time(NULL));
+ for (i = 0; i < insns; i++)
+ prog[i] = BPF_ALU64_IMM(BPF_MOV, i % BPF_REG_10, rand());
+ prog[i - 1] = BPF_EXIT_INSN();
+}
+
+static void bpf_gen_map_prog(unsigned int insns, int fd_map)
+{
+ int i, j = 0;
+
+ for (i = 0; i + 1 < insns; i += 2) {
+ struct bpf_insn tmp[] = {
+ BPF_LD_MAP_FD(j++ % BPF_REG_10, fd_map)
+ };
+
+ memcpy(&prog[i], tmp, sizeof(tmp));
+ }
+ if (insns % 2 == 0)
+ prog[insns - 2] = BPF_ALU64_IMM(BPF_MOV, i % BPF_REG_10, 42);
+ prog[insns - 1] = BPF_EXIT_INSN();
+}
+
+static int bpf_try_load_prog(int insns, int fd_map,
+ void (*bpf_filler)(unsigned int insns,
+ int fd_map))
+{
+ int fd_prog;
+
+ bpf_filler(insns, fd_map);
+ fd_prog = bpf_load_program(BPF_PROG_TYPE_SCHED_CLS, prog, insns, "", 0,
+ NULL, 0);
+ assert(fd_prog > 0);
+ if (fd_map > 0)
+ bpf_filler(insns, 0);
+ return fd_prog;
+}
+
+static int __hex2bin(char ch)
+{
+ if ((ch >= '0') && (ch <= '9'))
+ return ch - '0';
+ ch = tolower(ch);
+ if ((ch >= 'a') && (ch <= 'f'))
+ return ch - 'a' + 10;
+ return -1;
+}
+
+static int hex2bin(uint8_t *dst, const char *src, size_t count)
+{
+ while (count--) {
+ int hi = __hex2bin(*src++);
+ int lo = __hex2bin(*src++);
+
+ if ((hi < 0) || (lo < 0))
+ return -1;
+ *dst++ = (hi << 4) | lo;
+ }
+ return 0;
+}
+
+static void tag_from_fdinfo(int fd_prog, uint8_t *tag, uint32_t len)
+{
+ const int prefix_len = sizeof("prog_tag:\t") - 1;
+ char buff[256];
+ int ret = -1;
+ FILE *fp;
+
+ snprintf(buff, sizeof(buff), "/proc/%d/fdinfo/%d", getpid(),
+ fd_prog);
+ fp = fopen(buff, "r");
+ assert(fp);
+
+ while (fgets(buff, sizeof(buff), fp)) {
+ if (strncmp(buff, "prog_tag:\t", prefix_len))
+ continue;
+ ret = hex2bin(tag, buff + prefix_len, len);
+ break;
+ }
+
+ fclose(fp);
+ assert(!ret);
+}
+
+static void tag_from_alg(int insns, uint8_t *tag, uint32_t len)
+{
+ static const struct sockaddr_alg alg = {
+ .salg_family = AF_ALG,
+ .salg_type = "hash",
+ .salg_name = "sha1",
+ };
+ int fd_base, fd_alg, ret;
+ ssize_t size;
+
+ fd_base = socket(AF_ALG, SOCK_SEQPACKET, 0);
+ assert(fd_base > 0);
+
+ ret = bind(fd_base, (struct sockaddr *)&alg, sizeof(alg));
+ assert(!ret);
+
+ fd_alg = accept(fd_base, NULL, 0);
+ assert(fd_alg > 0);
+
+ insns *= sizeof(struct bpf_insn);
+ size = write(fd_alg, prog, insns);
+ assert(size == insns);
+
+ size = read(fd_alg, tag, len);
+ assert(size == len);
+
+ close(fd_alg);
+ close(fd_base);
+}
+
+static void tag_dump(const char *prefix, uint8_t *tag, uint32_t len)
+{
+ int i;
+
+ printf("%s", prefix);
+ for (i = 0; i < len; i++)
+ printf("%02x", tag[i]);
+ printf("\n");
+}
+
+static void tag_exit_report(int insns, int fd_map, uint8_t *ftag,
+ uint8_t *atag, uint32_t len)
+{
+ printf("Program tag mismatch for %d insns%s!\n", insns,
+ fd_map < 0 ? "" : " with map");
+
+ tag_dump(" fdinfo result: ", ftag, len);
+ tag_dump(" af_alg result: ", atag, len);
+ exit(1);
+}
+
+static void do_test(uint32_t *tests, int start_insns, int fd_map,
+ void (*bpf_filler)(unsigned int insns, int fd))
+{
+ int i, fd_prog;
+
+ for (i = start_insns; i <= BPF_MAXINSNS; i++) {
+ uint8_t ftag[8], atag[sizeof(ftag)];
+
+ fd_prog = bpf_try_load_prog(i, fd_map, bpf_filler);
+ tag_from_fdinfo(fd_prog, ftag, sizeof(ftag));
+ tag_from_alg(i, atag, sizeof(atag));
+ if (memcmp(ftag, atag, sizeof(ftag)))
+ tag_exit_report(i, fd_map, ftag, atag, sizeof(ftag));
+
+ close(fd_prog);
+ sched_yield();
+ (*tests)++;
+ }
+}
+
+int main(void)
+{
+ uint32_t tests = 0;
+ int i, fd_map;
+
+ fd_map = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(int),
+ sizeof(int), 1, BPF_F_NO_PREALLOC);
+ assert(fd_map > 0);
+
+ for (i = 0; i < 5; i++) {
+ do_test(&tests, 2, -1, bpf_gen_imm_prog);
+ do_test(&tests, 3, fd_map, bpf_gen_map_prog);
+ }
+
+ printf("test_tag: OK (%u tests)\n", tests);
+ close(fd_map);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/test_tcp_estats.c b/tools/testing/selftests/bpf/test_tcp_estats.c
new file mode 100644
index 000000000..bee3bbecc
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tcp_estats.c
@@ -0,0 +1,258 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+/* This program shows clang/llvm is able to generate code pattern
+ * like:
+ * _tcp_send_active_reset:
+ * 0: bf 16 00 00 00 00 00 00 r6 = r1
+ * ......
+ * 335: b7 01 00 00 0f 00 00 00 r1 = 15
+ * 336: 05 00 48 00 00 00 00 00 goto 72
+ *
+ * LBB0_3:
+ * 337: b7 01 00 00 01 00 00 00 r1 = 1
+ * 338: 63 1a d0 ff 00 00 00 00 *(u32 *)(r10 - 48) = r1
+ * 408: b7 01 00 00 03 00 00 00 r1 = 3
+ *
+ * LBB0_4:
+ * 409: 71 a2 fe ff 00 00 00 00 r2 = *(u8 *)(r10 - 2)
+ * 410: bf a7 00 00 00 00 00 00 r7 = r10
+ * 411: 07 07 00 00 b8 ff ff ff r7 += -72
+ * 412: bf 73 00 00 00 00 00 00 r3 = r7
+ * 413: 0f 13 00 00 00 00 00 00 r3 += r1
+ * 414: 73 23 2d 00 00 00 00 00 *(u8 *)(r3 + 45) = r2
+ *
+ * From the above code snippet, the code generated by the compiler
+ * is reasonable. The "r1" is assigned to different values in basic
+ * blocks "_tcp_send_active_reset" and "LBB0_3", and used in "LBB0_4".
+ * The verifier should be able to handle such code patterns.
+ */
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/ipv6.h>
+#include <linux/version.h>
+#include <sys/socket.h>
+#include "bpf_helpers.h"
+
+#define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;})
+#define TCP_ESTATS_MAGIC 0xBAADBEEF
+
+/* This test case needs "sock" and "pt_regs" data structure.
+ * Recursively, "sock" needs "sock_common" and "inet_sock".
+ * However, this is a unit test case only for
+ * verifier purpose without bpf program execution.
+ * We can safely mock much simpler data structures, basically
+ * only taking the necessary fields from kernel headers.
+ */
+typedef __u32 __bitwise __portpair;
+typedef __u64 __bitwise __addrpair;
+
+struct sock_common {
+ unsigned short skc_family;
+ union {
+ __addrpair skc_addrpair;
+ struct {
+ __be32 skc_daddr;
+ __be32 skc_rcv_saddr;
+ };
+ };
+ union {
+ __portpair skc_portpair;
+ struct {
+ __be16 skc_dport;
+ __u16 skc_num;
+ };
+ };
+ struct in6_addr skc_v6_daddr;
+ struct in6_addr skc_v6_rcv_saddr;
+};
+
+struct sock {
+ struct sock_common __sk_common;
+#define sk_family __sk_common.skc_family
+#define sk_v6_daddr __sk_common.skc_v6_daddr
+#define sk_v6_rcv_saddr __sk_common.skc_v6_rcv_saddr
+};
+
+struct inet_sock {
+ struct sock sk;
+#define inet_daddr sk.__sk_common.skc_daddr
+#define inet_dport sk.__sk_common.skc_dport
+ __be32 inet_saddr;
+ __be16 inet_sport;
+};
+
+struct pt_regs {
+ long di;
+};
+
+static inline struct inet_sock *inet_sk(const struct sock *sk)
+{
+ return (struct inet_sock *)sk;
+}
+
+/* Define various data structures for state recording.
+ * Some fields are not used due to test simplification.
+ */
+enum tcp_estats_addrtype {
+ TCP_ESTATS_ADDRTYPE_IPV4 = 1,
+ TCP_ESTATS_ADDRTYPE_IPV6 = 2
+};
+
+enum tcp_estats_event_type {
+ TCP_ESTATS_ESTABLISH,
+ TCP_ESTATS_PERIODIC,
+ TCP_ESTATS_TIMEOUT,
+ TCP_ESTATS_RETRANSMIT_TIMEOUT,
+ TCP_ESTATS_RETRANSMIT_OTHER,
+ TCP_ESTATS_SYN_RETRANSMIT,
+ TCP_ESTATS_SYNACK_RETRANSMIT,
+ TCP_ESTATS_TERM,
+ TCP_ESTATS_TX_RESET,
+ TCP_ESTATS_RX_RESET,
+ TCP_ESTATS_WRITE_TIMEOUT,
+ TCP_ESTATS_CONN_TIMEOUT,
+ TCP_ESTATS_ACK_LATENCY,
+ TCP_ESTATS_NEVENTS,
+};
+
+struct tcp_estats_event {
+ int pid;
+ int cpu;
+ unsigned long ts;
+ unsigned int magic;
+ enum tcp_estats_event_type event_type;
+};
+
+/* The below data structure is packed in order for
+ * llvm compiler to generate expected code.
+ */
+struct tcp_estats_conn_id {
+ unsigned int localaddressType;
+ struct {
+ unsigned char data[16];
+ } localaddress;
+ struct {
+ unsigned char data[16];
+ } remaddress;
+ unsigned short localport;
+ unsigned short remport;
+} __attribute__((__packed__));
+
+struct tcp_estats_basic_event {
+ struct tcp_estats_event event;
+ struct tcp_estats_conn_id conn_id;
+};
+
+struct bpf_map_def SEC("maps") ev_record_map = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct tcp_estats_basic_event),
+ .max_entries = 1024,
+};
+
+struct dummy_tracepoint_args {
+ unsigned long long pad;
+ struct sock *sock;
+};
+
+static __always_inline void tcp_estats_ev_init(struct tcp_estats_event *event,
+ enum tcp_estats_event_type type)
+{
+ event->magic = TCP_ESTATS_MAGIC;
+ event->ts = bpf_ktime_get_ns();
+ event->event_type = type;
+}
+
+static __always_inline void unaligned_u32_set(unsigned char *to, __u8 *from)
+{
+ to[0] = _(from[0]);
+ to[1] = _(from[1]);
+ to[2] = _(from[2]);
+ to[3] = _(from[3]);
+}
+
+static __always_inline void conn_id_ipv4_init(struct tcp_estats_conn_id *conn_id,
+ __be32 *saddr, __be32 *daddr)
+{
+ conn_id->localaddressType = TCP_ESTATS_ADDRTYPE_IPV4;
+
+ unaligned_u32_set(conn_id->localaddress.data, (__u8 *)saddr);
+ unaligned_u32_set(conn_id->remaddress.data, (__u8 *)daddr);
+}
+
+static __always_inline void conn_id_ipv6_init(struct tcp_estats_conn_id *conn_id,
+ __be32 *saddr, __be32 *daddr)
+{
+ conn_id->localaddressType = TCP_ESTATS_ADDRTYPE_IPV6;
+
+ unaligned_u32_set(conn_id->localaddress.data, (__u8 *)saddr);
+ unaligned_u32_set(conn_id->localaddress.data + sizeof(__u32),
+ (__u8 *)(saddr + 1));
+ unaligned_u32_set(conn_id->localaddress.data + sizeof(__u32) * 2,
+ (__u8 *)(saddr + 2));
+ unaligned_u32_set(conn_id->localaddress.data + sizeof(__u32) * 3,
+ (__u8 *)(saddr + 3));
+
+ unaligned_u32_set(conn_id->remaddress.data,
+ (__u8 *)(daddr));
+ unaligned_u32_set(conn_id->remaddress.data + sizeof(__u32),
+ (__u8 *)(daddr + 1));
+ unaligned_u32_set(conn_id->remaddress.data + sizeof(__u32) * 2,
+ (__u8 *)(daddr + 2));
+ unaligned_u32_set(conn_id->remaddress.data + sizeof(__u32) * 3,
+ (__u8 *)(daddr + 3));
+}
+
+static __always_inline void tcp_estats_conn_id_init(struct tcp_estats_conn_id *conn_id,
+ struct sock *sk)
+{
+ conn_id->localport = _(inet_sk(sk)->inet_sport);
+ conn_id->remport = _(inet_sk(sk)->inet_dport);
+
+ if (_(sk->sk_family) == AF_INET6)
+ conn_id_ipv6_init(conn_id,
+ sk->sk_v6_rcv_saddr.s6_addr32,
+ sk->sk_v6_daddr.s6_addr32);
+ else
+ conn_id_ipv4_init(conn_id,
+ &inet_sk(sk)->inet_saddr,
+ &inet_sk(sk)->inet_daddr);
+}
+
+static __always_inline void tcp_estats_init(struct sock *sk,
+ struct tcp_estats_event *event,
+ struct tcp_estats_conn_id *conn_id,
+ enum tcp_estats_event_type type)
+{
+ tcp_estats_ev_init(event, type);
+ tcp_estats_conn_id_init(conn_id, sk);
+}
+
+static __always_inline void send_basic_event(struct sock *sk,
+ enum tcp_estats_event_type type)
+{
+ struct tcp_estats_basic_event ev;
+ __u32 key = bpf_get_prandom_u32();
+
+ memset(&ev, 0, sizeof(ev));
+ tcp_estats_init(sk, &ev.event, &ev.conn_id, type);
+ bpf_map_update_elem(&ev_record_map, &key, &ev, BPF_ANY);
+}
+
+SEC("dummy_tracepoint")
+int _dummy_tracepoint(struct dummy_tracepoint_args *arg)
+{
+ if (!arg->sock)
+ return 0;
+
+ send_basic_event(arg->sock, TCP_ESTATS_TX_RESET);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
diff --git a/tools/testing/selftests/bpf/test_tcpbpf.h b/tools/testing/selftests/bpf/test_tcpbpf.h
new file mode 100644
index 000000000..7bcfa6207
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tcpbpf.h
@@ -0,0 +1,17 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#ifndef _TEST_TCPBPF_H
+#define _TEST_TCPBPF_H
+
+struct tcpbpf_globals {
+ __u32 event_map;
+ __u32 total_retrans;
+ __u32 data_segs_in;
+ __u32 data_segs_out;
+ __u32 bad_cb_test_rv;
+ __u32 good_cb_test_rv;
+ __u64 bytes_received;
+ __u64 bytes_acked;
+ __u32 num_listen;
+};
+#endif
diff --git a/tools/testing/selftests/bpf/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/test_tcpbpf_kern.c
new file mode 100644
index 000000000..4b7fd540c
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tcpbpf_kern.c
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stddef.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/tcp.h>
+#include <netinet/in.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+#include "test_tcpbpf.h"
+
+struct bpf_map_def SEC("maps") global_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct tcpbpf_globals),
+ .max_entries = 2,
+};
+
+static inline void update_event_map(int event)
+{
+ __u32 key = 0;
+ struct tcpbpf_globals g, *gp;
+
+ gp = bpf_map_lookup_elem(&global_map, &key);
+ if (gp == NULL) {
+ struct tcpbpf_globals g = {0};
+
+ g.event_map |= (1 << event);
+ bpf_map_update_elem(&global_map, &key, &g,
+ BPF_ANY);
+ } else {
+ g = *gp;
+ g.event_map |= (1 << event);
+ bpf_map_update_elem(&global_map, &key, &g,
+ BPF_ANY);
+ }
+}
+
+int _version SEC("version") = 1;
+
+SEC("sockops")
+int bpf_testcb(struct bpf_sock_ops *skops)
+{
+ int rv = -1;
+ int bad_call_rv = 0;
+ int good_call_rv = 0;
+ int op;
+ int v = 0;
+
+ op = (int) skops->op;
+
+ update_event_map(op);
+
+ switch (op) {
+ case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
+ /* Test failure to set largest cb flag (assumes not defined) */
+ bad_call_rv = bpf_sock_ops_cb_flags_set(skops, 0x80);
+ /* Set callback */
+ good_call_rv = bpf_sock_ops_cb_flags_set(skops,
+ BPF_SOCK_OPS_STATE_CB_FLAG);
+ /* Update results */
+ {
+ __u32 key = 0;
+ struct tcpbpf_globals g, *gp;
+
+ gp = bpf_map_lookup_elem(&global_map, &key);
+ if (!gp)
+ break;
+ g = *gp;
+ g.bad_cb_test_rv = bad_call_rv;
+ g.good_cb_test_rv = good_call_rv;
+ bpf_map_update_elem(&global_map, &key, &g,
+ BPF_ANY);
+ }
+ break;
+ case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
+ skops->sk_txhash = 0x12345f;
+ v = 0xff;
+ rv = bpf_setsockopt(skops, SOL_IPV6, IPV6_TCLASS, &v,
+ sizeof(v));
+ break;
+ case BPF_SOCK_OPS_RTO_CB:
+ break;
+ case BPF_SOCK_OPS_RETRANS_CB:
+ break;
+ case BPF_SOCK_OPS_STATE_CB:
+ if (skops->args[1] == BPF_TCP_CLOSE) {
+ __u32 key = 0;
+ struct tcpbpf_globals g, *gp;
+
+ gp = bpf_map_lookup_elem(&global_map, &key);
+ if (!gp)
+ break;
+ g = *gp;
+ if (skops->args[0] == BPF_TCP_LISTEN) {
+ g.num_listen++;
+ } else {
+ g.total_retrans = skops->total_retrans;
+ g.data_segs_in = skops->data_segs_in;
+ g.data_segs_out = skops->data_segs_out;
+ g.bytes_received = skops->bytes_received;
+ g.bytes_acked = skops->bytes_acked;
+ }
+ bpf_map_update_elem(&global_map, &key, &g,
+ BPF_ANY);
+ }
+ break;
+ case BPF_SOCK_OPS_TCP_LISTEN_CB:
+ bpf_sock_ops_cb_flags_set(skops, BPF_SOCK_OPS_STATE_CB_FLAG);
+ break;
+ default:
+ rv = -1;
+ }
+ skops->reply = rv;
+ return 1;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_tcpbpf_user.c b/tools/testing/selftests/bpf/test_tcpbpf_user.c
new file mode 100644
index 000000000..a275c2971
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tcpbpf_user.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <sys/types.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "bpf_rlimit.h"
+#include "bpf_util.h"
+#include "cgroup_helpers.h"
+
+#include "test_tcpbpf.h"
+
+#define EXPECT_EQ(expected, actual, fmt) \
+ do { \
+ if ((expected) != (actual)) { \
+ printf(" Value of: " #actual "\n" \
+ " Actual: %" fmt "\n" \
+ " Expected: %" fmt "\n", \
+ (actual), (expected)); \
+ goto err; \
+ } \
+ } while (0)
+
+int verify_result(const struct tcpbpf_globals *result)
+{
+ __u32 expected_events;
+
+ expected_events = ((1 << BPF_SOCK_OPS_TIMEOUT_INIT) |
+ (1 << BPF_SOCK_OPS_RWND_INIT) |
+ (1 << BPF_SOCK_OPS_TCP_CONNECT_CB) |
+ (1 << BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB) |
+ (1 << BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB) |
+ (1 << BPF_SOCK_OPS_NEEDS_ECN) |
+ (1 << BPF_SOCK_OPS_STATE_CB) |
+ (1 << BPF_SOCK_OPS_TCP_LISTEN_CB));
+
+ EXPECT_EQ(expected_events, result->event_map, "#" PRIx32);
+ EXPECT_EQ(501ULL, result->bytes_received, "llu");
+ EXPECT_EQ(1002ULL, result->bytes_acked, "llu");
+ EXPECT_EQ(1, result->data_segs_in, PRIu32);
+ EXPECT_EQ(1, result->data_segs_out, PRIu32);
+ EXPECT_EQ(0x80, result->bad_cb_test_rv, PRIu32);
+ EXPECT_EQ(0, result->good_cb_test_rv, PRIu32);
+ EXPECT_EQ(1, result->num_listen, PRIu32);
+
+ return 0;
+err:
+ return -1;
+}
+
+static int bpf_find_map(const char *test, struct bpf_object *obj,
+ const char *name)
+{
+ struct bpf_map *map;
+
+ map = bpf_object__find_map_by_name(obj, name);
+ if (!map) {
+ printf("%s:FAIL:map '%s' not found\n", test, name);
+ return -1;
+ }
+ return bpf_map__fd(map);
+}
+
+int main(int argc, char **argv)
+{
+ const char *file = "test_tcpbpf_kern.o";
+ struct tcpbpf_globals g = {0};
+ const char *cg_path = "/foo";
+ int error = EXIT_FAILURE;
+ struct bpf_object *obj;
+ int prog_fd, map_fd;
+ int cg_fd = -1;
+ __u32 key = 0;
+ int rv;
+
+ if (setup_cgroup_environment())
+ goto err;
+
+ cg_fd = create_and_get_cgroup(cg_path);
+ if (!cg_fd)
+ goto err;
+
+ if (join_cgroup(cg_path))
+ goto err;
+
+ if (bpf_prog_load(file, BPF_PROG_TYPE_SOCK_OPS, &obj, &prog_fd)) {
+ printf("FAILED: load_bpf_file failed for: %s\n", file);
+ goto err;
+ }
+
+ rv = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_SOCK_OPS, 0);
+ if (rv) {
+ printf("FAILED: bpf_prog_attach: %d (%s)\n",
+ error, strerror(errno));
+ goto err;
+ }
+
+ if (system("./tcp_server.py")) {
+ printf("FAILED: TCP server\n");
+ goto err;
+ }
+
+ map_fd = bpf_find_map(__func__, obj, "global_map");
+ if (map_fd < 0)
+ goto err;
+
+ rv = bpf_map_lookup_elem(map_fd, &key, &g);
+ if (rv != 0) {
+ printf("FAILED: bpf_map_lookup_elem returns %d\n", rv);
+ goto err;
+ }
+
+ if (verify_result(&g)) {
+ printf("FAILED: Wrong stats\n");
+ goto err;
+ }
+
+ printf("PASSED!\n");
+ error = 0;
+err:
+ bpf_prog_detach(cg_fd, BPF_CGROUP_SOCK_OPS);
+ close(cg_fd);
+ cleanup_cgroup_environment();
+ return error;
+}
diff --git a/tools/testing/selftests/bpf/test_tracepoint.c b/tools/testing/selftests/bpf/test_tracepoint.c
new file mode 100644
index 000000000..04bf08451
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tracepoint.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017 Facebook
+
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
+struct sched_switch_args {
+ unsigned long long pad;
+ char prev_comm[16];
+ int prev_pid;
+ int prev_prio;
+ long long prev_state;
+ char next_comm[16];
+ int next_pid;
+ int next_prio;
+};
+
+SEC("tracepoint/sched/sched_switch")
+int oncpu(struct sched_switch_args *ctx)
+{
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
diff --git a/tools/testing/selftests/bpf/test_tunnel.sh b/tools/testing/selftests/bpf/test_tunnel.sh
new file mode 100755
index 000000000..546aee3e9
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tunnel.sh
@@ -0,0 +1,731 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# End-to-end eBPF tunnel test suite
+# The script tests BPF network tunnel implementation.
+#
+# Topology:
+# ---------
+# root namespace | at_ns0 namespace
+# |
+# ----------- | -----------
+# | tnl dev | | | tnl dev | (overlay network)
+# ----------- | -----------
+# metadata-mode | native-mode
+# with bpf |
+# |
+# ---------- | ----------
+# | veth1 | --------- | veth0 | (underlay network)
+# ---------- peer ----------
+#
+#
+# Device Configuration
+# --------------------
+# Root namespace with metadata-mode tunnel + BPF
+# Device names and addresses:
+# veth1 IP: 172.16.1.200, IPv6: 00::22 (underlay)
+# tunnel dev <type>11, ex: gre11, IPv4: 10.1.1.200 (overlay)
+#
+# Namespace at_ns0 with native tunnel
+# Device names and addresses:
+# veth0 IPv4: 172.16.1.100, IPv6: 00::11 (underlay)
+# tunnel dev <type>00, ex: gre00, IPv4: 10.1.1.100 (overlay)
+#
+#
+# End-to-end ping packet flow
+# ---------------------------
+# Most of the tests start by namespace creation, device configuration,
+# then ping the underlay and overlay network. When doing 'ping 10.1.1.100'
+# from root namespace, the following operations happen:
+# 1) Route lookup shows 10.1.1.100/24 belongs to tnl dev, fwd to tnl dev.
+# 2) Tnl device's egress BPF program is triggered and set the tunnel metadata,
+# with remote_ip=172.16.1.200 and others.
+# 3) Outer tunnel header is prepended and route the packet to veth1's egress
+# 4) veth0's ingress queue receive the tunneled packet at namespace at_ns0
+# 5) Tunnel protocol handler, ex: vxlan_rcv, decap the packet
+# 6) Forward the packet to the overlay tnl dev
+
+PING_ARG="-c 3 -w 10 -q"
+ret=0
+GREEN='\033[0;92m'
+RED='\033[0;31m'
+NC='\033[0m' # No Color
+
+config_device()
+{
+ ip netns add at_ns0
+ ip link add veth0 type veth peer name veth1
+ ip link set veth0 netns at_ns0
+ ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0
+ ip netns exec at_ns0 ip link set dev veth0 up
+ ip link set dev veth1 up mtu 1500
+ ip addr add dev veth1 172.16.1.200/24
+}
+
+add_gre_tunnel()
+{
+ # at_ns0 namespace
+ ip netns exec at_ns0 \
+ ip link add dev $DEV_NS type $TYPE seq key 2 \
+ local 172.16.1.100 remote 172.16.1.200
+ ip netns exec at_ns0 ip link set dev $DEV_NS up
+ ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+
+ # root namespace
+ ip link add dev $DEV type $TYPE key 2 external
+ ip link set dev $DEV up
+ ip addr add dev $DEV 10.1.1.200/24
+}
+
+add_ip6gretap_tunnel()
+{
+
+ # assign ipv6 address
+ ip netns exec at_ns0 ip addr add ::11/96 dev veth0
+ ip netns exec at_ns0 ip link set dev veth0 up
+ ip addr add dev veth1 ::22/96
+ ip link set dev veth1 up
+
+ # at_ns0 namespace
+ ip netns exec at_ns0 \
+ ip link add dev $DEV_NS type $TYPE seq flowlabel 0xbcdef key 2 \
+ local ::11 remote ::22
+
+ ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+ ip netns exec at_ns0 ip addr add dev $DEV_NS fc80::100/96
+ ip netns exec at_ns0 ip link set dev $DEV_NS up
+
+ # root namespace
+ ip link add dev $DEV type $TYPE external
+ ip addr add dev $DEV 10.1.1.200/24
+ ip addr add dev $DEV fc80::200/24
+ ip link set dev $DEV up
+}
+
+add_erspan_tunnel()
+{
+ # at_ns0 namespace
+ if [ "$1" == "v1" ]; then
+ ip netns exec at_ns0 \
+ ip link add dev $DEV_NS type $TYPE seq key 2 \
+ local 172.16.1.100 remote 172.16.1.200 \
+ erspan_ver 1 erspan 123
+ else
+ ip netns exec at_ns0 \
+ ip link add dev $DEV_NS type $TYPE seq key 2 \
+ local 172.16.1.100 remote 172.16.1.200 \
+ erspan_ver 2 erspan_dir egress erspan_hwid 3
+ fi
+ ip netns exec at_ns0 ip link set dev $DEV_NS up
+ ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+
+ # root namespace
+ ip link add dev $DEV type $TYPE external
+ ip link set dev $DEV up
+ ip addr add dev $DEV 10.1.1.200/24
+}
+
+add_ip6erspan_tunnel()
+{
+
+ # assign ipv6 address
+ ip netns exec at_ns0 ip addr add ::11/96 dev veth0
+ ip netns exec at_ns0 ip link set dev veth0 up
+ ip addr add dev veth1 ::22/96
+ ip link set dev veth1 up
+
+ # at_ns0 namespace
+ if [ "$1" == "v1" ]; then
+ ip netns exec at_ns0 \
+ ip link add dev $DEV_NS type $TYPE seq key 2 \
+ local ::11 remote ::22 \
+ erspan_ver 1 erspan 123
+ else
+ ip netns exec at_ns0 \
+ ip link add dev $DEV_NS type $TYPE seq key 2 \
+ local ::11 remote ::22 \
+ erspan_ver 2 erspan_dir egress erspan_hwid 7
+ fi
+ ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+ ip netns exec at_ns0 ip link set dev $DEV_NS up
+
+ # root namespace
+ ip link add dev $DEV type $TYPE external
+ ip addr add dev $DEV 10.1.1.200/24
+ ip link set dev $DEV up
+}
+
+add_vxlan_tunnel()
+{
+ # Set static ARP entry here because iptables set-mark works
+ # on L3 packet, as a result not applying to ARP packets,
+ # causing errors at get_tunnel_{key/opt}.
+
+ # at_ns0 namespace
+ ip netns exec at_ns0 \
+ ip link add dev $DEV_NS type $TYPE \
+ id 2 dstport 4789 gbp remote 172.16.1.200
+ ip netns exec at_ns0 \
+ ip link set dev $DEV_NS address 52:54:00:d9:01:00 up
+ ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+ ip netns exec at_ns0 arp -s 10.1.1.200 52:54:00:d9:02:00
+ ip netns exec at_ns0 iptables -A OUTPUT -j MARK --set-mark 0x800FF
+
+ # root namespace
+ ip link add dev $DEV type $TYPE external gbp dstport 4789
+ ip link set dev $DEV address 52:54:00:d9:02:00 up
+ ip addr add dev $DEV 10.1.1.200/24
+ arp -s 10.1.1.100 52:54:00:d9:01:00
+}
+
+add_ip6vxlan_tunnel()
+{
+ #ip netns exec at_ns0 ip -4 addr del 172.16.1.100 dev veth0
+ ip netns exec at_ns0 ip -6 addr add ::11/96 dev veth0
+ ip netns exec at_ns0 ip link set dev veth0 up
+ #ip -4 addr del 172.16.1.200 dev veth1
+ ip -6 addr add dev veth1 ::22/96
+ ip link set dev veth1 up
+
+ # at_ns0 namespace
+ ip netns exec at_ns0 \
+ ip link add dev $DEV_NS type $TYPE id 22 dstport 4789 \
+ local ::11 remote ::22
+ ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+ ip netns exec at_ns0 ip link set dev $DEV_NS up
+
+ # root namespace
+ ip link add dev $DEV type $TYPE external dstport 4789
+ ip addr add dev $DEV 10.1.1.200/24
+ ip link set dev $DEV up
+}
+
+add_geneve_tunnel()
+{
+ # at_ns0 namespace
+ ip netns exec at_ns0 \
+ ip link add dev $DEV_NS type $TYPE \
+ id 2 dstport 6081 remote 172.16.1.200
+ ip netns exec at_ns0 ip link set dev $DEV_NS up
+ ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+
+ # root namespace
+ ip link add dev $DEV type $TYPE dstport 6081 external
+ ip link set dev $DEV up
+ ip addr add dev $DEV 10.1.1.200/24
+}
+
+add_ip6geneve_tunnel()
+{
+ ip netns exec at_ns0 ip addr add ::11/96 dev veth0
+ ip netns exec at_ns0 ip link set dev veth0 up
+ ip addr add dev veth1 ::22/96
+ ip link set dev veth1 up
+
+ # at_ns0 namespace
+ ip netns exec at_ns0 \
+ ip link add dev $DEV_NS type $TYPE id 22 \
+ remote ::22 # geneve has no local option
+ ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+ ip netns exec at_ns0 ip link set dev $DEV_NS up
+
+ # root namespace
+ ip link add dev $DEV type $TYPE external
+ ip addr add dev $DEV 10.1.1.200/24
+ ip link set dev $DEV up
+}
+
+add_ipip_tunnel()
+{
+ # at_ns0 namespace
+ ip netns exec at_ns0 \
+ ip link add dev $DEV_NS type $TYPE \
+ local 172.16.1.100 remote 172.16.1.200
+ ip netns exec at_ns0 ip link set dev $DEV_NS up
+ ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+
+ # root namespace
+ ip link add dev $DEV type $TYPE external
+ ip link set dev $DEV up
+ ip addr add dev $DEV 10.1.1.200/24
+}
+
+add_ipip6tnl_tunnel()
+{
+ ip netns exec at_ns0 ip addr add ::11/96 dev veth0
+ ip netns exec at_ns0 ip link set dev veth0 up
+ ip addr add dev veth1 ::22/96
+ ip link set dev veth1 up
+
+ # at_ns0 namespace
+ ip netns exec at_ns0 \
+ ip link add dev $DEV_NS type $TYPE \
+ local ::11 remote ::22
+ ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+ ip netns exec at_ns0 ip link set dev $DEV_NS up
+
+ # root namespace
+ ip link add dev $DEV type $TYPE external
+ ip addr add dev $DEV 10.1.1.200/24
+ ip link set dev $DEV up
+}
+
+test_gre()
+{
+ TYPE=gretap
+ DEV_NS=gretap00
+ DEV=gretap11
+ ret=0
+
+ check $TYPE
+ config_device
+ add_gre_tunnel
+ attach_bpf $DEV gre_set_tunnel gre_get_tunnel
+ ping $PING_ARG 10.1.1.100
+ check_err $?
+ ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+ check_err $?
+ cleanup
+
+ if [ $ret -ne 0 ]; then
+ echo -e ${RED}"FAIL: $TYPE"${NC}
+ return 1
+ fi
+ echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_ip6gre()
+{
+ TYPE=ip6gre
+ DEV_NS=ip6gre00
+ DEV=ip6gre11
+ ret=0
+
+ check $TYPE
+ config_device
+ # reuse the ip6gretap function
+ add_ip6gretap_tunnel
+ attach_bpf $DEV ip6gretap_set_tunnel ip6gretap_get_tunnel
+ # underlay
+ ping6 $PING_ARG ::11
+ # overlay: ipv4 over ipv6
+ ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+ ping $PING_ARG 10.1.1.100
+ check_err $?
+ # overlay: ipv6 over ipv6
+ ip netns exec at_ns0 ping6 $PING_ARG fc80::200
+ check_err $?
+ cleanup
+
+ if [ $ret -ne 0 ]; then
+ echo -e ${RED}"FAIL: $TYPE"${NC}
+ return 1
+ fi
+ echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_ip6gretap()
+{
+ TYPE=ip6gretap
+ DEV_NS=ip6gretap00
+ DEV=ip6gretap11
+ ret=0
+
+ check $TYPE
+ config_device
+ add_ip6gretap_tunnel
+ attach_bpf $DEV ip6gretap_set_tunnel ip6gretap_get_tunnel
+ # underlay
+ ping6 $PING_ARG ::11
+ # overlay: ipv4 over ipv6
+ ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+ ping $PING_ARG 10.1.1.100
+ check_err $?
+ # overlay: ipv6 over ipv6
+ ip netns exec at_ns0 ping6 $PING_ARG fc80::200
+ check_err $?
+ cleanup
+
+ if [ $ret -ne 0 ]; then
+ echo -e ${RED}"FAIL: $TYPE"${NC}
+ return 1
+ fi
+ echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_erspan()
+{
+ TYPE=erspan
+ DEV_NS=erspan00
+ DEV=erspan11
+ ret=0
+
+ check $TYPE
+ config_device
+ add_erspan_tunnel $1
+ attach_bpf $DEV erspan_set_tunnel erspan_get_tunnel
+ ping $PING_ARG 10.1.1.100
+ check_err $?
+ ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+ check_err $?
+ cleanup
+
+ if [ $ret -ne 0 ]; then
+ echo -e ${RED}"FAIL: $TYPE"${NC}
+ return 1
+ fi
+ echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_ip6erspan()
+{
+ TYPE=ip6erspan
+ DEV_NS=ip6erspan00
+ DEV=ip6erspan11
+ ret=0
+
+ check $TYPE
+ config_device
+ add_ip6erspan_tunnel $1
+ attach_bpf $DEV ip4ip6erspan_set_tunnel ip4ip6erspan_get_tunnel
+ ping6 $PING_ARG ::11
+ ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+ check_err $?
+ cleanup
+
+ if [ $ret -ne 0 ]; then
+ echo -e ${RED}"FAIL: $TYPE"${NC}
+ return 1
+ fi
+ echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_vxlan()
+{
+ TYPE=vxlan
+ DEV_NS=vxlan00
+ DEV=vxlan11
+ ret=0
+
+ check $TYPE
+ config_device
+ add_vxlan_tunnel
+ attach_bpf $DEV vxlan_set_tunnel vxlan_get_tunnel
+ ping $PING_ARG 10.1.1.100
+ check_err $?
+ ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+ check_err $?
+ cleanup
+
+ if [ $ret -ne 0 ]; then
+ echo -e ${RED}"FAIL: $TYPE"${NC}
+ return 1
+ fi
+ echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_ip6vxlan()
+{
+ TYPE=vxlan
+ DEV_NS=ip6vxlan00
+ DEV=ip6vxlan11
+ ret=0
+
+ check $TYPE
+ config_device
+ add_ip6vxlan_tunnel
+ ip link set dev veth1 mtu 1500
+ attach_bpf $DEV ip6vxlan_set_tunnel ip6vxlan_get_tunnel
+ # underlay
+ ping6 $PING_ARG ::11
+ # ip4 over ip6
+ ping $PING_ARG 10.1.1.100
+ check_err $?
+ ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+ check_err $?
+ cleanup
+
+ if [ $ret -ne 0 ]; then
+ echo -e ${RED}"FAIL: ip6$TYPE"${NC}
+ return 1
+ fi
+ echo -e ${GREEN}"PASS: ip6$TYPE"${NC}
+}
+
+test_geneve()
+{
+ TYPE=geneve
+ DEV_NS=geneve00
+ DEV=geneve11
+ ret=0
+
+ check $TYPE
+ config_device
+ add_geneve_tunnel
+ attach_bpf $DEV geneve_set_tunnel geneve_get_tunnel
+ ping $PING_ARG 10.1.1.100
+ check_err $?
+ ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+ check_err $?
+ cleanup
+
+ if [ $ret -ne 0 ]; then
+ echo -e ${RED}"FAIL: $TYPE"${NC}
+ return 1
+ fi
+ echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_ip6geneve()
+{
+ TYPE=geneve
+ DEV_NS=ip6geneve00
+ DEV=ip6geneve11
+ ret=0
+
+ check $TYPE
+ config_device
+ add_ip6geneve_tunnel
+ attach_bpf $DEV ip6geneve_set_tunnel ip6geneve_get_tunnel
+ ping $PING_ARG 10.1.1.100
+ check_err $?
+ ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+ check_err $?
+ cleanup
+
+ if [ $ret -ne 0 ]; then
+ echo -e ${RED}"FAIL: ip6$TYPE"${NC}
+ return 1
+ fi
+ echo -e ${GREEN}"PASS: ip6$TYPE"${NC}
+}
+
+test_ipip()
+{
+ TYPE=ipip
+ DEV_NS=ipip00
+ DEV=ipip11
+ ret=0
+
+ check $TYPE
+ config_device
+ add_ipip_tunnel
+ ip link set dev veth1 mtu 1500
+ attach_bpf $DEV ipip_set_tunnel ipip_get_tunnel
+ ping $PING_ARG 10.1.1.100
+ check_err $?
+ ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+ check_err $?
+ cleanup
+
+ if [ $ret -ne 0 ]; then
+ echo -e ${RED}"FAIL: $TYPE"${NC}
+ return 1
+ fi
+ echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_ipip6()
+{
+ TYPE=ip6tnl
+ DEV_NS=ipip6tnl00
+ DEV=ipip6tnl11
+ ret=0
+
+ check $TYPE
+ config_device
+ add_ipip6tnl_tunnel
+ ip link set dev veth1 mtu 1500
+ attach_bpf $DEV ipip6_set_tunnel ipip6_get_tunnel
+ # underlay
+ ping6 $PING_ARG ::11
+ # ip4 over ip6
+ ping $PING_ARG 10.1.1.100
+ check_err $?
+ ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+ check_err $?
+ cleanup
+
+ if [ $ret -ne 0 ]; then
+ echo -e ${RED}"FAIL: $TYPE"${NC}
+ return 1
+ fi
+ echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+setup_xfrm_tunnel()
+{
+ auth=0x$(printf '1%.0s' {1..40})
+ enc=0x$(printf '2%.0s' {1..32})
+ spi_in_to_out=0x1
+ spi_out_to_in=0x2
+ # at_ns0 namespace
+ # at_ns0 -> root
+ ip netns exec at_ns0 \
+ ip xfrm state add src 172.16.1.100 dst 172.16.1.200 proto esp \
+ spi $spi_in_to_out reqid 1 mode tunnel \
+ auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc
+ ip netns exec at_ns0 \
+ ip xfrm policy add src 10.1.1.100/32 dst 10.1.1.200/32 dir out \
+ tmpl src 172.16.1.100 dst 172.16.1.200 proto esp reqid 1 \
+ mode tunnel
+ # root -> at_ns0
+ ip netns exec at_ns0 \
+ ip xfrm state add src 172.16.1.200 dst 172.16.1.100 proto esp \
+ spi $spi_out_to_in reqid 2 mode tunnel \
+ auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc
+ ip netns exec at_ns0 \
+ ip xfrm policy add src 10.1.1.200/32 dst 10.1.1.100/32 dir in \
+ tmpl src 172.16.1.200 dst 172.16.1.100 proto esp reqid 2 \
+ mode tunnel
+ # address & route
+ ip netns exec at_ns0 \
+ ip addr add dev veth0 10.1.1.100/32
+ ip netns exec at_ns0 \
+ ip route add 10.1.1.200 dev veth0 via 172.16.1.200 \
+ src 10.1.1.100
+
+ # root namespace
+ # at_ns0 -> root
+ ip xfrm state add src 172.16.1.100 dst 172.16.1.200 proto esp \
+ spi $spi_in_to_out reqid 1 mode tunnel \
+ auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc
+ ip xfrm policy add src 10.1.1.100/32 dst 10.1.1.200/32 dir in \
+ tmpl src 172.16.1.100 dst 172.16.1.200 proto esp reqid 1 \
+ mode tunnel
+ # root -> at_ns0
+ ip xfrm state add src 172.16.1.200 dst 172.16.1.100 proto esp \
+ spi $spi_out_to_in reqid 2 mode tunnel \
+ auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc
+ ip xfrm policy add src 10.1.1.200/32 dst 10.1.1.100/32 dir out \
+ tmpl src 172.16.1.200 dst 172.16.1.100 proto esp reqid 2 \
+ mode tunnel
+ # address & route
+ ip addr add dev veth1 10.1.1.200/32
+ ip route add 10.1.1.100 dev veth1 via 172.16.1.100 src 10.1.1.200
+}
+
+test_xfrm_tunnel()
+{
+ config_device
+ > /sys/kernel/debug/tracing/trace
+ setup_xfrm_tunnel
+ tc qdisc add dev veth1 clsact
+ tc filter add dev veth1 proto ip ingress bpf da obj test_tunnel_kern.o \
+ sec xfrm_get_state
+ ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+ sleep 1
+ grep "reqid 1" /sys/kernel/debug/tracing/trace
+ check_err $?
+ grep "spi 0x1" /sys/kernel/debug/tracing/trace
+ check_err $?
+ grep "remote ip 0xac100164" /sys/kernel/debug/tracing/trace
+ check_err $?
+ cleanup
+
+ if [ $ret -ne 0 ]; then
+ echo -e ${RED}"FAIL: xfrm tunnel"${NC}
+ return 1
+ fi
+ echo -e ${GREEN}"PASS: xfrm tunnel"${NC}
+}
+
+attach_bpf()
+{
+ DEV=$1
+ SET=$2
+ GET=$3
+ tc qdisc add dev $DEV clsact
+ tc filter add dev $DEV egress bpf da obj test_tunnel_kern.o sec $SET
+ tc filter add dev $DEV ingress bpf da obj test_tunnel_kern.o sec $GET
+}
+
+cleanup()
+{
+ ip netns delete at_ns0 2> /dev/null
+ ip link del veth1 2> /dev/null
+ ip link del ipip11 2> /dev/null
+ ip link del ipip6tnl11 2> /dev/null
+ ip link del gretap11 2> /dev/null
+ ip link del ip6gre11 2> /dev/null
+ ip link del ip6gretap11 2> /dev/null
+ ip link del vxlan11 2> /dev/null
+ ip link del ip6vxlan11 2> /dev/null
+ ip link del geneve11 2> /dev/null
+ ip link del ip6geneve11 2> /dev/null
+ ip link del erspan11 2> /dev/null
+ ip link del ip6erspan11 2> /dev/null
+ ip xfrm policy delete dir out src 10.1.1.200/32 dst 10.1.1.100/32 2> /dev/null
+ ip xfrm policy delete dir in src 10.1.1.100/32 dst 10.1.1.200/32 2> /dev/null
+ ip xfrm state delete src 172.16.1.100 dst 172.16.1.200 proto esp spi 0x1 2> /dev/null
+ ip xfrm state delete src 172.16.1.200 dst 172.16.1.100 proto esp spi 0x2 2> /dev/null
+}
+
+cleanup_exit()
+{
+ echo "CATCH SIGKILL or SIGINT, cleanup and exit"
+ cleanup
+ exit 0
+}
+
+check()
+{
+ ip link help 2>&1 | grep -q "\s$1\s"
+ if [ $? -ne 0 ];then
+ echo "SKIP $1: iproute2 not support"
+ cleanup
+ return 1
+ fi
+}
+
+enable_debug()
+{
+ echo 'file ip_gre.c +p' > /sys/kernel/debug/dynamic_debug/control
+ echo 'file ip6_gre.c +p' > /sys/kernel/debug/dynamic_debug/control
+ echo 'file vxlan.c +p' > /sys/kernel/debug/dynamic_debug/control
+ echo 'file geneve.c +p' > /sys/kernel/debug/dynamic_debug/control
+ echo 'file ipip.c +p' > /sys/kernel/debug/dynamic_debug/control
+}
+
+check_err()
+{
+ if [ $ret -eq 0 ]; then
+ ret=$1
+ fi
+}
+
+bpf_tunnel_test()
+{
+ echo "Testing GRE tunnel..."
+ test_gre
+ echo "Testing IP6GRE tunnel..."
+ test_ip6gre
+ echo "Testing IP6GRETAP tunnel..."
+ test_ip6gretap
+ echo "Testing ERSPAN tunnel..."
+ test_erspan v2
+ echo "Testing IP6ERSPAN tunnel..."
+ test_ip6erspan v2
+ echo "Testing VXLAN tunnel..."
+ test_vxlan
+ echo "Testing IP6VXLAN tunnel..."
+ test_ip6vxlan
+ echo "Testing GENEVE tunnel..."
+ test_geneve
+ echo "Testing IP6GENEVE tunnel..."
+ test_ip6geneve
+ echo "Testing IPIP tunnel..."
+ test_ipip
+ echo "Testing IPIP6 tunnel..."
+ test_ipip6
+ echo "Testing IPSec tunnel..."
+ test_xfrm_tunnel
+}
+
+trap cleanup 0 3 6
+trap cleanup_exit 2 9
+
+cleanup
+bpf_tunnel_test
+
+exit 0
diff --git a/tools/testing/selftests/bpf/test_tunnel_kern.c b/tools/testing/selftests/bpf/test_tunnel_kern.c
new file mode 100644
index 000000000..504df69c8
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tunnel_kern.c
@@ -0,0 +1,713 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2016 VMware
+ * Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stddef.h>
+#include <string.h>
+#include <arpa/inet.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/types.h>
+#include <linux/tcp.h>
+#include <linux/socket.h>
+#include <linux/pkt_cls.h>
+#include <linux/erspan.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define ERROR(ret) do {\
+ char fmt[] = "ERROR line:%d ret:%d\n";\
+ bpf_trace_printk(fmt, sizeof(fmt), __LINE__, ret); \
+ } while (0)
+
+int _version SEC("version") = 1;
+
+struct geneve_opt {
+ __be16 opt_class;
+ __u8 type;
+ __u8 length:5;
+ __u8 r3:1;
+ __u8 r2:1;
+ __u8 r1:1;
+ __u8 opt_data[8]; /* hard-coded to 8 byte */
+};
+
+struct vxlan_metadata {
+ __u32 gbp;
+};
+
+SEC("gre_set_tunnel")
+int _gre_set_tunnel(struct __sk_buff *skb)
+{
+ int ret;
+ struct bpf_tunnel_key key;
+
+ __builtin_memset(&key, 0x0, sizeof(key));
+ key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
+ key.tunnel_id = 2;
+ key.tunnel_tos = 0;
+ key.tunnel_ttl = 64;
+
+ ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_ZERO_CSUM_TX | BPF_F_SEQ_NUMBER);
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ return TC_ACT_OK;
+}
+
+SEC("gre_get_tunnel")
+int _gre_get_tunnel(struct __sk_buff *skb)
+{
+ int ret;
+ struct bpf_tunnel_key key;
+ char fmt[] = "key %d remote ip 0x%x\n";
+
+ ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ bpf_trace_printk(fmt, sizeof(fmt), key.tunnel_id, key.remote_ipv4);
+ return TC_ACT_OK;
+}
+
+SEC("ip6gretap_set_tunnel")
+int _ip6gretap_set_tunnel(struct __sk_buff *skb)
+{
+ struct bpf_tunnel_key key;
+ int ret;
+
+ __builtin_memset(&key, 0x0, sizeof(key));
+ key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
+ key.tunnel_id = 2;
+ key.tunnel_tos = 0;
+ key.tunnel_ttl = 64;
+ key.tunnel_label = 0xabcde;
+
+ ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX |
+ BPF_F_SEQ_NUMBER);
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ return TC_ACT_OK;
+}
+
+SEC("ip6gretap_get_tunnel")
+int _ip6gretap_get_tunnel(struct __sk_buff *skb)
+{
+ char fmt[] = "key %d remote ip6 ::%x label %x\n";
+ struct bpf_tunnel_key key;
+ int ret;
+
+ ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_TUNINFO_IPV6);
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ bpf_trace_printk(fmt, sizeof(fmt),
+ key.tunnel_id, key.remote_ipv6[3], key.tunnel_label);
+
+ return TC_ACT_OK;
+}
+
+SEC("erspan_set_tunnel")
+int _erspan_set_tunnel(struct __sk_buff *skb)
+{
+ struct bpf_tunnel_key key;
+ struct erspan_metadata md;
+ int ret;
+
+ __builtin_memset(&key, 0x0, sizeof(key));
+ key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
+ key.tunnel_id = 2;
+ key.tunnel_tos = 0;
+ key.tunnel_ttl = 64;
+
+ ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_ZERO_CSUM_TX);
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ __builtin_memset(&md, 0, sizeof(md));
+#ifdef ERSPAN_V1
+ md.version = 1;
+ md.u.index = bpf_htonl(123);
+#else
+ __u8 direction = 1;
+ __u8 hwid = 7;
+
+ md.version = 2;
+ md.u.md2.dir = direction;
+ md.u.md2.hwid = hwid & 0xf;
+ md.u.md2.hwid_upper = (hwid >> 4) & 0x3;
+#endif
+
+ ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md));
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ return TC_ACT_OK;
+}
+
+SEC("erspan_get_tunnel")
+int _erspan_get_tunnel(struct __sk_buff *skb)
+{
+ char fmt[] = "key %d remote ip 0x%x erspan version %d\n";
+ struct bpf_tunnel_key key;
+ struct erspan_metadata md;
+ __u32 index;
+ int ret;
+
+ ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md));
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ bpf_trace_printk(fmt, sizeof(fmt),
+ key.tunnel_id, key.remote_ipv4, md.version);
+
+#ifdef ERSPAN_V1
+ char fmt2[] = "\tindex %x\n";
+
+ index = bpf_ntohl(md.u.index);
+ bpf_trace_printk(fmt2, sizeof(fmt2), index);
+#else
+ char fmt2[] = "\tdirection %d hwid %x timestamp %u\n";
+
+ bpf_trace_printk(fmt2, sizeof(fmt2),
+ md.u.md2.dir,
+ (md.u.md2.hwid_upper << 4) + md.u.md2.hwid,
+ bpf_ntohl(md.u.md2.timestamp));
+#endif
+
+ return TC_ACT_OK;
+}
+
+SEC("ip4ip6erspan_set_tunnel")
+int _ip4ip6erspan_set_tunnel(struct __sk_buff *skb)
+{
+ struct bpf_tunnel_key key;
+ struct erspan_metadata md;
+ int ret;
+
+ __builtin_memset(&key, 0x0, sizeof(key));
+ key.remote_ipv6[3] = bpf_htonl(0x11);
+ key.tunnel_id = 2;
+ key.tunnel_tos = 0;
+ key.tunnel_ttl = 64;
+
+ ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_TUNINFO_IPV6);
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ __builtin_memset(&md, 0, sizeof(md));
+
+#ifdef ERSPAN_V1
+ md.u.index = bpf_htonl(123);
+ md.version = 1;
+#else
+ __u8 direction = 0;
+ __u8 hwid = 17;
+
+ md.version = 2;
+ md.u.md2.dir = direction;
+ md.u.md2.hwid = hwid & 0xf;
+ md.u.md2.hwid_upper = (hwid >> 4) & 0x3;
+#endif
+
+ ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md));
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ return TC_ACT_OK;
+}
+
+SEC("ip4ip6erspan_get_tunnel")
+int _ip4ip6erspan_get_tunnel(struct __sk_buff *skb)
+{
+ char fmt[] = "ip6erspan get key %d remote ip6 ::%x erspan version %d\n";
+ struct bpf_tunnel_key key;
+ struct erspan_metadata md;
+ __u32 index;
+ int ret;
+
+ ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_TUNINFO_IPV6);
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md));
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ bpf_trace_printk(fmt, sizeof(fmt),
+ key.tunnel_id, key.remote_ipv4, md.version);
+
+#ifdef ERSPAN_V1
+ char fmt2[] = "\tindex %x\n";
+
+ index = bpf_ntohl(md.u.index);
+ bpf_trace_printk(fmt2, sizeof(fmt2), index);
+#else
+ char fmt2[] = "\tdirection %d hwid %x timestamp %u\n";
+
+ bpf_trace_printk(fmt2, sizeof(fmt2),
+ md.u.md2.dir,
+ (md.u.md2.hwid_upper << 4) + md.u.md2.hwid,
+ bpf_ntohl(md.u.md2.timestamp));
+#endif
+
+ return TC_ACT_OK;
+}
+
+SEC("vxlan_set_tunnel")
+int _vxlan_set_tunnel(struct __sk_buff *skb)
+{
+ int ret;
+ struct bpf_tunnel_key key;
+ struct vxlan_metadata md;
+
+ __builtin_memset(&key, 0x0, sizeof(key));
+ key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
+ key.tunnel_id = 2;
+ key.tunnel_tos = 0;
+ key.tunnel_ttl = 64;
+
+ ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_ZERO_CSUM_TX);
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ md.gbp = 0x800FF; /* Set VXLAN Group Policy extension */
+ ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md));
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ return TC_ACT_OK;
+}
+
+SEC("vxlan_get_tunnel")
+int _vxlan_get_tunnel(struct __sk_buff *skb)
+{
+ int ret;
+ struct bpf_tunnel_key key;
+ struct vxlan_metadata md;
+ char fmt[] = "key %d remote ip 0x%x vxlan gbp 0x%x\n";
+
+ ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md));
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ bpf_trace_printk(fmt, sizeof(fmt),
+ key.tunnel_id, key.remote_ipv4, md.gbp);
+
+ return TC_ACT_OK;
+}
+
+SEC("ip6vxlan_set_tunnel")
+int _ip6vxlan_set_tunnel(struct __sk_buff *skb)
+{
+ struct bpf_tunnel_key key;
+ int ret;
+
+ __builtin_memset(&key, 0x0, sizeof(key));
+ key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
+ key.tunnel_id = 22;
+ key.tunnel_tos = 0;
+ key.tunnel_ttl = 64;
+
+ ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_TUNINFO_IPV6);
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ return TC_ACT_OK;
+}
+
+SEC("ip6vxlan_get_tunnel")
+int _ip6vxlan_get_tunnel(struct __sk_buff *skb)
+{
+ char fmt[] = "key %d remote ip6 ::%x label %x\n";
+ struct bpf_tunnel_key key;
+ int ret;
+
+ ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_TUNINFO_IPV6);
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ bpf_trace_printk(fmt, sizeof(fmt),
+ key.tunnel_id, key.remote_ipv6[3], key.tunnel_label);
+
+ return TC_ACT_OK;
+}
+
+SEC("geneve_set_tunnel")
+int _geneve_set_tunnel(struct __sk_buff *skb)
+{
+ int ret, ret2;
+ struct bpf_tunnel_key key;
+ struct geneve_opt gopt;
+
+ __builtin_memset(&key, 0x0, sizeof(key));
+ key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
+ key.tunnel_id = 2;
+ key.tunnel_tos = 0;
+ key.tunnel_ttl = 64;
+
+ __builtin_memset(&gopt, 0x0, sizeof(gopt));
+ gopt.opt_class = bpf_htons(0x102); /* Open Virtual Networking (OVN) */
+ gopt.type = 0x08;
+ gopt.r1 = 0;
+ gopt.r2 = 0;
+ gopt.r3 = 0;
+ gopt.length = 2; /* 4-byte multiple */
+ *(int *) &gopt.opt_data = bpf_htonl(0xdeadbeef);
+
+ ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_ZERO_CSUM_TX);
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ ret = bpf_skb_set_tunnel_opt(skb, &gopt, sizeof(gopt));
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ return TC_ACT_OK;
+}
+
+SEC("geneve_get_tunnel")
+int _geneve_get_tunnel(struct __sk_buff *skb)
+{
+ int ret;
+ struct bpf_tunnel_key key;
+ struct geneve_opt gopt;
+ char fmt[] = "key %d remote ip 0x%x geneve class 0x%x\n";
+
+ ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ ret = bpf_skb_get_tunnel_opt(skb, &gopt, sizeof(gopt));
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ bpf_trace_printk(fmt, sizeof(fmt),
+ key.tunnel_id, key.remote_ipv4, gopt.opt_class);
+ return TC_ACT_OK;
+}
+
+SEC("ip6geneve_set_tunnel")
+int _ip6geneve_set_tunnel(struct __sk_buff *skb)
+{
+ struct bpf_tunnel_key key;
+ struct geneve_opt gopt;
+ int ret;
+
+ __builtin_memset(&key, 0x0, sizeof(key));
+ key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
+ key.tunnel_id = 22;
+ key.tunnel_tos = 0;
+ key.tunnel_ttl = 64;
+
+ ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_TUNINFO_IPV6);
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ __builtin_memset(&gopt, 0x0, sizeof(gopt));
+ gopt.opt_class = bpf_htons(0x102); /* Open Virtual Networking (OVN) */
+ gopt.type = 0x08;
+ gopt.r1 = 0;
+ gopt.r2 = 0;
+ gopt.r3 = 0;
+ gopt.length = 2; /* 4-byte multiple */
+ *(int *) &gopt.opt_data = bpf_htonl(0xfeedbeef);
+
+ ret = bpf_skb_set_tunnel_opt(skb, &gopt, sizeof(gopt));
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ return TC_ACT_OK;
+}
+
+SEC("ip6geneve_get_tunnel")
+int _ip6geneve_get_tunnel(struct __sk_buff *skb)
+{
+ char fmt[] = "key %d remote ip 0x%x geneve class 0x%x\n";
+ struct bpf_tunnel_key key;
+ struct geneve_opt gopt;
+ int ret;
+
+ ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_TUNINFO_IPV6);
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ ret = bpf_skb_get_tunnel_opt(skb, &gopt, sizeof(gopt));
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ bpf_trace_printk(fmt, sizeof(fmt),
+ key.tunnel_id, key.remote_ipv4, gopt.opt_class);
+
+ return TC_ACT_OK;
+}
+
+SEC("ipip_set_tunnel")
+int _ipip_set_tunnel(struct __sk_buff *skb)
+{
+ struct bpf_tunnel_key key = {};
+ void *data = (void *)(long)skb->data;
+ struct iphdr *iph = data;
+ struct tcphdr *tcp = data + sizeof(*iph);
+ void *data_end = (void *)(long)skb->data_end;
+ int ret;
+
+ /* single length check */
+ if (data + sizeof(*iph) + sizeof(*tcp) > data_end) {
+ ERROR(1);
+ return TC_ACT_SHOT;
+ }
+
+ key.tunnel_ttl = 64;
+ if (iph->protocol == IPPROTO_ICMP) {
+ key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
+ } else {
+ if (iph->protocol != IPPROTO_TCP || iph->ihl != 5)
+ return TC_ACT_SHOT;
+
+ if (tcp->dest == bpf_htons(5200))
+ key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
+ else if (tcp->dest == bpf_htons(5201))
+ key.remote_ipv4 = 0xac100165; /* 172.16.1.101 */
+ else
+ return TC_ACT_SHOT;
+ }
+
+ ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0);
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ return TC_ACT_OK;
+}
+
+SEC("ipip_get_tunnel")
+int _ipip_get_tunnel(struct __sk_buff *skb)
+{
+ int ret;
+ struct bpf_tunnel_key key;
+ char fmt[] = "remote ip 0x%x\n";
+
+ ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ bpf_trace_printk(fmt, sizeof(fmt), key.remote_ipv4);
+ return TC_ACT_OK;
+}
+
+SEC("ipip6_set_tunnel")
+int _ipip6_set_tunnel(struct __sk_buff *skb)
+{
+ struct bpf_tunnel_key key = {};
+ void *data = (void *)(long)skb->data;
+ struct iphdr *iph = data;
+ struct tcphdr *tcp = data + sizeof(*iph);
+ void *data_end = (void *)(long)skb->data_end;
+ int ret;
+
+ /* single length check */
+ if (data + sizeof(*iph) + sizeof(*tcp) > data_end) {
+ ERROR(1);
+ return TC_ACT_SHOT;
+ }
+
+ __builtin_memset(&key, 0x0, sizeof(key));
+ key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
+ key.tunnel_ttl = 64;
+
+ ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_TUNINFO_IPV6);
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ return TC_ACT_OK;
+}
+
+SEC("ipip6_get_tunnel")
+int _ipip6_get_tunnel(struct __sk_buff *skb)
+{
+ int ret;
+ struct bpf_tunnel_key key;
+ char fmt[] = "remote ip6 %x::%x\n";
+
+ ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_TUNINFO_IPV6);
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ bpf_trace_printk(fmt, sizeof(fmt), bpf_htonl(key.remote_ipv6[0]),
+ bpf_htonl(key.remote_ipv6[3]));
+ return TC_ACT_OK;
+}
+
+SEC("ip6ip6_set_tunnel")
+int _ip6ip6_set_tunnel(struct __sk_buff *skb)
+{
+ struct bpf_tunnel_key key = {};
+ void *data = (void *)(long)skb->data;
+ struct ipv6hdr *iph = data;
+ struct tcphdr *tcp = data + sizeof(*iph);
+ void *data_end = (void *)(long)skb->data_end;
+ int ret;
+
+ /* single length check */
+ if (data + sizeof(*iph) + sizeof(*tcp) > data_end) {
+ ERROR(1);
+ return TC_ACT_SHOT;
+ }
+
+ key.remote_ipv6[0] = bpf_htonl(0x2401db00);
+ key.tunnel_ttl = 64;
+
+ if (iph->nexthdr == 58 /* NEXTHDR_ICMP */) {
+ key.remote_ipv6[3] = bpf_htonl(1);
+ } else {
+ if (iph->nexthdr != 6 /* NEXTHDR_TCP */) {
+ ERROR(iph->nexthdr);
+ return TC_ACT_SHOT;
+ }
+
+ if (tcp->dest == bpf_htons(5200)) {
+ key.remote_ipv6[3] = bpf_htonl(1);
+ } else if (tcp->dest == bpf_htons(5201)) {
+ key.remote_ipv6[3] = bpf_htonl(2);
+ } else {
+ ERROR(tcp->dest);
+ return TC_ACT_SHOT;
+ }
+ }
+
+ ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_TUNINFO_IPV6);
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ return TC_ACT_OK;
+}
+
+SEC("ip6ip6_get_tunnel")
+int _ip6ip6_get_tunnel(struct __sk_buff *skb)
+{
+ int ret;
+ struct bpf_tunnel_key key;
+ char fmt[] = "remote ip6 %x::%x\n";
+
+ ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_TUNINFO_IPV6);
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ bpf_trace_printk(fmt, sizeof(fmt), bpf_htonl(key.remote_ipv6[0]),
+ bpf_htonl(key.remote_ipv6[3]));
+ return TC_ACT_OK;
+}
+
+SEC("xfrm_get_state")
+int _xfrm_get_state(struct __sk_buff *skb)
+{
+ struct bpf_xfrm_state x;
+ char fmt[] = "reqid %d spi 0x%x remote ip 0x%x\n";
+ int ret;
+
+ ret = bpf_skb_get_xfrm_state(skb, 0, &x, sizeof(x), 0);
+ if (ret < 0)
+ return TC_ACT_OK;
+
+ bpf_trace_printk(fmt, sizeof(fmt), x.reqid, bpf_ntohl(x.spi),
+ bpf_ntohl(x.remote_ipv4));
+ return TC_ACT_OK;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
new file mode 100644
index 000000000..858e55143
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -0,0 +1,13243 @@
+/*
+ * Testsuite for eBPF verifier
+ *
+ * Copyright (c) 2014 PLUMgrid, http://plumgrid.com
+ * Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#include <endian.h>
+#include <asm/types.h>
+#include <linux/types.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <sched.h>
+#include <limits.h>
+
+#include <sys/capability.h>
+
+#include <linux/unistd.h>
+#include <linux/filter.h>
+#include <linux/bpf_perf_event.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+
+#include <bpf/bpf.h>
+
+#ifdef HAVE_GENHDR
+# include "autoconf.h"
+#else
+# if defined(__i386) || defined(__x86_64) || defined(__s390x__) || defined(__aarch64__)
+# define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS 1
+# endif
+#endif
+#include "bpf_rlimit.h"
+#include "bpf_rand.h"
+#include "bpf_util.h"
+#include "../../../include/linux/filter.h"
+
+#define MAX_INSNS BPF_MAXINSNS
+#define MAX_FIXUPS 8
+#define MAX_NR_MAPS 8
+#define POINTER_VALUE 0xcafe4all
+#define TEST_DATA_LEN 64
+
+#define F_NEEDS_EFFICIENT_UNALIGNED_ACCESS (1 << 0)
+#define F_LOAD_WITH_STRICT_ALIGNMENT (1 << 1)
+
+#define UNPRIV_SYSCTL "kernel/unprivileged_bpf_disabled"
+static bool unpriv_disabled = false;
+
+struct bpf_test {
+ const char *descr;
+ struct bpf_insn insns[MAX_INSNS];
+ int fixup_map1[MAX_FIXUPS];
+ int fixup_map2[MAX_FIXUPS];
+ int fixup_map3[MAX_FIXUPS];
+ int fixup_map4[MAX_FIXUPS];
+ int fixup_prog1[MAX_FIXUPS];
+ int fixup_prog2[MAX_FIXUPS];
+ int fixup_map_in_map[MAX_FIXUPS];
+ int fixup_cgroup_storage[MAX_FIXUPS];
+ const char *errstr;
+ const char *errstr_unpriv;
+ uint32_t retval, retval_unpriv;
+ enum {
+ UNDEF,
+ ACCEPT,
+ REJECT
+ } result, result_unpriv;
+ enum bpf_prog_type prog_type;
+ uint8_t flags;
+ __u8 data[TEST_DATA_LEN];
+ void (*fill_helper)(struct bpf_test *self);
+};
+
+/* Note we want this to be 64 bit aligned so that the end of our array is
+ * actually the end of the structure.
+ */
+#define MAX_ENTRIES 11
+
+struct test_val {
+ unsigned int index;
+ int foo[MAX_ENTRIES];
+};
+
+struct other_val {
+ long long foo;
+ long long bar;
+};
+
+static void bpf_fill_ld_abs_vlan_push_pop(struct bpf_test *self)
+{
+ /* test: {skb->data[0], vlan_push} x 68 + {skb->data[0], vlan_pop} x 68 */
+#define PUSH_CNT 51
+ unsigned int len = BPF_MAXINSNS;
+ struct bpf_insn *insn = self->insns;
+ int i = 0, j, k = 0;
+
+ insn[i++] = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1);
+loop:
+ for (j = 0; j < PUSH_CNT; j++) {
+ insn[i++] = BPF_LD_ABS(BPF_B, 0);
+ insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0x34, len - i - 2);
+ i++;
+ insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6);
+ insn[i++] = BPF_MOV64_IMM(BPF_REG_2, 1);
+ insn[i++] = BPF_MOV64_IMM(BPF_REG_3, 2);
+ insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_skb_vlan_push),
+ insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, len - i - 2);
+ i++;
+ }
+
+ for (j = 0; j < PUSH_CNT; j++) {
+ insn[i++] = BPF_LD_ABS(BPF_B, 0);
+ insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0x34, len - i - 2);
+ i++;
+ insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6);
+ insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_skb_vlan_pop),
+ insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, len - i - 2);
+ i++;
+ }
+ if (++k < 5)
+ goto loop;
+
+ for (; i < len - 1; i++)
+ insn[i] = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 0xbef);
+ insn[len - 1] = BPF_EXIT_INSN();
+}
+
+static void bpf_fill_jump_around_ld_abs(struct bpf_test *self)
+{
+ struct bpf_insn *insn = self->insns;
+ unsigned int len = BPF_MAXINSNS;
+ int i = 0;
+
+ insn[i++] = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1);
+ insn[i++] = BPF_LD_ABS(BPF_B, 0);
+ insn[i] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 10, len - i - 2);
+ i++;
+ while (i < len - 1)
+ insn[i++] = BPF_LD_ABS(BPF_B, 1);
+ insn[i] = BPF_EXIT_INSN();
+}
+
+static void bpf_fill_rand_ld_dw(struct bpf_test *self)
+{
+ struct bpf_insn *insn = self->insns;
+ uint64_t res = 0;
+ int i = 0;
+
+ insn[i++] = BPF_MOV32_IMM(BPF_REG_0, 0);
+ while (i < self->retval) {
+ uint64_t val = bpf_semi_rand_get();
+ struct bpf_insn tmp[2] = { BPF_LD_IMM64(BPF_REG_1, val) };
+
+ res ^= val;
+ insn[i++] = tmp[0];
+ insn[i++] = tmp[1];
+ insn[i++] = BPF_ALU64_REG(BPF_XOR, BPF_REG_0, BPF_REG_1);
+ }
+ insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_0);
+ insn[i++] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 32);
+ insn[i++] = BPF_ALU64_REG(BPF_XOR, BPF_REG_0, BPF_REG_1);
+ insn[i] = BPF_EXIT_INSN();
+ res ^= (res >> 32);
+ self->retval = (uint32_t)res;
+}
+
+static struct bpf_test tests[] = {
+ {
+ "add+sub+mul",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_1, 1),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 2),
+ BPF_MOV64_IMM(BPF_REG_2, 3),
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -1),
+ BPF_ALU64_IMM(BPF_MUL, BPF_REG_1, 3),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = -3,
+ },
+ {
+ "DIV32 by 0, zero check 1",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_0, 42),
+ BPF_MOV32_IMM(BPF_REG_1, 0),
+ BPF_MOV32_IMM(BPF_REG_2, 1),
+ BPF_ALU32_REG(BPF_DIV, BPF_REG_2, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = 42,
+ },
+ {
+ "DIV32 by 0, zero check 2",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_0, 42),
+ BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL),
+ BPF_MOV32_IMM(BPF_REG_2, 1),
+ BPF_ALU32_REG(BPF_DIV, BPF_REG_2, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = 42,
+ },
+ {
+ "DIV64 by 0, zero check",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_0, 42),
+ BPF_MOV32_IMM(BPF_REG_1, 0),
+ BPF_MOV32_IMM(BPF_REG_2, 1),
+ BPF_ALU64_REG(BPF_DIV, BPF_REG_2, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = 42,
+ },
+ {
+ "MOD32 by 0, zero check 1",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_0, 42),
+ BPF_MOV32_IMM(BPF_REG_1, 0),
+ BPF_MOV32_IMM(BPF_REG_2, 1),
+ BPF_ALU32_REG(BPF_MOD, BPF_REG_2, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = 42,
+ },
+ {
+ "MOD32 by 0, zero check 2",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_0, 42),
+ BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL),
+ BPF_MOV32_IMM(BPF_REG_2, 1),
+ BPF_ALU32_REG(BPF_MOD, BPF_REG_2, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = 42,
+ },
+ {
+ "MOD64 by 0, zero check",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_0, 42),
+ BPF_MOV32_IMM(BPF_REG_1, 0),
+ BPF_MOV32_IMM(BPF_REG_2, 1),
+ BPF_ALU64_REG(BPF_MOD, BPF_REG_2, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = 42,
+ },
+ {
+ "DIV32 by 0, zero check ok, cls",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_0, 42),
+ BPF_MOV32_IMM(BPF_REG_1, 2),
+ BPF_MOV32_IMM(BPF_REG_2, 16),
+ BPF_ALU32_REG(BPF_DIV, BPF_REG_2, BPF_REG_1),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 8,
+ },
+ {
+ "DIV32 by 0, zero check 1, cls",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_1, 0),
+ BPF_MOV32_IMM(BPF_REG_0, 1),
+ BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 0,
+ },
+ {
+ "DIV32 by 0, zero check 2, cls",
+ .insns = {
+ BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL),
+ BPF_MOV32_IMM(BPF_REG_0, 1),
+ BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 0,
+ },
+ {
+ "DIV64 by 0, zero check, cls",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_1, 0),
+ BPF_MOV32_IMM(BPF_REG_0, 1),
+ BPF_ALU64_REG(BPF_DIV, BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 0,
+ },
+ {
+ "MOD32 by 0, zero check ok, cls",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_0, 42),
+ BPF_MOV32_IMM(BPF_REG_1, 3),
+ BPF_MOV32_IMM(BPF_REG_2, 5),
+ BPF_ALU32_REG(BPF_MOD, BPF_REG_2, BPF_REG_1),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 2,
+ },
+ {
+ "MOD32 by 0, zero check 1, cls",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_1, 0),
+ BPF_MOV32_IMM(BPF_REG_0, 1),
+ BPF_ALU32_REG(BPF_MOD, BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 1,
+ },
+ {
+ "MOD32 by 0, zero check 2, cls",
+ .insns = {
+ BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL),
+ BPF_MOV32_IMM(BPF_REG_0, 1),
+ BPF_ALU32_REG(BPF_MOD, BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 1,
+ },
+ {
+ "MOD64 by 0, zero check 1, cls",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_1, 0),
+ BPF_MOV32_IMM(BPF_REG_0, 2),
+ BPF_ALU64_REG(BPF_MOD, BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 2,
+ },
+ {
+ "MOD64 by 0, zero check 2, cls",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_1, 0),
+ BPF_MOV32_IMM(BPF_REG_0, -1),
+ BPF_ALU64_REG(BPF_MOD, BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = -1,
+ },
+ /* Just make sure that JITs used udiv/umod as otherwise we get
+ * an exception from INT_MIN/-1 overflow similarly as with div
+ * by zero.
+ */
+ {
+ "DIV32 overflow, check 1",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_1, -1),
+ BPF_MOV32_IMM(BPF_REG_0, INT_MIN),
+ BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 0,
+ },
+ {
+ "DIV32 overflow, check 2",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_0, INT_MIN),
+ BPF_ALU32_IMM(BPF_DIV, BPF_REG_0, -1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 0,
+ },
+ {
+ "DIV64 overflow, check 1",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_1, -1),
+ BPF_LD_IMM64(BPF_REG_0, LLONG_MIN),
+ BPF_ALU64_REG(BPF_DIV, BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 0,
+ },
+ {
+ "DIV64 overflow, check 2",
+ .insns = {
+ BPF_LD_IMM64(BPF_REG_0, LLONG_MIN),
+ BPF_ALU64_IMM(BPF_DIV, BPF_REG_0, -1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 0,
+ },
+ {
+ "MOD32 overflow, check 1",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_1, -1),
+ BPF_MOV32_IMM(BPF_REG_0, INT_MIN),
+ BPF_ALU32_REG(BPF_MOD, BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = INT_MIN,
+ },
+ {
+ "MOD32 overflow, check 2",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_0, INT_MIN),
+ BPF_ALU32_IMM(BPF_MOD, BPF_REG_0, -1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = INT_MIN,
+ },
+ {
+ "MOD64 overflow, check 1",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_1, -1),
+ BPF_LD_IMM64(BPF_REG_2, LLONG_MIN),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
+ BPF_ALU64_REG(BPF_MOD, BPF_REG_2, BPF_REG_1),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_JMP_REG(BPF_JNE, BPF_REG_3, BPF_REG_2, 1),
+ BPF_MOV32_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 1,
+ },
+ {
+ "MOD64 overflow, check 2",
+ .insns = {
+ BPF_LD_IMM64(BPF_REG_2, LLONG_MIN),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_MOD, BPF_REG_2, -1),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_JMP_REG(BPF_JNE, BPF_REG_3, BPF_REG_2, 1),
+ BPF_MOV32_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 1,
+ },
+ {
+ "xor32 zero extend check",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_2, -1),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 32),
+ BPF_ALU64_IMM(BPF_OR, BPF_REG_2, 0xffff),
+ BPF_ALU32_REG(BPF_XOR, BPF_REG_2, BPF_REG_2),
+ BPF_MOV32_IMM(BPF_REG_0, 2),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 0, 1),
+ BPF_MOV32_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 1,
+ },
+ {
+ "empty prog",
+ .insns = {
+ },
+ .errstr = "unknown opcode 00",
+ .result = REJECT,
+ },
+ {
+ "only exit insn",
+ .insns = {
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R0 !read_ok",
+ .result = REJECT,
+ },
+ {
+ "unreachable",
+ .insns = {
+ BPF_EXIT_INSN(),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "unreachable",
+ .result = REJECT,
+ },
+ {
+ "unreachable2",
+ .insns = {
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "unreachable",
+ .result = REJECT,
+ },
+ {
+ "out of range jump",
+ .insns = {
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "jump out of range",
+ .result = REJECT,
+ },
+ {
+ "out of range jump2",
+ .insns = {
+ BPF_JMP_IMM(BPF_JA, 0, 0, -2),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "jump out of range",
+ .result = REJECT,
+ },
+ {
+ "test1 ld_imm64",
+ .insns = {
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+ BPF_LD_IMM64(BPF_REG_0, 0),
+ BPF_LD_IMM64(BPF_REG_0, 0),
+ BPF_LD_IMM64(BPF_REG_0, 1),
+ BPF_LD_IMM64(BPF_REG_0, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid BPF_LD_IMM insn",
+ .errstr_unpriv = "R1 pointer comparison",
+ .result = REJECT,
+ },
+ {
+ "test2 ld_imm64",
+ .insns = {
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+ BPF_LD_IMM64(BPF_REG_0, 0),
+ BPF_LD_IMM64(BPF_REG_0, 0),
+ BPF_LD_IMM64(BPF_REG_0, 1),
+ BPF_LD_IMM64(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid BPF_LD_IMM insn",
+ .errstr_unpriv = "R1 pointer comparison",
+ .result = REJECT,
+ },
+ {
+ "test3 ld_imm64",
+ .insns = {
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 0),
+ BPF_LD_IMM64(BPF_REG_0, 0),
+ BPF_LD_IMM64(BPF_REG_0, 0),
+ BPF_LD_IMM64(BPF_REG_0, 1),
+ BPF_LD_IMM64(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_ld_imm64 insn",
+ .result = REJECT,
+ },
+ {
+ "test4 ld_imm64",
+ .insns = {
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_ld_imm64 insn",
+ .result = REJECT,
+ },
+ {
+ "test5 ld_imm64",
+ .insns = {
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 0),
+ },
+ .errstr = "invalid bpf_ld_imm64 insn",
+ .result = REJECT,
+ },
+ {
+ "test6 ld_imm64",
+ .insns = {
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 0),
+ BPF_RAW_INSN(0, 0, 0, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ },
+ {
+ "test7 ld_imm64",
+ .insns = {
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 1),
+ BPF_RAW_INSN(0, 0, 0, 0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = 1,
+ },
+ {
+ "test8 ld_imm64",
+ .insns = {
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 1, 1),
+ BPF_RAW_INSN(0, 0, 0, 0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "uses reserved fields",
+ .result = REJECT,
+ },
+ {
+ "test9 ld_imm64",
+ .insns = {
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 1),
+ BPF_RAW_INSN(0, 0, 0, 1, 1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_ld_imm64 insn",
+ .result = REJECT,
+ },
+ {
+ "test10 ld_imm64",
+ .insns = {
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 1),
+ BPF_RAW_INSN(0, BPF_REG_1, 0, 0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_ld_imm64 insn",
+ .result = REJECT,
+ },
+ {
+ "test11 ld_imm64",
+ .insns = {
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 1),
+ BPF_RAW_INSN(0, 0, BPF_REG_1, 0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_ld_imm64 insn",
+ .result = REJECT,
+ },
+ {
+ "test12 ld_imm64",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, BPF_REG_1, 0, 1),
+ BPF_RAW_INSN(0, 0, 0, 0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "not pointing to valid bpf_map",
+ .result = REJECT,
+ },
+ {
+ "test13 ld_imm64",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, BPF_REG_1, 0, 1),
+ BPF_RAW_INSN(0, 0, BPF_REG_1, 0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_ld_imm64 insn",
+ .result = REJECT,
+ },
+ {
+ "arsh32 on imm",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_ALU32_IMM(BPF_ARSH, BPF_REG_0, 5),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "unknown opcode c4",
+ },
+ {
+ "arsh32 on reg",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_MOV64_IMM(BPF_REG_1, 5),
+ BPF_ALU32_REG(BPF_ARSH, BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "unknown opcode cc",
+ },
+ {
+ "arsh64 on imm",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_ALU64_IMM(BPF_ARSH, BPF_REG_0, 5),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ },
+ {
+ "arsh64 on reg",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_MOV64_IMM(BPF_REG_1, 5),
+ BPF_ALU64_REG(BPF_ARSH, BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ },
+ {
+ "no bpf_exit",
+ .insns = {
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_0, BPF_REG_2),
+ },
+ .errstr = "not an exit",
+ .result = REJECT,
+ },
+ {
+ "loop (back-edge)",
+ .insns = {
+ BPF_JMP_IMM(BPF_JA, 0, 0, -1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "back-edge",
+ .result = REJECT,
+ },
+ {
+ "loop2 (back-edge)",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, -4),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "back-edge",
+ .result = REJECT,
+ },
+ {
+ "conditional loop",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, -3),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "back-edge",
+ .result = REJECT,
+ },
+ {
+ "read uninitialized register",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R2 !read_ok",
+ .result = REJECT,
+ },
+ {
+ "read invalid register",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_0, -1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R15 is invalid",
+ .result = REJECT,
+ },
+ {
+ "program doesn't init R0 before exit",
+ .insns = {
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_2, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R0 !read_ok",
+ .result = REJECT,
+ },
+ {
+ "program doesn't init R0 before exit in all branches",
+ .insns = {
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R0 !read_ok",
+ .errstr_unpriv = "R1 pointer comparison",
+ .result = REJECT,
+ },
+ {
+ "stack out of bounds",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, 8, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid stack",
+ .result = REJECT,
+ },
+ {
+ "invalid call insn1",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL | BPF_X, 0, 0, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "unknown opcode 8d",
+ .result = REJECT,
+ },
+ {
+ "invalid call insn2",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 1, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "BPF_CALL uses reserved",
+ .result = REJECT,
+ },
+ {
+ "invalid function call",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, 1234567),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid func unknown#1234567",
+ .result = REJECT,
+ },
+ {
+ "uninitialized stack1",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 2 },
+ .errstr = "invalid indirect read from stack",
+ .result = REJECT,
+ },
+ {
+ "uninitialized stack2",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, -8),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid read from stack",
+ .result = REJECT,
+ },
+ {
+ "invalid fp arithmetic",
+ /* If this gets ever changed, make sure JITs can deal with it. */
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 subtraction from stack pointer",
+ .result = REJECT,
+ },
+ {
+ "non-invalid fp arithmetic",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ },
+ {
+ "invalid argument register",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_cgroup_classid),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_cgroup_classid),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 !read_ok",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "non-invalid argument register",
+ .insns = {
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_cgroup_classid),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_1, BPF_REG_6),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_cgroup_classid),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "check valid spill/fill",
+ .insns = {
+ /* spill R1(ctx) into stack */
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
+ /* fill it back into R2 */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -8),
+ /* should be able to access R0 = *(R2 + 8) */
+ /* BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 8), */
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R0 leaks addr",
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .retval = POINTER_VALUE,
+ },
+ {
+ "check valid spill/fill, skb mark",
+ .insns = {
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0,
+ offsetof(struct __sk_buff, mark)),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .result_unpriv = ACCEPT,
+ },
+ {
+ "check corrupted spill/fill",
+ .insns = {
+ /* spill R1(ctx) into stack */
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
+ /* mess up with R1 pointer on stack */
+ BPF_ST_MEM(BPF_B, BPF_REG_10, -7, 0x23),
+ /* fill back into R0 is fine for priv.
+ * R0 now becomes SCALAR_VALUE.
+ */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+ /* Load from R0 should fail. */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "attempt to corrupt spilled",
+ .errstr = "R0 invalid mem access 'inv",
+ .result = REJECT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "check corrupted spill/fill, LSB",
+ .insns = {
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
+ BPF_ST_MEM(BPF_H, BPF_REG_10, -8, 0xcafe),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "attempt to corrupt spilled",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ .retval = POINTER_VALUE,
+ },
+ {
+ "check corrupted spill/fill, MSB",
+ .insns = {
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
+ BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0x12345678),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "attempt to corrupt spilled",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ .retval = POINTER_VALUE,
+ },
+ {
+ "invalid src register in STX",
+ .insns = {
+ BPF_STX_MEM(BPF_B, BPF_REG_10, -1, -1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R15 is invalid",
+ .result = REJECT,
+ },
+ {
+ "invalid dst register in STX",
+ .insns = {
+ BPF_STX_MEM(BPF_B, 14, BPF_REG_10, -1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R14 is invalid",
+ .result = REJECT,
+ },
+ {
+ "invalid dst register in ST",
+ .insns = {
+ BPF_ST_MEM(BPF_B, 14, -1, -1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R14 is invalid",
+ .result = REJECT,
+ },
+ {
+ "invalid src register in LDX",
+ .insns = {
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, 12, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R12 is invalid",
+ .result = REJECT,
+ },
+ {
+ "invalid dst register in LDX",
+ .insns = {
+ BPF_LDX_MEM(BPF_B, 11, BPF_REG_1, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R11 is invalid",
+ .result = REJECT,
+ },
+ {
+ "junk insn",
+ .insns = {
+ BPF_RAW_INSN(0, 0, 0, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "unknown opcode 00",
+ .result = REJECT,
+ },
+ {
+ "junk insn2",
+ .insns = {
+ BPF_RAW_INSN(1, 0, 0, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "BPF_LDX uses reserved fields",
+ .result = REJECT,
+ },
+ {
+ "junk insn3",
+ .insns = {
+ BPF_RAW_INSN(-1, 0, 0, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "unknown opcode ff",
+ .result = REJECT,
+ },
+ {
+ "junk insn4",
+ .insns = {
+ BPF_RAW_INSN(-1, -1, -1, -1, -1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "unknown opcode ff",
+ .result = REJECT,
+ },
+ {
+ "junk insn5",
+ .insns = {
+ BPF_RAW_INSN(0x7f, -1, -1, -1, -1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "BPF_ALU uses reserved fields",
+ .result = REJECT,
+ },
+ {
+ "misaligned read from stack",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, -4),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "misaligned stack access",
+ .result = REJECT,
+ },
+ {
+ "invalid map_fd for function call",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_delete_elem),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "fd 0 is not pointing to valid bpf_map",
+ .result = REJECT,
+ },
+ {
+ "don't check return value before access",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .errstr = "R0 invalid mem access 'map_value_or_null'",
+ .result = REJECT,
+ },
+ {
+ "access memory with incorrect alignment",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .errstr = "misaligned value access",
+ .result = REJECT,
+ .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
+ },
+ {
+ "sometimes access memory with incorrect alignment",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+ BPF_EXIT_INSN(),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .errstr = "R0 invalid mem access",
+ .errstr_unpriv = "R0 leaks addr",
+ .result = REJECT,
+ .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
+ },
+ {
+ "jump test 1",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -8),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -8, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 1, 1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -16, 1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 2, 1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -8, 2),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 3, 1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -16, 3),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 4, 1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -8, 4),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 5, 1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -32, 5),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R1 pointer comparison",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "jump test 2",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 2),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -8, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 14),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 1, 2),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -16, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 11),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 2, 2),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -32, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 8),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 3, 2),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -40, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 5),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 4, 2),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -48, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 5, 1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -56, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R1 pointer comparison",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "jump test 3",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -8, 0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 19),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 1, 3),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -16, 0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 15),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 2, 3),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -32, 0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -32),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 11),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 3, 3),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -40, 0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -40),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 7),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 4, 3),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -48, 0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -48),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 3),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 5, 0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -56, 0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -56),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_delete_elem),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 24 },
+ .errstr_unpriv = "R1 pointer comparison",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ .retval = -ENOENT,
+ },
+ {
+ "jump test 4",
+ .insns = {
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R1 pointer comparison",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "jump test 5",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, -8),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_2, -8),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, -8),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_2, -8),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, -8),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_2, -8),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, -8),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_2, -8),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, -8),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_2, -8),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R1 pointer comparison",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "access skb fields ok",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, len)),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, pkt_type)),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, queue_mapping)),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, protocol)),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, vlan_present)),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, vlan_tci)),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, napi_id)),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ },
+ {
+ "access skb fields bad1",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -4),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ },
+ {
+ "access skb fields bad2",
+ .insns = {
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 9),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, pkt_type)),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 4 },
+ .errstr = "different pointers",
+ .errstr_unpriv = "R1 pointer comparison",
+ .result = REJECT,
+ },
+ {
+ "access skb fields bad3",
+ .insns = {
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, pkt_type)),
+ BPF_EXIT_INSN(),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, -12),
+ },
+ .fixup_map1 = { 6 },
+ .errstr = "different pointers",
+ .errstr_unpriv = "R1 pointer comparison",
+ .result = REJECT,
+ },
+ {
+ "access skb fields bad4",
+ .insns = {
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 3),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+ offsetof(struct __sk_buff, len)),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, -13),
+ },
+ .fixup_map1 = { 7 },
+ .errstr = "different pointers",
+ .errstr_unpriv = "R1 pointer comparison",
+ .result = REJECT,
+ },
+ {
+ "invalid access __sk_buff family",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, family)),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ },
+ {
+ "invalid access __sk_buff remote_ip4",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, remote_ip4)),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ },
+ {
+ "invalid access __sk_buff local_ip4",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, local_ip4)),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ },
+ {
+ "invalid access __sk_buff remote_ip6",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, remote_ip6)),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ },
+ {
+ "invalid access __sk_buff local_ip6",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, local_ip6)),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ },
+ {
+ "invalid access __sk_buff remote_port",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, remote_port)),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ },
+ {
+ "invalid access __sk_buff remote_port",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, local_port)),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ },
+ {
+ "valid access __sk_buff family",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, family)),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ },
+ {
+ "valid access __sk_buff remote_ip4",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, remote_ip4)),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ },
+ {
+ "valid access __sk_buff local_ip4",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, local_ip4)),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ },
+ {
+ "valid access __sk_buff remote_ip6",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, remote_ip6[0])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, remote_ip6[1])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, remote_ip6[2])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, remote_ip6[3])),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ },
+ {
+ "valid access __sk_buff local_ip6",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, local_ip6[0])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, local_ip6[1])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, local_ip6[2])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, local_ip6[3])),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ },
+ {
+ "valid access __sk_buff remote_port",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, remote_port)),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ },
+ {
+ "valid access __sk_buff remote_port",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, local_port)),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ },
+ {
+ "invalid access of tc_classid for SK_SKB",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, tc_classid)),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ .errstr = "invalid bpf_context access",
+ },
+ {
+ "invalid access of skb->mark for SK_SKB",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ .errstr = "invalid bpf_context access",
+ },
+ {
+ "check skb->mark is not writeable by SK_SKB",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, mark)),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ .errstr = "invalid bpf_context access",
+ },
+ {
+ "check skb->tc_index is writeable by SK_SKB",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, tc_index)),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ },
+ {
+ "check skb->priority is writeable by SK_SKB",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, priority)),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ },
+ {
+ "direct packet read for SK_SKB",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ },
+ {
+ "direct packet write for SK_SKB",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ },
+ {
+ "overlapping checks for direct packet access SK_SKB",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_2, 6),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ },
+ {
+ "valid access family in SK_MSG",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct sk_msg_md, family)),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_MSG,
+ },
+ {
+ "valid access remote_ip4 in SK_MSG",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct sk_msg_md, remote_ip4)),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_MSG,
+ },
+ {
+ "valid access local_ip4 in SK_MSG",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct sk_msg_md, local_ip4)),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_MSG,
+ },
+ {
+ "valid access remote_port in SK_MSG",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct sk_msg_md, remote_port)),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_MSG,
+ },
+ {
+ "valid access local_port in SK_MSG",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct sk_msg_md, local_port)),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_MSG,
+ },
+ {
+ "valid access remote_ip6 in SK_MSG",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct sk_msg_md, remote_ip6[0])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct sk_msg_md, remote_ip6[1])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct sk_msg_md, remote_ip6[2])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct sk_msg_md, remote_ip6[3])),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ },
+ {
+ "valid access local_ip6 in SK_MSG",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct sk_msg_md, local_ip6[0])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct sk_msg_md, local_ip6[1])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct sk_msg_md, local_ip6[2])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct sk_msg_md, local_ip6[3])),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ },
+ {
+ "invalid 64B read of family in SK_MSG",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1,
+ offsetof(struct sk_msg_md, family)),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_MSG,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "invalid read past end of SK_MSG",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct sk_msg_md, local_port) + 4),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R0 !read_ok",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_MSG,
+ },
+ {
+ "invalid read offset in SK_MSG",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct sk_msg_md, family) + 1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_MSG,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "direct packet read for SK_MSG",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1,
+ offsetof(struct sk_msg_md, data)),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1,
+ offsetof(struct sk_msg_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_MSG,
+ },
+ {
+ "direct packet write for SK_MSG",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1,
+ offsetof(struct sk_msg_md, data)),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1,
+ offsetof(struct sk_msg_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_MSG,
+ },
+ {
+ "overlapping checks for direct packet access SK_MSG",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1,
+ offsetof(struct sk_msg_md, data)),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1,
+ offsetof(struct sk_msg_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_2, 6),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_MSG,
+ },
+ {
+ "check skb->mark is not writeable by sockets",
+ .insns = {
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .errstr_unpriv = "R1 leaks addr",
+ .result = REJECT,
+ },
+ {
+ "check skb->tc_index is not writeable by sockets",
+ .insns = {
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+ offsetof(struct __sk_buff, tc_index)),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .errstr_unpriv = "R1 leaks addr",
+ .result = REJECT,
+ },
+ {
+ "check cb access: byte",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[0]) + 1),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[0]) + 2),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[0]) + 3),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[1])),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[1]) + 1),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[1]) + 2),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[1]) + 3),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[2])),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[2]) + 1),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[2]) + 2),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[2]) + 3),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[3])),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[3]) + 1),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[3]) + 2),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[3]) + 3),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[4])),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[4]) + 1),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[4]) + 2),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[4]) + 3),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[0]) + 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[0]) + 2),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[0]) + 3),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[1])),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[1]) + 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[1]) + 2),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[1]) + 3),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[2])),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[2]) + 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[2]) + 2),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[2]) + 3),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[3])),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[3]) + 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[3]) + 2),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[3]) + 3),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[4])),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[4]) + 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[4]) + 2),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[4]) + 3),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ },
+ {
+ "__sk_buff->hash, offset 0, byte store not permitted",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, hash)),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ },
+ {
+ "__sk_buff->tc_index, offset 3, byte store not permitted",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, tc_index) + 3),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ },
+ {
+ "check skb->hash byte load permitted",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, hash)),
+#else
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, hash) + 3),
+#endif
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ },
+ {
+ "check skb->hash byte load permitted 1",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, hash) + 1),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ },
+ {
+ "check skb->hash byte load permitted 2",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, hash) + 2),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ },
+ {
+ "check skb->hash byte load permitted 3",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, hash) + 3),
+#else
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, hash)),
+#endif
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ },
+ {
+ "check cb access: byte, wrong type",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
+ },
+ {
+ "check cb access: half",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[0]) + 2),
+ BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[1])),
+ BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[1]) + 2),
+ BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[2])),
+ BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[2]) + 2),
+ BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[3])),
+ BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[3]) + 2),
+ BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[4])),
+ BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[4]) + 2),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[0]) + 2),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[1])),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[1]) + 2),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[2])),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[2]) + 2),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[3])),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[3]) + 2),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[4])),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[4]) + 2),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ },
+ {
+ "check cb access: half, unaligned",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[0]) + 1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "misaligned context access",
+ .result = REJECT,
+ .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
+ },
+ {
+ "check __sk_buff->hash, offset 0, half store not permitted",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, hash)),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ },
+ {
+ "check __sk_buff->tc_index, offset 2, half store not permitted",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, tc_index) + 2),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ },
+ {
+ "check skb->hash half load permitted",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, hash)),
+#else
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, hash) + 2),
+#endif
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ },
+ {
+ "check skb->hash half load permitted 2",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, hash) + 2),
+#else
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, hash)),
+#endif
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ },
+ {
+ "check skb->hash half load not permitted, unaligned 1",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, hash) + 1),
+#else
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, hash) + 3),
+#endif
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "check skb->hash half load not permitted, unaligned 3",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, hash) + 3),
+#else
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, hash) + 1),
+#endif
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "check cb access: half, wrong type",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
+ },
+ {
+ "check cb access: word",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[1])),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[2])),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[3])),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[4])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[1])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[2])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[3])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[4])),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ },
+ {
+ "check cb access: word, unaligned 1",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[0]) + 2),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "misaligned context access",
+ .result = REJECT,
+ .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
+ },
+ {
+ "check cb access: word, unaligned 2",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[4]) + 1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "misaligned context access",
+ .result = REJECT,
+ .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
+ },
+ {
+ "check cb access: word, unaligned 3",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[4]) + 2),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "misaligned context access",
+ .result = REJECT,
+ .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
+ },
+ {
+ "check cb access: word, unaligned 4",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[4]) + 3),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "misaligned context access",
+ .result = REJECT,
+ .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
+ },
+ {
+ "check cb access: double",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[2])),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[2])),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ },
+ {
+ "check cb access: double, unaligned 1",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[1])),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "misaligned context access",
+ .result = REJECT,
+ .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
+ },
+ {
+ "check cb access: double, unaligned 2",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[3])),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "misaligned context access",
+ .result = REJECT,
+ .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
+ },
+ {
+ "check cb access: double, oob 1",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[4])),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ },
+ {
+ "check cb access: double, oob 2",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[4])),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ },
+ {
+ "check __sk_buff->ifindex dw store not permitted",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, ifindex)),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ },
+ {
+ "check __sk_buff->ifindex dw load not permitted",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, ifindex)),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ },
+ {
+ "check cb access: double, wrong type",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
+ },
+ {
+ "check out of range skb->cb access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[0]) + 256),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .errstr_unpriv = "",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SCHED_ACT,
+ },
+ {
+ "write skb fields from socket prog",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[4])),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, tc_index)),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[2])),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .errstr_unpriv = "R1 leaks addr",
+ .result_unpriv = REJECT,
+ },
+ {
+ "write skb fields from tc_cls_act prog",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, mark)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, tc_index)),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, tc_index)),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[3])),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "PTR_TO_STACK store/load",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -10),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 2, 0xfaceb00c),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 2),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = 0xfaceb00c,
+ },
+ {
+ "PTR_TO_STACK store/load - bad alignment on off",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 2, 0xfaceb00c),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 2),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "misaligned stack access off (0x0; 0x0)+-8+2 size 8",
+ },
+ {
+ "PTR_TO_STACK store/load - bad alignment on reg",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -10),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 8, 0xfaceb00c),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 8),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "misaligned stack access off (0x0; 0x0)+-10+8 size 8",
+ },
+ {
+ "PTR_TO_STACK store/load - out of bounds low",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -80000),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 8, 0xfaceb00c),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 8),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid stack off=-79992 size=8",
+ .errstr_unpriv = "R1 stack pointer arithmetic goes out of range",
+ },
+ {
+ "PTR_TO_STACK store/load - out of bounds high",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 8, 0xfaceb00c),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 8),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid stack off=0 size=8",
+ },
+ {
+ "unpriv: return pointer",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R0 leaks addr",
+ .retval = POINTER_VALUE,
+ },
+ {
+ "unpriv: add const to pointer",
+ .insns = {
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: add pointer to pointer",
+ .insns = {
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_10),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "R1 pointer += pointer",
+ },
+ {
+ "unpriv: neg pointer",
+ .insns = {
+ BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R1 pointer arithmetic",
+ },
+ {
+ "unpriv: cmp pointer with const",
+ .insns = {
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R1 pointer comparison",
+ },
+ {
+ "unpriv: cmp pointer with pointer",
+ .insns = {
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_10, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R10 pointer comparison",
+ },
+ {
+ "unpriv: check that printk is disallowed",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_2, 8),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_1),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_trace_printk),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "unknown func bpf_trace_printk#6",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: pass pointer to helper function",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_update_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .errstr_unpriv = "R4 leaks addr",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: indirectly pass pointer on stack to helper function",
+ .insns = {
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_10, -8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .errstr = "invalid indirect read from stack off -8+0 size 8",
+ .result = REJECT,
+ },
+ {
+ "unpriv: mangle pointer on stack 1",
+ .insns = {
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_10, -8),
+ BPF_ST_MEM(BPF_W, BPF_REG_10, -8, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "attempt to corrupt spilled",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: mangle pointer on stack 2",
+ .insns = {
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_10, -8),
+ BPF_ST_MEM(BPF_B, BPF_REG_10, -1, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "attempt to corrupt spilled",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: read pointer from stack in small chunks",
+ .insns = {
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_10, -8),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid size",
+ .result = REJECT,
+ },
+ {
+ "unpriv: write pointer into ctx",
+ .insns = {
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R1 leaks addr",
+ .result_unpriv = REJECT,
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ },
+ {
+ "unpriv: spill/fill of ctx",
+ .insns = {
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: spill/fill of ctx 2",
+ .insns = {
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_hash_recalc),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "unpriv: spill/fill of ctx 3",
+ .insns = {
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_hash_recalc),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "R1 type=fp expected=ctx",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "unpriv: spill/fill of ctx 4",
+ .insns = {
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_10,
+ BPF_REG_0, -8, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_hash_recalc),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "R1 type=inv expected=ctx",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "unpriv: spill/fill of different pointers stx",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_3, 42),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3,
+ offsetof(struct __sk_buff, mark)),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "same insn cannot be used with different pointers",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "unpriv: spill/fill of different pointers ldx",
+ .insns = {
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2,
+ -(__s32)offsetof(struct bpf_perf_event_data,
+ sample_period) - 8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1,
+ offsetof(struct bpf_perf_event_data,
+ sample_period)),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "same insn cannot be used with different pointers",
+ .prog_type = BPF_PROG_TYPE_PERF_EVENT,
+ },
+ {
+ "unpriv: write pointer into map elem value",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .errstr_unpriv = "R0 leaks addr",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "alu32: mov u32 const",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_7, 0),
+ BPF_ALU32_IMM(BPF_AND, BPF_REG_7, 1),
+ BPF_MOV32_REG(BPF_REG_0, BPF_REG_7),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R7 invalid mem access 'inv'",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ .retval = 0,
+ },
+ {
+ "unpriv: partial copy of pointer",
+ .insns = {
+ BPF_MOV32_REG(BPF_REG_1, BPF_REG_10),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R10 partial copy",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: pass pointer to tail_call",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_1),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_prog1 = { 1 },
+ .errstr_unpriv = "R3 leaks addr into helper",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: cmp map pointer with zero",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_1, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 1 },
+ .errstr_unpriv = "R1 pointer comparison",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: write into frame pointer",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_10, BPF_REG_1),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "frame pointer is read only",
+ .result = REJECT,
+ },
+ {
+ "unpriv: spill/fill frame pointer",
+ .insns = {
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "frame pointer is read only",
+ .result = REJECT,
+ },
+ {
+ "unpriv: cmp of frame pointer",
+ .insns = {
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_10, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R10 pointer comparison",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: adding of fp, reg",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_1, 0),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_10),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, -8),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R1 stack pointer arithmetic goes out of range",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: adding of fp, imm",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, -8),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R1 stack pointer arithmetic goes out of range",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: cmp of stack pointer",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_2, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R2 pointer comparison",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "runtime/jit: tail_call within bounds, prog once",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_prog1 = { 1 },
+ .result = ACCEPT,
+ .retval = 42,
+ },
+ {
+ "runtime/jit: tail_call within bounds, prog loop",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_3, 1),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_prog1 = { 1 },
+ .result = ACCEPT,
+ .retval = 41,
+ },
+ {
+ "runtime/jit: tail_call within bounds, no prog",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_3, 2),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_prog1 = { 1 },
+ .result = ACCEPT,
+ .retval = 1,
+ },
+ {
+ "runtime/jit: tail_call out of bounds",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_3, 256),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_prog1 = { 1 },
+ .result = ACCEPT,
+ .retval = 2,
+ },
+ {
+ "runtime/jit: pass negative index to tail_call",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_3, -1),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_prog1 = { 1 },
+ .result = ACCEPT,
+ .retval = 2,
+ },
+ {
+ "runtime/jit: pass > 32bit index to tail_call",
+ .insns = {
+ BPF_LD_IMM64(BPF_REG_3, 0x100000000ULL),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_prog1 = { 2 },
+ .result = ACCEPT,
+ .retval = 42,
+ /* Verifier rewrite for unpriv skips tail call here. */
+ .retval_unpriv = 2,
+ },
+ {
+ "stack pointer arithmetic",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_1, 4),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 0),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -10),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1),
+ BPF_ST_MEM(0, BPF_REG_2, 4, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8),
+ BPF_ST_MEM(0, BPF_REG_2, 4, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ },
+ {
+ "raw_stack: no skb_load_bytes",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 4),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+ BPF_MOV64_IMM(BPF_REG_4, 8),
+ /* Call to skb_load_bytes() omitted. */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid read from stack off -8+0 size 8",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "raw_stack: skb_load_bytes, negative len",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 4),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+ BPF_MOV64_IMM(BPF_REG_4, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_skb_load_bytes),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "R4 min value is negative",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "raw_stack: skb_load_bytes, negative len 2",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 4),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+ BPF_MOV64_IMM(BPF_REG_4, ~0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_skb_load_bytes),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "R4 min value is negative",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "raw_stack: skb_load_bytes, zero len",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 4),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_skb_load_bytes),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid stack type R3",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "raw_stack: skb_load_bytes, no init",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 4),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+ BPF_MOV64_IMM(BPF_REG_4, 8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_skb_load_bytes),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "raw_stack: skb_load_bytes, init",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 4),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_6, 0, 0xcafe),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+ BPF_MOV64_IMM(BPF_REG_4, 8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_skb_load_bytes),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "raw_stack: skb_load_bytes, spilled regs around bounds",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 4),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -16),
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 8),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+ BPF_MOV64_IMM(BPF_REG_4, 8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_skb_load_bytes),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 8),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0,
+ offsetof(struct __sk_buff, mark)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_2,
+ offsetof(struct __sk_buff, priority)),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "raw_stack: skb_load_bytes, spilled regs corruption",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 4),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+ BPF_MOV64_IMM(BPF_REG_4, 8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_skb_load_bytes),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0,
+ offsetof(struct __sk_buff, mark)),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "R0 invalid mem access 'inv'",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "raw_stack: skb_load_bytes, spilled regs corruption 2",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 4),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -16),
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 8),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+ BPF_MOV64_IMM(BPF_REG_4, 8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_skb_load_bytes),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_6, 0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0,
+ offsetof(struct __sk_buff, mark)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_2,
+ offsetof(struct __sk_buff, priority)),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_3,
+ offsetof(struct __sk_buff, pkt_type)),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_3),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "R3 invalid mem access 'inv'",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "raw_stack: skb_load_bytes, spilled regs + data",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 4),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -16),
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 8),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+ BPF_MOV64_IMM(BPF_REG_4, 8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_skb_load_bytes),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_6, 0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0,
+ offsetof(struct __sk_buff, mark)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_2,
+ offsetof(struct __sk_buff, priority)),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_3),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "raw_stack: skb_load_bytes, invalid access 1",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 4),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -513),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+ BPF_MOV64_IMM(BPF_REG_4, 8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_skb_load_bytes),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid stack type R3 off=-513 access_size=8",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "raw_stack: skb_load_bytes, invalid access 2",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 4),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -1),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+ BPF_MOV64_IMM(BPF_REG_4, 8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_skb_load_bytes),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid stack type R3 off=-1 access_size=8",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "raw_stack: skb_load_bytes, invalid access 3",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 4),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 0xffffffff),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+ BPF_MOV64_IMM(BPF_REG_4, 0xffffffff),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_skb_load_bytes),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "R4 min value is negative",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "raw_stack: skb_load_bytes, invalid access 4",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 4),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -1),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+ BPF_MOV64_IMM(BPF_REG_4, 0x7fffffff),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_skb_load_bytes),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "R4 unbounded memory access, use 'var &= const' or 'if (var < const)'",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "raw_stack: skb_load_bytes, invalid access 5",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 4),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -512),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+ BPF_MOV64_IMM(BPF_REG_4, 0x7fffffff),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_skb_load_bytes),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "R4 unbounded memory access, use 'var &= const' or 'if (var < const)'",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "raw_stack: skb_load_bytes, invalid access 6",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 4),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -512),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_skb_load_bytes),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid stack type R3 off=-512 access_size=0",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "raw_stack: skb_load_bytes, large access",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 4),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -512),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+ BPF_MOV64_IMM(BPF_REG_4, 512),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_skb_load_bytes),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "context stores via ST",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, offsetof(struct __sk_buff, mark), 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "BPF_ST stores into R1 context is not allowed",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "context stores via XADD",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_W, BPF_REG_1,
+ BPF_REG_0, offsetof(struct __sk_buff, mark), 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "BPF_XADD stores into R1 context is not allowed",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "direct packet access: test1",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "direct packet access: test2",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_3),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_4, 15),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_3, 7),
+ BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_3, 12),
+ BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 14),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_4),
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, len)),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 49),
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 49),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_2),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_3),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_3, 4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "direct packet access: test3",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access off=76",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
+ },
+ {
+ "direct packet access: test4 (write)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "direct packet access: test5 (pkt_end >= reg, good access)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "direct packet access: test6 (pkt_end >= reg, bad access)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 3),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid access to packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "direct packet access: test7 (pkt_end >= reg, both accesses)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 3),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid access to packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "direct packet access: test8 (double test, variant 1)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 4),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "direct packet access: test9 (double test, variant 2)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "direct packet access: test10 (write invalid)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid access to packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "direct packet access: test11 (shift, good access)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 22),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 8),
+ BPF_MOV64_IMM(BPF_REG_3, 144),
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_3),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 23),
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_5, 3),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_5),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .retval = 1,
+ },
+ {
+ "direct packet access: test12 (and, good access)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 22),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 8),
+ BPF_MOV64_IMM(BPF_REG_3, 144),
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_3),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 23),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_5, 15),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_5),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .retval = 1,
+ },
+ {
+ "direct packet access: test13 (branches, good access)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 22),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 13),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_MOV64_IMM(BPF_REG_4, 1),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_4, 2),
+ BPF_MOV64_IMM(BPF_REG_3, 14),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_MOV64_IMM(BPF_REG_3, 24),
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_3),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 23),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_5, 15),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_5),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .retval = 1,
+ },
+ {
+ "direct packet access: test14 (pkt_ptr += 0, CONST_IMM, good access)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 22),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 7),
+ BPF_MOV64_IMM(BPF_REG_5, 12),
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_5, 4),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_5),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_6, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .retval = 1,
+ },
+ {
+ "direct packet access: test15 (spill with xadd)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 8),
+ BPF_MOV64_IMM(BPF_REG_5, 4096),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+ BPF_STX_XADD(BPF_DW, BPF_REG_4, BPF_REG_5, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_4, 0),
+ BPF_STX_MEM(BPF_W, BPF_REG_2, BPF_REG_5, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R2 invalid mem access 'inv'",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "direct packet access: test16 (arith on data_end)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 16),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "direct packet access: test17 (pruning, alignment)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 14),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_7, 1, 4),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
+ BPF_JMP_A(-6),
+ },
+ .errstr = "misaligned packet access off 2+(0x0; 0x0)+15+-4 size 4",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
+ },
+ {
+ "direct packet access: test18 (imm += pkt_ptr, 1)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_IMM(BPF_REG_0, 8),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "direct packet access: test19 (imm += pkt_ptr, 2)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3),
+ BPF_MOV64_IMM(BPF_REG_4, 4),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_2),
+ BPF_STX_MEM(BPF_B, BPF_REG_4, BPF_REG_4, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "direct packet access: test20 (x += pkt_ptr, 1)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_IMM(BPF_REG_0, 0xffffffff),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0x7fff),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_2),
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_4),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0x7fff - 1),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_4, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "direct packet access: test21 (x += pkt_ptr, 2)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 9),
+ BPF_MOV64_IMM(BPF_REG_4, 0xffffffff),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_4, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_4, 0x7fff),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_2),
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_4),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0x7fff - 1),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_4, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "direct packet access: test22 (x += pkt_ptr, 3)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_3, -16),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_10, -16),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 11),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -8),
+ BPF_MOV64_IMM(BPF_REG_4, 0xffffffff),
+ BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_4, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 49),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_2),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_4),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 2),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2),
+ BPF_MOV64_IMM(BPF_REG_2, 1),
+ BPF_STX_MEM(BPF_H, BPF_REG_4, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "direct packet access: test23 (x += pkt_ptr, 4)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xffff),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_0, 31),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_4),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0xffff - 1),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = REJECT,
+ .errstr = "invalid access to packet, off=0 size=8, R5(id=1,off=0,r=0)",
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "direct packet access: test24 (x += pkt_ptr, 5)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_IMM(BPF_REG_0, 0xffffffff),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xff),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_0, 64),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_4),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x7fff - 1),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "direct packet access: test25 (marking on <, good access)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JLT, BPF_REG_0, BPF_REG_3, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, -4),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "direct packet access: test26 (marking on <, bad access)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JLT, BPF_REG_0, BPF_REG_3, 3),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JA, 0, 0, -3),
+ },
+ .result = REJECT,
+ .errstr = "invalid access to packet",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "direct packet access: test27 (marking on <=, good access)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_0, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .retval = 1,
+ },
+ {
+ "direct packet access: test28 (marking on <=, bad access)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, -4),
+ },
+ .result = REJECT,
+ .errstr = "invalid access to packet",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "helper access to packet: test1, valid packet_ptr range",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 5),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_update_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 5 },
+ .result_unpriv = ACCEPT,
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ },
+ {
+ "helper access to packet: test2, unchecked packet_ptr",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 1 },
+ .result = REJECT,
+ .errstr = "invalid access to packet",
+ .prog_type = BPF_PROG_TYPE_XDP,
+ },
+ {
+ "helper access to packet: test3, variable add",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 10),
+ BPF_LDX_MEM(BPF_B, BPF_REG_5, BPF_REG_2, 0),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_5),
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_4),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_3, 4),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_4),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 11 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ },
+ {
+ "helper access to packet: test4, packet_ptr with bad range",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 7 },
+ .result = REJECT,
+ .errstr = "invalid access to packet",
+ .prog_type = BPF_PROG_TYPE_XDP,
+ },
+ {
+ "helper access to packet: test5, packet_ptr with too short range",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 7),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 3),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 6 },
+ .result = REJECT,
+ .errstr = "invalid access to packet",
+ .prog_type = BPF_PROG_TYPE_XDP,
+ },
+ {
+ "helper access to packet: test6, cls valid packet_ptr range",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 5),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_update_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 5 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "helper access to packet: test7, cls unchecked packet_ptr",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 1 },
+ .result = REJECT,
+ .errstr = "invalid access to packet",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "helper access to packet: test8, cls variable add",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 10),
+ BPF_LDX_MEM(BPF_B, BPF_REG_5, BPF_REG_2, 0),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_5),
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_4),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_3, 4),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_4),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 11 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "helper access to packet: test9, cls packet_ptr with bad range",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 7 },
+ .result = REJECT,
+ .errstr = "invalid access to packet",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "helper access to packet: test10, cls packet_ptr with too short range",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 7),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 3),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 6 },
+ .result = REJECT,
+ .errstr = "invalid access to packet",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "helper access to packet: test11, cls unsuitable helper 1",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 7),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_7, 4),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_4, 42),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_skb_store_bytes),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "helper access to the packet",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "helper access to packet: test12, cls unsuitable helper 2",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_7, 3),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_4, 4),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_skb_load_bytes),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "helper access to the packet",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "helper access to packet: test13, cls helper ok",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_MOV64_IMM(BPF_REG_2, 4),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_csum_diff),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "helper access to packet: test14, cls helper ok sub",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 4),
+ BPF_MOV64_IMM(BPF_REG_2, 4),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_csum_diff),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "helper access to packet: test15, cls helper fail sub",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 12),
+ BPF_MOV64_IMM(BPF_REG_2, 4),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_csum_diff),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid access to packet",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "helper access to packet: test16, cls helper fail range 1",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_MOV64_IMM(BPF_REG_2, 8),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_csum_diff),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid access to packet",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "helper access to packet: test17, cls helper fail range 2",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_MOV64_IMM(BPF_REG_2, -9),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_csum_diff),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "R2 min value is negative",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "helper access to packet: test18, cls helper fail range 3",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_MOV64_IMM(BPF_REG_2, ~0),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_csum_diff),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "R2 min value is negative",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "helper access to packet: test19, cls helper range zero",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_csum_diff),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "helper access to packet: test20, pkt end as input",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+ BPF_MOV64_IMM(BPF_REG_2, 4),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_csum_diff),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "R1 type=pkt_end expected=fp",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "helper access to packet: test21, wrong reg",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
+ BPF_MOV64_IMM(BPF_REG_2, 4),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_csum_diff),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid access to packet",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "valid map access into an array with a constant",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0,
+ offsetof(struct test_val, foo)),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr_unpriv = "R0 leaks addr",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "valid map access into an array with a register",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_MOV64_IMM(BPF_REG_1, 4),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0,
+ offsetof(struct test_val, foo)),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr_unpriv = "R0 leaks addr",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "valid map access into an array with a variable",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_1, MAX_ENTRIES, 3),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0,
+ offsetof(struct test_val, foo)),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr_unpriv = "R0 leaks addr",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "valid map access into an array with a signed variable",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 0xffffffff, 1),
+ BPF_MOV32_IMM(BPF_REG_1, 0),
+ BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES),
+ BPF_JMP_REG(BPF_JSGT, BPF_REG_2, BPF_REG_1, 1),
+ BPF_MOV32_IMM(BPF_REG_1, 0),
+ BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0,
+ offsetof(struct test_val, foo)),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr_unpriv = "R0 leaks addr",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "invalid map access into an array with a constant",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, (MAX_ENTRIES + 1) << 2,
+ offsetof(struct test_val, foo)),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr = "invalid access to map value, value_size=48 off=48 size=8",
+ .result = REJECT,
+ },
+ {
+ "invalid map access into an array with a register",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_MOV64_IMM(BPF_REG_1, MAX_ENTRIES + 1),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0,
+ offsetof(struct test_val, foo)),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr = "R0 min value is outside of the array range",
+ .result = REJECT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "invalid map access into an array with a variable",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0,
+ offsetof(struct test_val, foo)),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr = "R0 unbounded memory access, make sure to bounds check any array access into a map",
+ .result = REJECT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "invalid map access into an array with no floor check",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
+ BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES),
+ BPF_JMP_REG(BPF_JSGT, BPF_REG_2, BPF_REG_1, 1),
+ BPF_MOV32_IMM(BPF_REG_1, 0),
+ BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0,
+ offsetof(struct test_val, foo)),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr_unpriv = "R0 leaks addr",
+ .errstr = "R0 unbounded memory access",
+ .result_unpriv = REJECT,
+ .result = REJECT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "invalid map access into an array with a invalid max check",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+ BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES + 1),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 1),
+ BPF_MOV32_IMM(BPF_REG_1, 0),
+ BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0,
+ offsetof(struct test_val, foo)),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr_unpriv = "R0 leaks addr",
+ .errstr = "invalid access to map value, value_size=48 off=44 size=8",
+ .result_unpriv = REJECT,
+ .result = REJECT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "invalid map access into an array with a invalid max check",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10),
+ BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_8),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0,
+ offsetof(struct test_val, foo)),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3, 11 },
+ .errstr = "R0 pointer += pointer",
+ .result = REJECT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "valid cgroup storage access",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_local_storage),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_cgroup_storage = { 1 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ },
+ {
+ "invalid cgroup storage access 1",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_local_storage),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 1 },
+ .result = REJECT,
+ .errstr = "cannot pass map_type 1 into func bpf_get_local_storage",
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ },
+ {
+ "invalid cgroup storage access 2",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 1),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_local_storage),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "fd 1 is not pointing to valid bpf_map",
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ },
+ {
+ "invalid per-cgroup storage access 3",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_local_storage),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 256),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_cgroup_storage = { 1 },
+ .result = REJECT,
+ .errstr = "invalid access to map value, value_size=64 off=256 size=4",
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ },
+ {
+ "invalid cgroup storage access 4",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_local_storage),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, -2),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_cgroup_storage = { 1 },
+ .result = REJECT,
+ .errstr = "invalid access to map value, value_size=64 off=-2 size=4",
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "invalid cgroup storage access 5",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 7),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_local_storage),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_cgroup_storage = { 1 },
+ .result = REJECT,
+ .errstr = "get_local_storage() doesn't support non-zero flags",
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ },
+ {
+ "invalid cgroup storage access 6",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_1),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_local_storage),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_cgroup_storage = { 1 },
+ .result = REJECT,
+ .errstr = "get_local_storage() doesn't support non-zero flags",
+ .errstr_unpriv = "R2 leaks addr into helper function",
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ },
+ {
+ "multiple registers share map_lookup_elem result",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_1, 10),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 4 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS
+ },
+ {
+ "alu ops on ptr_to_map_value_or_null, 1",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_1, 10),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 2),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 4 },
+ .errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS
+ },
+ {
+ "alu ops on ptr_to_map_value_or_null, 2",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_1, 10),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_4, -1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 4 },
+ .errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS
+ },
+ {
+ "alu ops on ptr_to_map_value_or_null, 3",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_1, 10),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_4, 1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 4 },
+ .errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS
+ },
+ {
+ "invalid memory access with multiple map_lookup_elem calls",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_1, 10),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 4 },
+ .result = REJECT,
+ .errstr = "R4 !read_ok",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS
+ },
+ {
+ "valid indirect map_lookup_elem access with 2nd lookup in branch",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_1, 10),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_2, 10),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 0, 3),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 4 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS
+ },
+ {
+ "invalid map access from else condition",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_1, MAX_ENTRIES-1, 1),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr = "R0 unbounded memory access",
+ .result = REJECT,
+ .errstr_unpriv = "R0 leaks addr",
+ .result_unpriv = REJECT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "constant register |= constant should keep constant type",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -48),
+ BPF_MOV64_IMM(BPF_REG_2, 34),
+ BPF_ALU64_IMM(BPF_OR, BPF_REG_2, 13),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "constant register |= constant should not bypass stack boundary checks",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -48),
+ BPF_MOV64_IMM(BPF_REG_2, 34),
+ BPF_ALU64_IMM(BPF_OR, BPF_REG_2, 24),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid stack type R1 off=-48 access_size=58",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "constant register |= constant register should keep constant type",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -48),
+ BPF_MOV64_IMM(BPF_REG_2, 34),
+ BPF_MOV64_IMM(BPF_REG_4, 13),
+ BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_4),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "constant register |= constant register should not bypass stack boundary checks",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -48),
+ BPF_MOV64_IMM(BPF_REG_2, 34),
+ BPF_MOV64_IMM(BPF_REG_4, 24),
+ BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_4),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid stack type R1 off=-48 access_size=58",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "invalid direct packet write for LWT_IN",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "cannot write into packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_LWT_IN,
+ },
+ {
+ "invalid direct packet write for LWT_OUT",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "cannot write into packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_LWT_OUT,
+ },
+ {
+ "direct packet write for LWT_XMIT",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_LWT_XMIT,
+ },
+ {
+ "direct packet read for LWT_IN",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_LWT_IN,
+ },
+ {
+ "direct packet read for LWT_OUT",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_LWT_OUT,
+ },
+ {
+ "direct packet read for LWT_XMIT",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_LWT_XMIT,
+ },
+ {
+ "overlapping checks for direct packet access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_2, 6),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_LWT_XMIT,
+ },
+ {
+ "make headroom for LWT_XMIT",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_MOV64_IMM(BPF_REG_2, 34),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_skb_change_head),
+ /* split for s390 to succeed */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_MOV64_IMM(BPF_REG_2, 42),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_skb_change_head),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_LWT_XMIT,
+ },
+ {
+ "invalid access of tc_classid for LWT_IN",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, tc_classid)),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid bpf_context access",
+ },
+ {
+ "invalid access of tc_classid for LWT_OUT",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, tc_classid)),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid bpf_context access",
+ },
+ {
+ "invalid access of tc_classid for LWT_XMIT",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, tc_classid)),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid bpf_context access",
+ },
+ {
+ "leak pointer into ctx 1",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_STX_XADD(BPF_DW, BPF_REG_1, BPF_REG_2,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 2 },
+ .errstr_unpriv = "R2 leaks addr into mem",
+ .result_unpriv = REJECT,
+ .result = REJECT,
+ .errstr = "BPF_XADD stores into R1 context is not allowed",
+ },
+ {
+ "leak pointer into ctx 2",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_STX_XADD(BPF_DW, BPF_REG_1, BPF_REG_10,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R10 leaks addr into mem",
+ .result_unpriv = REJECT,
+ .result = REJECT,
+ .errstr = "BPF_XADD stores into R1 context is not allowed",
+ },
+ {
+ "leak pointer into ctx 3",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 1 },
+ .errstr_unpriv = "R2 leaks addr into ctx",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "leak pointer into map val",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0),
+ BPF_STX_XADD(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 4 },
+ .errstr_unpriv = "R6 leaks addr into mem",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "helper access to map: full range",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val)),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to map: partial range",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_2, 8),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to map: empty range",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_EMIT_CALL(BPF_FUNC_trace_printk),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr = "invalid access to map value, value_size=48 off=0 size=0",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to map: out-of-bound range",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val) + 8),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr = "invalid access to map value, value_size=48 off=0 size=56",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to map: negative range",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_2, -8),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr = "R2 min value is negative",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to adjusted map (via const imm): full range",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1,
+ offsetof(struct test_val, foo)),
+ BPF_MOV64_IMM(BPF_REG_2,
+ sizeof(struct test_val) -
+ offsetof(struct test_val, foo)),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to adjusted map (via const imm): partial range",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1,
+ offsetof(struct test_val, foo)),
+ BPF_MOV64_IMM(BPF_REG_2, 8),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to adjusted map (via const imm): empty range",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1,
+ offsetof(struct test_val, foo)),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_EMIT_CALL(BPF_FUNC_trace_printk),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr = "invalid access to map value, value_size=48 off=4 size=0",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to adjusted map (via const imm): out-of-bound range",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1,
+ offsetof(struct test_val, foo)),
+ BPF_MOV64_IMM(BPF_REG_2,
+ sizeof(struct test_val) -
+ offsetof(struct test_val, foo) + 8),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr = "invalid access to map value, value_size=48 off=4 size=52",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to adjusted map (via const imm): negative range (> adjustment)",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1,
+ offsetof(struct test_val, foo)),
+ BPF_MOV64_IMM(BPF_REG_2, -8),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr = "R2 min value is negative",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to adjusted map (via const imm): negative range (< adjustment)",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1,
+ offsetof(struct test_val, foo)),
+ BPF_MOV64_IMM(BPF_REG_2, -1),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr = "R2 min value is negative",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to adjusted map (via const reg): full range",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_3,
+ offsetof(struct test_val, foo)),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+ BPF_MOV64_IMM(BPF_REG_2,
+ sizeof(struct test_val) -
+ offsetof(struct test_val, foo)),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to adjusted map (via const reg): partial range",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_3,
+ offsetof(struct test_val, foo)),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+ BPF_MOV64_IMM(BPF_REG_2, 8),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to adjusted map (via const reg): empty range",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_EMIT_CALL(BPF_FUNC_trace_printk),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr = "R1 min value is outside of the array range",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to adjusted map (via const reg): out-of-bound range",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_3,
+ offsetof(struct test_val, foo)),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+ BPF_MOV64_IMM(BPF_REG_2,
+ sizeof(struct test_val) -
+ offsetof(struct test_val, foo) + 8),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr = "invalid access to map value, value_size=48 off=4 size=52",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to adjusted map (via const reg): negative range (> adjustment)",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_3,
+ offsetof(struct test_val, foo)),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+ BPF_MOV64_IMM(BPF_REG_2, -8),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr = "R2 min value is negative",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to adjusted map (via const reg): negative range (< adjustment)",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_3,
+ offsetof(struct test_val, foo)),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+ BPF_MOV64_IMM(BPF_REG_2, -1),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr = "R2 min value is negative",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to adjusted map (via variable): full range",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_3,
+ offsetof(struct test_val, foo), 4),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+ BPF_MOV64_IMM(BPF_REG_2,
+ sizeof(struct test_val) -
+ offsetof(struct test_val, foo)),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to adjusted map (via variable): partial range",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_3,
+ offsetof(struct test_val, foo), 4),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+ BPF_MOV64_IMM(BPF_REG_2, 8),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to adjusted map (via variable): empty range",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_3,
+ offsetof(struct test_val, foo), 3),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_EMIT_CALL(BPF_FUNC_trace_printk),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr = "R1 min value is outside of the array range",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to adjusted map (via variable): no max check",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+ BPF_MOV64_IMM(BPF_REG_2, 1),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr = "R1 unbounded memory access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to adjusted map (via variable): wrong max check",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_3,
+ offsetof(struct test_val, foo), 4),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+ BPF_MOV64_IMM(BPF_REG_2,
+ sizeof(struct test_val) -
+ offsetof(struct test_val, foo) + 1),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr = "invalid access to map value, value_size=48 off=4 size=45",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to map: bounds check using <, good access",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JLT, BPF_REG_3, 32, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+ BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to map: bounds check using <, bad access",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JLT, BPF_REG_3, 32, 4),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+ BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .result = REJECT,
+ .errstr = "R1 unbounded memory access",
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to map: bounds check using <=, good access",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JLE, BPF_REG_3, 32, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+ BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to map: bounds check using <=, bad access",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JLE, BPF_REG_3, 32, 4),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+ BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .result = REJECT,
+ .errstr = "R1 unbounded memory access",
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to map: bounds check using s<, good access",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, 32, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, 0, -3),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+ BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to map: bounds check using s<, good access 2",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, 32, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, -3, -3),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+ BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to map: bounds check using s<, bad access",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, 32, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, -3, -3),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+ BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .result = REJECT,
+ .errstr = "R1 min value is negative",
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to map: bounds check using s<=, good access",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, 32, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, 0, -3),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+ BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to map: bounds check using s<=, good access 2",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, 32, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, -3, -3),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+ BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to map: bounds check using s<=, bad access",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, 32, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, -3, -3),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+ BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .result = REJECT,
+ .errstr = "R1 min value is negative",
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "map lookup helper access to map",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map3 = { 3, 8 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "map update helper access to map",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_update_elem),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map3 = { 3, 10 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "map update helper access to map: wrong size",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_update_elem),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .fixup_map3 = { 10 },
+ .result = REJECT,
+ .errstr = "invalid access to map value, value_size=8 off=0 size=16",
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "map helper access to adjusted map (via const imm)",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2,
+ offsetof(struct other_val, bar)),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map3 = { 3, 9 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "map helper access to adjusted map (via const imm): out-of-bound 1",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2,
+ sizeof(struct other_val) - 4),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map3 = { 3, 9 },
+ .result = REJECT,
+ .errstr = "invalid access to map value, value_size=16 off=12 size=8",
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "map helper access to adjusted map (via const imm): out-of-bound 2",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map3 = { 3, 9 },
+ .result = REJECT,
+ .errstr = "invalid access to map value, value_size=16 off=-4 size=8",
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "map helper access to adjusted map (via const reg)",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_3,
+ offsetof(struct other_val, bar)),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map3 = { 3, 10 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "map helper access to adjusted map (via const reg): out-of-bound 1",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_3,
+ sizeof(struct other_val) - 4),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map3 = { 3, 10 },
+ .result = REJECT,
+ .errstr = "invalid access to map value, value_size=16 off=12 size=8",
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "map helper access to adjusted map (via const reg): out-of-bound 2",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_3, -4),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map3 = { 3, 10 },
+ .result = REJECT,
+ .errstr = "invalid access to map value, value_size=16 off=-4 size=8",
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "map helper access to adjusted map (via variable)",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_3,
+ offsetof(struct other_val, bar), 4),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map3 = { 3, 11 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "map helper access to adjusted map (via variable): no max check",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map3 = { 3, 10 },
+ .result = REJECT,
+ .errstr = "R2 unbounded memory access, make sure to bounds check any array access into a map",
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "map helper access to adjusted map (via variable): wrong max check",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_3,
+ offsetof(struct other_val, bar) + 1, 4),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map3 = { 3, 11 },
+ .result = REJECT,
+ .errstr = "invalid access to map value, value_size=16 off=9 size=8",
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "map element value is preserved across register spilling",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -184),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1, 0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_3, 0, 42),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr_unpriv = "R0 leaks addr",
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ },
+ {
+ "map element value or null is marked on register spilling",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -152),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1, 0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_3, 0, 42),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr_unpriv = "R0 leaks addr",
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ },
+ {
+ "map element value store of cleared call register",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr_unpriv = "R1 !read_ok",
+ .errstr = "R1 !read_ok",
+ .result = REJECT,
+ .result_unpriv = REJECT,
+ },
+ {
+ "map element value with unaligned store",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 17),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 3),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 2, 43),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, -2, 44),
+ BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_8, 0, 32),
+ BPF_ST_MEM(BPF_DW, BPF_REG_8, 2, 33),
+ BPF_ST_MEM(BPF_DW, BPF_REG_8, -2, 34),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_8, 5),
+ BPF_ST_MEM(BPF_DW, BPF_REG_8, 0, 22),
+ BPF_ST_MEM(BPF_DW, BPF_REG_8, 4, 23),
+ BPF_ST_MEM(BPF_DW, BPF_REG_8, -7, 24),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_8),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 3),
+ BPF_ST_MEM(BPF_DW, BPF_REG_7, 0, 22),
+ BPF_ST_MEM(BPF_DW, BPF_REG_7, 4, 23),
+ BPF_ST_MEM(BPF_DW, BPF_REG_7, -4, 24),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr_unpriv = "R0 leaks addr",
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "map element value with unaligned load",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_1, MAX_ENTRIES, 9),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 3),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 2),
+ BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_8, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_8, 2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 5),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 4),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr_unpriv = "R0 leaks addr",
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "map element value illegal alu op, 1",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr = "R0 bitwise operator &= on pointer",
+ .result = REJECT,
+ },
+ {
+ "map element value illegal alu op, 2",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ BPF_ALU32_IMM(BPF_ADD, BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr = "R0 32-bit pointer arithmetic prohibited",
+ .result = REJECT,
+ },
+ {
+ "map element value illegal alu op, 3",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ BPF_ALU64_IMM(BPF_DIV, BPF_REG_0, 42),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr = "R0 pointer arithmetic with /= operator",
+ .result = REJECT,
+ },
+ {
+ "map element value illegal alu op, 4",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ BPF_ENDIAN(BPF_FROM_BE, BPF_REG_0, 64),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr_unpriv = "R0 pointer arithmetic prohibited",
+ .errstr = "invalid mem access 'inv'",
+ .result = REJECT,
+ .result_unpriv = REJECT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "map element value illegal alu op, 5",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+ BPF_MOV64_IMM(BPF_REG_3, 4096),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
+ BPF_STX_XADD(BPF_DW, BPF_REG_2, BPF_REG_3, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr = "R0 invalid mem access 'inv'",
+ .result = REJECT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "map element value is preserved across register spilling",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0,
+ offsetof(struct test_val, foo)),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -184),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1, 0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_3, 0, 42),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr_unpriv = "R0 leaks addr",
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "helper access to variable memory: stack, bitwise AND + JMP, correct bounds",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -64),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -56),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -48),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -40),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -32),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+ BPF_MOV64_IMM(BPF_REG_2, 16),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to variable memory: stack, bitwise AND, zero included",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid indirect read from stack off -64+0 size 64",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to variable memory: stack, bitwise AND + JMP, wrong max",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 65),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid stack type R1 off=-64 access_size=65",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to variable memory: stack, JMP, correct bounds",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -64),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -56),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -48),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -40),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -32),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+ BPF_MOV64_IMM(BPF_REG_2, 16),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 64, 4),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to variable memory: stack, JMP (signed), correct bounds",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -64),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -56),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -48),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -40),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -32),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+ BPF_MOV64_IMM(BPF_REG_2, 16),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, 64, 4),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to variable memory: stack, JMP, bounds + offset",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 64, 5),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 3),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid stack type R1 off=-64 access_size=65",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to variable memory: stack, JMP, wrong max",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 65, 4),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid stack type R1 off=-64 access_size=65",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to variable memory: stack, JMP, no max check",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ /* because max wasn't checked, signed min is negative */
+ .errstr = "R2 min value is negative, either use unsigned or 'var &= const'",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to variable memory: stack, JMP, no min check",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 64, 3),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid indirect read from stack off -64+0 size 64",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to variable memory: stack, JMP (signed), no min check",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, 64, 3),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R2 min value is negative",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to variable memory: map, JMP, correct bounds",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val)),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_2,
+ sizeof(struct test_val), 4),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to variable memory: map, JMP, wrong max",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_6),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_2,
+ sizeof(struct test_val) + 1, 4),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 4 },
+ .errstr = "invalid access to map value, value_size=48 off=0 size=49",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to variable memory: map adjusted, JMP, correct bounds",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 20),
+ BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val)),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_2,
+ sizeof(struct test_val) - 20, 4),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to variable memory: map adjusted, JMP, wrong max",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 20),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_6),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_2,
+ sizeof(struct test_val) - 19, 4),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 4 },
+ .errstr = "R1 min value is outside of the array range",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to variable memory: size = 0 allowed on NULL (ARG_PTR_TO_MEM_OR_NULL)",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_1, 0),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_EMIT_CALL(BPF_FUNC_csum_diff),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "helper access to variable memory: size > 0 not allowed on NULL (ARG_PTR_TO_MEM_OR_NULL)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+ BPF_MOV64_IMM(BPF_REG_1, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_EMIT_CALL(BPF_FUNC_csum_diff),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 type=inv expected=fp",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "helper access to variable memory: size = 0 allowed on != NULL stack pointer (ARG_PTR_TO_MEM_OR_NULL)",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, 0),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 8),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_EMIT_CALL(BPF_FUNC_csum_diff),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "helper access to variable memory: size = 0 allowed on != NULL map pointer (ARG_PTR_TO_MEM_OR_NULL)",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_EMIT_CALL(BPF_FUNC_csum_diff),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "helper access to variable memory: size possible = 0 allowed on != NULL stack pointer (ARG_PTR_TO_MEM_OR_NULL)",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 8, 7),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_EMIT_CALL(BPF_FUNC_csum_diff),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "helper access to variable memory: size possible = 0 allowed on != NULL map pointer (ARG_PTR_TO_MEM_OR_NULL)",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 8, 4),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_EMIT_CALL(BPF_FUNC_csum_diff),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "helper access to variable memory: size possible = 0 allowed on != NULL packet pointer (ARG_PTR_TO_MEM_OR_NULL)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 7),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 0),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 8, 4),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_EMIT_CALL(BPF_FUNC_csum_diff),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .retval = 0 /* csum_diff of 64-byte packet */,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "helper access to variable memory: size = 0 not allowed on NULL (!ARG_PTR_TO_MEM_OR_NULL)",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_1, 0),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 type=inv expected=fp",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to variable memory: size > 0 not allowed on NULL (!ARG_PTR_TO_MEM_OR_NULL)",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_1, 0),
+ BPF_MOV64_IMM(BPF_REG_2, 1),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 type=inv expected=fp",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to variable memory: size = 0 allowed on != NULL stack pointer (!ARG_PTR_TO_MEM_OR_NULL)",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to variable memory: size = 0 allowed on != NULL map pointer (!ARG_PTR_TO_MEM_OR_NULL)",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to variable memory: size possible = 0 allowed on != NULL stack pointer (!ARG_PTR_TO_MEM_OR_NULL)",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 8, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to variable memory: size possible = 0 allowed on != NULL map pointer (!ARG_PTR_TO_MEM_OR_NULL)",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 8, 2),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to variable memory: 8 bytes leak",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -64),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -56),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -48),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -40),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 63),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid indirect read from stack off -64+32 size 64",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to variable memory: 8 bytes no leak (init memory)",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -64),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -56),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -48),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -40),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -32),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 32),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 32),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "invalid and of negative number",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_1, -4),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0,
+ offsetof(struct test_val, foo)),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr = "R0 max value is outside of the array range",
+ .result = REJECT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "invalid range check",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 12),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_9, 1),
+ BPF_ALU32_IMM(BPF_MOD, BPF_REG_1, 2),
+ BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 1),
+ BPF_ALU32_REG(BPF_AND, BPF_REG_9, BPF_REG_1),
+ BPF_ALU32_IMM(BPF_ADD, BPF_REG_9, 1),
+ BPF_ALU32_IMM(BPF_RSH, BPF_REG_9, 1),
+ BPF_MOV32_IMM(BPF_REG_3, 1),
+ BPF_ALU32_REG(BPF_SUB, BPF_REG_3, BPF_REG_9),
+ BPF_ALU32_IMM(BPF_MUL, BPF_REG_3, 0x10000000),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_3),
+ BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_3, 0),
+ BPF_MOV64_REG(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr = "R0 max value is outside of the array range",
+ .result = REJECT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "map in map access",
+ .insns = {
+ BPF_ST_MEM(0, BPF_REG_10, -4, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+ BPF_ST_MEM(0, BPF_REG_10, -4, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_in_map = { 3 },
+ .result = ACCEPT,
+ },
+ {
+ "invalid inner map pointer",
+ .insns = {
+ BPF_ST_MEM(0, BPF_REG_10, -4, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+ BPF_ST_MEM(0, BPF_REG_10, -4, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_in_map = { 3 },
+ .errstr = "R1 pointer arithmetic on CONST_PTR_TO_MAP prohibited",
+ .result = REJECT,
+ },
+ {
+ "forgot null checking on the inner map pointer",
+ .insns = {
+ BPF_ST_MEM(0, BPF_REG_10, -4, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_ST_MEM(0, BPF_REG_10, -4, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_in_map = { 3 },
+ .errstr = "R1 type=map_value_or_null expected=map_ptr",
+ .result = REJECT,
+ },
+ {
+ "ld_abs: check calling conv, r1",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_MOV64_IMM(BPF_REG_1, 0),
+ BPF_LD_ABS(BPF_W, -0x200000),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 !read_ok",
+ .result = REJECT,
+ },
+ {
+ "ld_abs: check calling conv, r2",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_LD_ABS(BPF_W, -0x200000),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R2 !read_ok",
+ .result = REJECT,
+ },
+ {
+ "ld_abs: check calling conv, r3",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_LD_ABS(BPF_W, -0x200000),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_3),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R3 !read_ok",
+ .result = REJECT,
+ },
+ {
+ "ld_abs: check calling conv, r4",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_LD_ABS(BPF_W, -0x200000),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_4),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R4 !read_ok",
+ .result = REJECT,
+ },
+ {
+ "ld_abs: check calling conv, r5",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_LD_ABS(BPF_W, -0x200000),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R5 !read_ok",
+ .result = REJECT,
+ },
+ {
+ "ld_abs: check calling conv, r7",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_MOV64_IMM(BPF_REG_7, 0),
+ BPF_LD_ABS(BPF_W, -0x200000),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_7),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ },
+ {
+ "ld_abs: tests on r6 and skb data reload helper",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_LD_ABS(BPF_B, 0),
+ BPF_LD_ABS(BPF_H, 0),
+ BPF_LD_ABS(BPF_W, 0),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_6),
+ BPF_MOV64_IMM(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+ BPF_MOV64_IMM(BPF_REG_2, 1),
+ BPF_MOV64_IMM(BPF_REG_3, 2),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_skb_vlan_push),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_7),
+ BPF_LD_ABS(BPF_B, 0),
+ BPF_LD_ABS(BPF_H, 0),
+ BPF_LD_ABS(BPF_W, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 42),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 42 /* ultimate return value */,
+ },
+ {
+ "ld_ind: check calling conv, r1",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_MOV64_IMM(BPF_REG_1, 1),
+ BPF_LD_IND(BPF_W, BPF_REG_1, -0x200000),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 !read_ok",
+ .result = REJECT,
+ },
+ {
+ "ld_ind: check calling conv, r2",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_MOV64_IMM(BPF_REG_2, 1),
+ BPF_LD_IND(BPF_W, BPF_REG_2, -0x200000),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R2 !read_ok",
+ .result = REJECT,
+ },
+ {
+ "ld_ind: check calling conv, r3",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_MOV64_IMM(BPF_REG_3, 1),
+ BPF_LD_IND(BPF_W, BPF_REG_3, -0x200000),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_3),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R3 !read_ok",
+ .result = REJECT,
+ },
+ {
+ "ld_ind: check calling conv, r4",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_MOV64_IMM(BPF_REG_4, 1),
+ BPF_LD_IND(BPF_W, BPF_REG_4, -0x200000),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_4),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R4 !read_ok",
+ .result = REJECT,
+ },
+ {
+ "ld_ind: check calling conv, r5",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_MOV64_IMM(BPF_REG_5, 1),
+ BPF_LD_IND(BPF_W, BPF_REG_5, -0x200000),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R5 !read_ok",
+ .result = REJECT,
+ },
+ {
+ "ld_ind: check calling conv, r7",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_MOV64_IMM(BPF_REG_7, 1),
+ BPF_LD_IND(BPF_W, BPF_REG_7, -0x200000),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_7),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = 1,
+ },
+ {
+ "check bpf_perf_event_data->sample_period byte load permitted",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_perf_event_data, sample_period)),
+#else
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_perf_event_data, sample_period) + 7),
+#endif
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_PERF_EVENT,
+ },
+ {
+ "check bpf_perf_event_data->sample_period half load permitted",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_perf_event_data, sample_period)),
+#else
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_perf_event_data, sample_period) + 6),
+#endif
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_PERF_EVENT,
+ },
+ {
+ "check bpf_perf_event_data->sample_period word load permitted",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_perf_event_data, sample_period)),
+#else
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_perf_event_data, sample_period) + 4),
+#endif
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_PERF_EVENT,
+ },
+ {
+ "check bpf_perf_event_data->sample_period dword load permitted",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_perf_event_data, sample_period)),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_PERF_EVENT,
+ },
+ {
+ "check skb->data half load not permitted",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+#else
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, data) + 2),
+#endif
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid bpf_context access",
+ },
+ {
+ "check skb->tc_classid half load not permitted for lwt prog",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, tc_classid)),
+#else
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, tc_classid) + 2),
+#endif
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid bpf_context access",
+ .prog_type = BPF_PROG_TYPE_LWT_IN,
+ },
+ {
+ "bounds checks mixing signed and unsigned, positive bounds",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+ BPF_MOV64_IMM(BPF_REG_2, 2),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 3),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 4, 2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .errstr = "unbounded min value",
+ .result = REJECT,
+ },
+ {
+ "bounds checks mixing signed and unsigned",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+ BPF_MOV64_IMM(BPF_REG_2, -1),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 3),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .errstr = "unbounded min value",
+ .result = REJECT,
+ },
+ {
+ "bounds checks mixing signed and unsigned, variant 2",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+ BPF_MOV64_IMM(BPF_REG_2, -1),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 5),
+ BPF_MOV64_IMM(BPF_REG_8, 0),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_8, BPF_REG_1),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_8, 1, 2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_8),
+ BPF_ST_MEM(BPF_B, BPF_REG_8, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .errstr = "unbounded min value",
+ .result = REJECT,
+ },
+ {
+ "bounds checks mixing signed and unsigned, variant 3",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+ BPF_MOV64_IMM(BPF_REG_2, -1),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 4),
+ BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_8, 1, 2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_8),
+ BPF_ST_MEM(BPF_B, BPF_REG_8, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .errstr = "unbounded min value",
+ .result = REJECT,
+ },
+ {
+ "bounds checks mixing signed and unsigned, variant 4",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+ BPF_MOV64_IMM(BPF_REG_2, 1),
+ BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .result = ACCEPT,
+ },
+ {
+ "bounds checks mixing signed and unsigned, variant 5",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+ BPF_MOV64_IMM(BPF_REG_2, -1),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 5),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 4),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 4),
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
+ BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .errstr = "unbounded min value",
+ .result = REJECT,
+ },
+ {
+ "bounds checks mixing signed and unsigned, variant 6",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -512),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -16),
+ BPF_MOV64_IMM(BPF_REG_6, -1),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_6, 5),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_4, 1, 4),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 1),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_ST_MEM(BPF_H, BPF_REG_10, -512, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_skb_load_bytes),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R4 min value is negative, either use unsigned",
+ .result = REJECT,
+ },
+ {
+ "bounds checks mixing signed and unsigned, variant 7",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+ BPF_MOV64_IMM(BPF_REG_2, 1024 * 1024 * 1024),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 3),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .result = ACCEPT,
+ },
+ {
+ "bounds checks mixing signed and unsigned, variant 8",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+ BPF_MOV64_IMM(BPF_REG_2, -1),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .errstr = "unbounded min value",
+ .result = REJECT,
+ },
+ {
+ "bounds checks mixing signed and unsigned, variant 9",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+ BPF_LD_IMM64(BPF_REG_2, -9223372036854775808ULL),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .result = ACCEPT,
+ },
+ {
+ "bounds checks mixing signed and unsigned, variant 10",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .errstr = "unbounded min value",
+ .result = REJECT,
+ },
+ {
+ "bounds checks mixing signed and unsigned, variant 11",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+ BPF_MOV64_IMM(BPF_REG_2, -1),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 2),
+ /* Dead branch. */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .errstr = "unbounded min value",
+ .result = REJECT,
+ },
+ {
+ "bounds checks mixing signed and unsigned, variant 12",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+ BPF_MOV64_IMM(BPF_REG_2, -6),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .errstr = "unbounded min value",
+ .result = REJECT,
+ },
+ {
+ "bounds checks mixing signed and unsigned, variant 13",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+ BPF_MOV64_IMM(BPF_REG_2, 2),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 2),
+ BPF_MOV64_IMM(BPF_REG_7, 1),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_7, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_1),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_7, 4, 2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_7),
+ BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .errstr = "unbounded min value",
+ .result = REJECT,
+ },
+ {
+ "bounds checks mixing signed and unsigned, variant 14",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+ BPF_MOV64_IMM(BPF_REG_2, -1),
+ BPF_MOV64_IMM(BPF_REG_8, 2),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_9, 42, 6),
+ BPF_JMP_REG(BPF_JSGT, BPF_REG_8, BPF_REG_1, 3),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, -3),
+ BPF_JMP_IMM(BPF_JA, 0, 0, -7),
+ },
+ .fixup_map1 = { 4 },
+ .errstr = "unbounded min value",
+ .result = REJECT,
+ },
+ {
+ "bounds checks mixing signed and unsigned, variant 15",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+ BPF_MOV64_IMM(BPF_REG_2, -6),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_0, 1, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .errstr = "unbounded min value",
+ .result = REJECT,
+ .result_unpriv = REJECT,
+ },
+ {
+ "subtraction bounds (map value) variant 1",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+ BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 0xff, 7),
+ BPF_LDX_MEM(BPF_B, BPF_REG_3, BPF_REG_0, 1),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_3, 0xff, 5),
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_3),
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 56),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .errstr = "R0 max value is outside of the array range",
+ .result = REJECT,
+ },
+ {
+ "subtraction bounds (map value) variant 2",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
+ BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 0xff, 6),
+ BPF_LDX_MEM(BPF_B, BPF_REG_3, BPF_REG_0, 1),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_3, 0xff, 4),
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_3),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.",
+ .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds",
+ .result = REJECT,
+ },
+ {
+ "bounds check based on zero-extended MOV",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ /* r2 = 0x0000'0000'ffff'ffff */
+ BPF_MOV32_IMM(BPF_REG_2, 0xffffffff),
+ /* r2 = 0 */
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 32),
+ /* no-op */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
+ /* access at offset 0 */
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+ /* exit */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .result = ACCEPT
+ },
+ {
+ "bounds check based on sign-extended MOV. test1",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ /* r2 = 0xffff'ffff'ffff'ffff */
+ BPF_MOV64_IMM(BPF_REG_2, 0xffffffff),
+ /* r2 = 0xffff'ffff */
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 32),
+ /* r0 = <oob pointer> */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
+ /* access to OOB pointer */
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+ /* exit */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .errstr = "map_value pointer and 4294967295",
+ .result = REJECT
+ },
+ {
+ "bounds check based on sign-extended MOV. test2",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ /* r2 = 0xffff'ffff'ffff'ffff */
+ BPF_MOV64_IMM(BPF_REG_2, 0xffffffff),
+ /* r2 = 0xfff'ffff */
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 36),
+ /* r0 = <oob pointer> */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
+ /* access to OOB pointer */
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+ /* exit */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .errstr = "R0 min value is outside of the array range",
+ .result = REJECT
+ },
+ {
+ "bounds check based on reg_off + var_off + insn_off. test1",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 1),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, (1 << 29) - 1),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, (1 << 29) - 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 3),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 4 },
+ .errstr = "value_size=8 off=1073741825",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "bounds check based on reg_off + var_off + insn_off. test2",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 1),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, (1 << 30) - 1),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, (1 << 29) - 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 3),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 4 },
+ .errstr = "value 1073741823",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "bounds check after truncation of non-boundary-crossing range",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+ /* r1 = [0x00, 0xff] */
+ BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_2, 1),
+ /* r2 = 0x10'0000'0000 */
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 36),
+ /* r1 = [0x10'0000'0000, 0x10'0000'00ff] */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),
+ /* r1 = [0x10'7fff'ffff, 0x10'8000'00fe] */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
+ /* r1 = [0x00, 0xff] */
+ BPF_ALU32_IMM(BPF_SUB, BPF_REG_1, 0x7fffffff),
+ /* r1 = 0 */
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
+ /* no-op */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ /* access at offset 0 */
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+ /* exit */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .result = ACCEPT
+ },
+ {
+ "bounds check after truncation of boundary-crossing range (1)",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+ /* r1 = [0x00, 0xff] */
+ BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
+ /* r1 = [0xffff'ff80, 0x1'0000'007f] */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
+ /* r1 = [0xffff'ff80, 0xffff'ffff] or
+ * [0x0000'0000, 0x0000'007f]
+ */
+ BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 0),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
+ /* r1 = [0x00, 0xff] or
+ * [0xffff'ffff'0000'0080, 0xffff'ffff'ffff'ffff]
+ */
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
+ /* r1 = 0 or
+ * [0x00ff'ffff'ff00'0000, 0x00ff'ffff'ffff'ffff]
+ */
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
+ /* no-op or OOB pointer computation */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ /* potentially OOB access */
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+ /* exit */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ /* not actually fully unbounded, but the bound is very high */
+ .errstr = "R0 unbounded memory access",
+ .result = REJECT
+ },
+ {
+ "bounds check after truncation of boundary-crossing range (2)",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+ /* r1 = [0x00, 0xff] */
+ BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
+ /* r1 = [0xffff'ff80, 0x1'0000'007f] */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
+ /* r1 = [0xffff'ff80, 0xffff'ffff] or
+ * [0x0000'0000, 0x0000'007f]
+ * difference to previous test: truncation via MOV32
+ * instead of ALU32.
+ */
+ BPF_MOV32_REG(BPF_REG_1, BPF_REG_1),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
+ /* r1 = [0x00, 0xff] or
+ * [0xffff'ffff'0000'0080, 0xffff'ffff'ffff'ffff]
+ */
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
+ /* r1 = 0 or
+ * [0x00ff'ffff'ff00'0000, 0x00ff'ffff'ffff'ffff]
+ */
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
+ /* no-op or OOB pointer computation */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ /* potentially OOB access */
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+ /* exit */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ /* not actually fully unbounded, but the bound is very high */
+ .errstr = "R0 unbounded memory access",
+ .result = REJECT
+ },
+ {
+ "bounds check after wrapping 32-bit addition",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+ /* r1 = 0x7fff'ffff */
+ BPF_MOV64_IMM(BPF_REG_1, 0x7fffffff),
+ /* r1 = 0xffff'fffe */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
+ /* r1 = 0 */
+ BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 2),
+ /* no-op */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ /* access at offset 0 */
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+ /* exit */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .result = ACCEPT
+ },
+ {
+ "bounds check after shift with oversized count operand",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+ BPF_MOV64_IMM(BPF_REG_2, 32),
+ BPF_MOV64_IMM(BPF_REG_1, 1),
+ /* r1 = (u32)1 << (u32)32 = ? */
+ BPF_ALU32_REG(BPF_LSH, BPF_REG_1, BPF_REG_2),
+ /* r1 = [0x0000, 0xffff] */
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xffff),
+ /* computes unknown pointer, potentially OOB */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ /* potentially OOB access */
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+ /* exit */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .errstr = "R0 max value is outside of the array range",
+ .result = REJECT
+ },
+ {
+ "bounds check after right shift of maybe-negative number",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+ /* r1 = [0x00, 0xff] */
+ BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+ /* r1 = [-0x01, 0xfe] */
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 1),
+ /* r1 = 0 or 0xff'ffff'ffff'ffff */
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
+ /* r1 = 0 or 0xffff'ffff'ffff */
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
+ /* computes unknown pointer, potentially OOB */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ /* potentially OOB access */
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+ /* exit */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .errstr = "R0 unbounded memory access",
+ .result = REJECT
+ },
+ {
+ "bounds check map access with off+size signed 32bit overflow. test1",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x7ffffffe),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+ BPF_JMP_A(0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .errstr = "map_value pointer and 2147483646",
+ .result = REJECT
+ },
+ {
+ "bounds check map access with off+size signed 32bit overflow. test2",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+ BPF_JMP_A(0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .errstr = "pointer offset 1073741822",
+ .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range",
+ .result = REJECT
+ },
+ {
+ "bounds check map access with off+size signed 32bit overflow. test3",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 0x1fffffff),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 0x1fffffff),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 2),
+ BPF_JMP_A(0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .errstr = "pointer offset -1073741822",
+ .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range",
+ .result = REJECT
+ },
+ {
+ "bounds check map access with off+size signed 32bit overflow. test4",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_1, 1000000),
+ BPF_ALU64_IMM(BPF_MUL, BPF_REG_1, 1000000),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 2),
+ BPF_JMP_A(0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .errstr = "map_value pointer and 1000000000000",
+ .result = REJECT
+ },
+ {
+ "pointer/scalar confusion in state equality check (way 1)",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+ BPF_JMP_A(1),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
+ BPF_JMP_A(0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .result = ACCEPT,
+ .retval = POINTER_VALUE,
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R0 leaks addr as return value"
+ },
+ {
+ "pointer/scalar confusion in state equality check (way 2)",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
+ BPF_JMP_A(1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .result = ACCEPT,
+ .retval = POINTER_VALUE,
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R0 leaks addr as return value"
+ },
+ {
+ "variable-offset ctx access",
+ .insns = {
+ /* Get an unknown value */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+ /* Make it small and 4-byte aligned */
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
+ /* add it to skb. We now have either &skb->len or
+ * &skb->pkt_type, but we don't know which
+ */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),
+ /* dereference it */
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "variable ctx access var_off=(0x0; 0x4)",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_LWT_IN,
+ },
+ {
+ "variable-offset stack access",
+ .insns = {
+ /* Fill the top 8 bytes of the stack */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ /* Get an unknown value */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+ /* Make it small and 4-byte aligned */
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 8),
+ /* add it to fp. We now have either fp-4 or fp-8, but
+ * we don't know which
+ */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
+ /* dereference it */
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "variable stack access var_off=(0xfffffffffffffff8; 0x4)",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_LWT_IN,
+ },
+ {
+ "indirect variable-offset stack access, out of bound",
+ .insns = {
+ /* Fill the top 8 bytes of the stack */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ /* Get an unknown value */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+ /* Make it small and 4-byte aligned */
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 8),
+ /* add it to fp. We now have either fp-4 or fp-8, but
+ * we don't know which
+ */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
+ /* dereference it indirectly */
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 5 },
+ .errstr = "invalid stack type R2 var_off",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_LWT_IN,
+ },
+ {
+ "indirect variable-offset stack access, max_off+size > max_initialized",
+ .insns = {
+ /* Fill only the second from top 8 bytes of the stack. */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0),
+ /* Get an unknown value. */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+ /* Make it small and 4-byte aligned. */
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 16),
+ /* Add it to fp. We now have either fp-12 or fp-16, but we don't know
+ * which. fp-12 size 8 is partially uninitialized stack.
+ */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
+ /* Dereference it indirectly. */
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 5 },
+ .errstr = "invalid indirect read from stack var_off",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_LWT_IN,
+ },
+ {
+ "indirect variable-offset stack access, min_off < min_initialized",
+ .insns = {
+ /* Fill only the top 8 bytes of the stack. */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ /* Get an unknown value */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+ /* Make it small and 4-byte aligned. */
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 16),
+ /* Add it to fp. We now have either fp-12 or fp-16, but we don't know
+ * which. fp-16 size 8 is partially uninitialized stack.
+ */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
+ /* Dereference it indirectly. */
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 5 },
+ .errstr = "invalid indirect read from stack var_off",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_LWT_IN,
+ },
+ {
+ "indirect variable-offset stack access, ok",
+ .insns = {
+ /* Fill the top 16 bytes of the stack. */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ /* Get an unknown value. */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+ /* Make it small and 4-byte aligned. */
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 16),
+ /* Add it to fp. We now have either fp-12 or fp-16, we don't know
+ * which, but either way it points to initialized stack.
+ */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
+ /* Dereference it indirectly. */
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 6 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_LWT_IN,
+ },
+ {
+ "direct stack access with 32-bit wraparound. test1",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+ BPF_EXIT_INSN()
+ },
+ .errstr = "fp pointer and 2147483647",
+ .result = REJECT
+ },
+ {
+ "direct stack access with 32-bit wraparound. test2",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x3fffffff),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x3fffffff),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+ BPF_EXIT_INSN()
+ },
+ .errstr = "fp pointer and 1073741823",
+ .result = REJECT
+ },
+ {
+ "direct stack access with 32-bit wraparound. test3",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x1fffffff),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x1fffffff),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+ BPF_EXIT_INSN()
+ },
+ .errstr = "fp pointer offset 1073741822",
+ .errstr_unpriv = "R1 stack pointer arithmetic goes out of range",
+ .result = REJECT
+ },
+ {
+ "liveness pruning and write screening",
+ .insns = {
+ /* Get an unknown value */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+ /* branch conditions teach us nothing about R2 */
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R0 !read_ok",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_LWT_IN,
+ },
+ {
+ "varlen_map_value_access pruning",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
+ BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES),
+ BPF_JMP_REG(BPF_JSGT, BPF_REG_2, BPF_REG_1, 1),
+ BPF_MOV32_IMM(BPF_REG_1, 0),
+ BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0,
+ offsetof(struct test_val, foo)),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr_unpriv = "R0 leaks addr",
+ .errstr = "R0 unbounded memory access",
+ .result_unpriv = REJECT,
+ .result = REJECT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "invalid 64-bit BPF_END",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ {
+ .code = BPF_ALU64 | BPF_END | BPF_TO_LE,
+ .dst_reg = BPF_REG_0,
+ .src_reg = 0,
+ .off = 0,
+ .imm = 32,
+ },
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "unknown opcode d7",
+ .result = REJECT,
+ },
+ {
+ "XDP, using ifindex from netdev",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, ingress_ifindex)),
+ BPF_JMP_IMM(BPF_JLT, BPF_REG_2, 1, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .retval = 1,
+ },
+ {
+ "meta access, test1",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ },
+ {
+ "meta access, test2",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 8),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid access to packet, off=-8",
+ .prog_type = BPF_PROG_TYPE_XDP,
+ },
+ {
+ "meta access, test3",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid access to packet",
+ .prog_type = BPF_PROG_TYPE_XDP,
+ },
+ {
+ "meta access, test4",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_4),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid access to packet",
+ .prog_type = BPF_PROG_TYPE_XDP,
+ },
+ {
+ "meta access, test5",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_3),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_4, 3),
+ BPF_MOV64_IMM(BPF_REG_2, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_xdp_adjust_meta),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_3, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "R3 !read_ok",
+ .prog_type = BPF_PROG_TYPE_XDP,
+ },
+ {
+ "meta access, test6",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_3),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_0, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid access to packet",
+ .prog_type = BPF_PROG_TYPE_XDP,
+ },
+ {
+ "meta access, test7",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_3),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ },
+ {
+ "meta access, test8",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0xFFFF),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ },
+ {
+ "meta access, test9",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0xFFFF),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 1),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid access to packet",
+ .prog_type = BPF_PROG_TYPE_XDP,
+ },
+ {
+ "meta access, test10",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_IMM(BPF_REG_5, 42),
+ BPF_MOV64_IMM(BPF_REG_6, 24),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_5, -8),
+ BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_6, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -8),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_5, 100, 6),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_5),
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_3),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_5, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid access to packet",
+ .prog_type = BPF_PROG_TYPE_XDP,
+ },
+ {
+ "meta access, test11",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_IMM(BPF_REG_5, 42),
+ BPF_MOV64_IMM(BPF_REG_6, 24),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_5, -8),
+ BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_6, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -8),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_5, 100, 6),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_5),
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_5, BPF_REG_5, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ },
+ {
+ "meta access, test12",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_3),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 16),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_4, 5),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_3, 0),
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 16),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ },
+ {
+ "arithmetic ops make PTR_TO_CTX unusable",
+ .insns = {
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1,
+ offsetof(struct __sk_buff, data) -
+ offsetof(struct __sk_buff, mark)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "dereference of modified ctx ptr",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "pkt_end - pkt_start is allowed",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_2),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = TEST_DATA_LEN,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "XDP pkt read, pkt_end mangling, bad access 1",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ },
+ {
+ "XDP pkt read, pkt_end mangling, bad access 2",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_3, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ },
+ {
+ "XDP pkt read, pkt_data' > pkt_end, good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_data' > pkt_end, bad access 1",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_data' > pkt_end, bad access 2",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_end > pkt_data', good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_end > pkt_data', bad access 1",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_end > pkt_data', bad access 2",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_data' < pkt_end, good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_data' < pkt_end, bad access 1",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_data' < pkt_end, bad access 2",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_end < pkt_data', good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_end < pkt_data', bad access 1",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_end < pkt_data', bad access 2",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_data' >= pkt_end, good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_data' >= pkt_end, bad access 1",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_data' >= pkt_end, bad access 2",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_end >= pkt_data', good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_end >= pkt_data', bad access 1",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_end >= pkt_data', bad access 2",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_data' <= pkt_end, good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_data' <= pkt_end, bad access 1",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_data' <= pkt_end, bad access 2",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_end <= pkt_data', good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_end <= pkt_data', bad access 1",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_end <= pkt_data', bad access 2",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_meta' > pkt_data, good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_meta' > pkt_data, bad access 1",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_meta' > pkt_data, bad access 2",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_data > pkt_meta', good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_data > pkt_meta', bad access 1",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_data > pkt_meta', bad access 2",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_meta' < pkt_data, good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_meta' < pkt_data, bad access 1",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_meta' < pkt_data, bad access 2",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_data < pkt_meta', good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_data < pkt_meta', bad access 1",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_data < pkt_meta', bad access 2",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_meta' >= pkt_data, good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_meta' >= pkt_data, bad access 1",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_meta' >= pkt_data, bad access 2",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_data >= pkt_meta', good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_data >= pkt_meta', bad access 1",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_data >= pkt_meta', bad access 2",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_meta' <= pkt_data, good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_meta' <= pkt_data, bad access 1",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_meta' <= pkt_data, bad access 2",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_data <= pkt_meta', good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_data <= pkt_meta', bad access 1",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_data <= pkt_meta', bad access 2",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "check deducing bounds from const, 1",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 1, 0),
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R1 has pointer with unsupported alu operation",
+ .errstr = "R0 tried to subtract pointer from scalar",
+ .result = REJECT,
+ },
+ {
+ "check deducing bounds from const, 2",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 1, 1),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 1, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R1 has pointer with unsupported alu operation",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ .retval = 1,
+ },
+ {
+ "check deducing bounds from const, 3",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 0, 0),
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R1 has pointer with unsupported alu operation",
+ .errstr = "R0 tried to subtract pointer from scalar",
+ .result = REJECT,
+ },
+ {
+ "check deducing bounds from const, 4",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_6, BPF_REG_0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R6 has pointer with unsupported alu operation",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "check deducing bounds from const, 5",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 1, 1),
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R1 has pointer with unsupported alu operation",
+ .errstr = "R0 tried to subtract pointer from scalar",
+ .result = REJECT,
+ },
+ {
+ "check deducing bounds from const, 6",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R1 has pointer with unsupported alu operation",
+ .errstr = "R0 tried to subtract pointer from scalar",
+ .result = REJECT,
+ },
+ {
+ "check deducing bounds from const, 7",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, ~0),
+ BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 0),
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R1 has pointer with unsupported alu operation",
+ .errstr = "dereference of modified ctx ptr",
+ .result = REJECT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "check deducing bounds from const, 8",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, ~0),
+ BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R1 has pointer with unsupported alu operation",
+ .errstr = "dereference of modified ctx ptr",
+ .result = REJECT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "check deducing bounds from const, 9",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 0),
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R1 has pointer with unsupported alu operation",
+ .errstr = "R0 tried to subtract pointer from scalar",
+ .result = REJECT,
+ },
+ {
+ "check deducing bounds from const, 10",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 0, 0),
+ /* Marks reg as unknown. */
+ BPF_ALU64_IMM(BPF_NEG, BPF_REG_0, 0),
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "math between ctx pointer and register with unbounded min value is not allowed",
+ .result = REJECT,
+ },
+ {
+ "bpf_exit with invalid return code. test1",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R0 has value (0x0; 0xffffffff)",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
+ },
+ {
+ "bpf_exit with invalid return code. test2",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
+ },
+ {
+ "bpf_exit with invalid return code. test3",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 3),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R0 has value (0x0; 0x3)",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
+ },
+ {
+ "bpf_exit with invalid return code. test4",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
+ },
+ {
+ "bpf_exit with invalid return code. test5",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R0 has value (0x2; 0x0)",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
+ },
+ {
+ "bpf_exit with invalid return code. test6",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R0 is not a known value (ctx)",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
+ },
+ {
+ "bpf_exit with invalid return code. test7",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 4),
+ BPF_ALU64_REG(BPF_MUL, BPF_REG_0, BPF_REG_2),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R0 has unknown scalar value",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
+ },
+ {
+ "calls: basic sanity",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .result = ACCEPT,
+ },
+ {
+ "calls: not on unpriviledged",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "function calls to other bpf functions are allowed for root only",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ .retval = 1,
+ },
+ {
+ "calls: div by 0 in subprog",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV32_IMM(BPF_REG_2, 0),
+ BPF_MOV32_IMM(BPF_REG_3, 1),
+ BPF_ALU32_REG(BPF_DIV, BPF_REG_3, BPF_REG_2),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 1,
+ },
+ {
+ "calls: multiple ret types in subprog 1",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_MOV32_IMM(BPF_REG_0, 42),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = REJECT,
+ .errstr = "R0 invalid mem access 'inv'",
+ },
+ {
+ "calls: multiple ret types in subprog 2",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 9),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6,
+ offsetof(struct __sk_buff, data)),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 64),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map1 = { 16 },
+ .result = REJECT,
+ .errstr = "R0 min value is outside of the array range",
+ },
+ {
+ "calls: overlapping caller/callee",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "last insn is not an exit or jmp",
+ .result = REJECT,
+ },
+ {
+ "calls: wrong recursive calls",
+ .insns = {
+ BPF_JMP_IMM(BPF_JA, 0, 0, 4),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 4),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -2),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -2),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "jump out of range",
+ .result = REJECT,
+ },
+ {
+ "calls: wrong src reg",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 2, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "BPF_CALL uses reserved fields",
+ .result = REJECT,
+ },
+ {
+ "calls: wrong off value",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, -1, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "BPF_CALL uses reserved fields",
+ .result = REJECT,
+ },
+ {
+ "calls: jump back loop",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -1),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "back-edge from insn 0 to 0",
+ .result = REJECT,
+ },
+ {
+ "calls: conditional call",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "jump out of range",
+ .result = REJECT,
+ },
+ {
+ "calls: conditional call 2",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 3),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .result = ACCEPT,
+ },
+ {
+ "calls: conditional call 3",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 4),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, -6),
+ BPF_MOV64_IMM(BPF_REG_0, 3),
+ BPF_JMP_IMM(BPF_JA, 0, 0, -6),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "back-edge from insn",
+ .result = REJECT,
+ },
+ {
+ "calls: conditional call 4",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, -5),
+ BPF_MOV64_IMM(BPF_REG_0, 3),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .result = ACCEPT,
+ },
+ {
+ "calls: conditional call 5",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, -6),
+ BPF_MOV64_IMM(BPF_REG_0, 3),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "back-edge from insn",
+ .result = REJECT,
+ },
+ {
+ "calls: conditional call 6",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -2),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "back-edge from insn",
+ .result = REJECT,
+ },
+ {
+ "calls: using r0 returned by callee",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .result = ACCEPT,
+ },
+ {
+ "calls: using uninit r0 from callee",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "!read_ok",
+ .result = REJECT,
+ },
+ {
+ "calls: callee is using r1",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, len)),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_ACT,
+ .result = ACCEPT,
+ .retval = TEST_DATA_LEN,
+ },
+ {
+ "calls: callee using args1",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "allowed for root only",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ .retval = POINTER_VALUE,
+ },
+ {
+ "calls: callee using wrong args2",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "R2 !read_ok",
+ .result = REJECT,
+ },
+ {
+ "calls: callee using two args",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_6,
+ offsetof(struct __sk_buff, len)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_6,
+ offsetof(struct __sk_buff, len)),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "allowed for root only",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ .retval = TEST_DATA_LEN + TEST_DATA_LEN - ETH_HLEN - ETH_HLEN,
+ },
+ {
+ "calls: callee changing pkt pointers",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_8, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_8, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_8, BPF_REG_7, 2),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ /* clear_all_pkt_pointers() has to walk all frames
+ * to make sure that pkt pointers in the caller
+ * are cleared when callee is calling a helper that
+ * adjusts packet size
+ */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_xdp_adjust_head),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "R6 invalid mem access 'inv'",
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "calls: two calls with args",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_7),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, len)),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = TEST_DATA_LEN + TEST_DATA_LEN,
+ },
+ {
+ "calls: calls with stack arith",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64),
+ BPF_MOV64_IMM(BPF_REG_0, 42),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 42,
+ },
+ {
+ "calls: calls with misaligned stack access",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -63),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -61),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -63),
+ BPF_MOV64_IMM(BPF_REG_0, 42),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
+ .errstr = "misaligned stack access",
+ .result = REJECT,
+ },
+ {
+ "calls: calls control flow, jump test",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 42),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 43),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, -3),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 43,
+ },
+ {
+ "calls: calls control flow, jump test 2",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 42),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 43),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -3),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "jump out of range from insn 1 to 4",
+ .result = REJECT,
+ },
+ {
+ "calls: two calls with bad jump",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_7),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, len)),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -3),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "jump out of range from insn 11 to 9",
+ .result = REJECT,
+ },
+ {
+ "calls: recursive call. test1",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "back-edge",
+ .result = REJECT,
+ },
+ {
+ "calls: recursive call. test2",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -3),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "back-edge",
+ .result = REJECT,
+ },
+ {
+ "calls: unreachable code",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "unreachable insn 6",
+ .result = REJECT,
+ },
+ {
+ "calls: invalid call",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -4),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "invalid destination",
+ .result = REJECT,
+ },
+ {
+ "calls: invalid call 2",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 0x7fffffff),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "invalid destination",
+ .result = REJECT,
+ },
+ {
+ "calls: jumping across function bodies. test1",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, -3),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "jump out of range",
+ .result = REJECT,
+ },
+ {
+ "calls: jumping across function bodies. test2",
+ .insns = {
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "jump out of range",
+ .result = REJECT,
+ },
+ {
+ "calls: call without exit",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, -2),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "not an exit",
+ .result = REJECT,
+ },
+ {
+ "calls: call into middle of ld_imm64",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_LD_IMM64(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "last insn",
+ .result = REJECT,
+ },
+ {
+ "calls: call into middle of other call",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "last insn",
+ .result = REJECT,
+ },
+ {
+ "calls: ld_abs with changing ctx data in callee",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_LD_ABS(BPF_B, 0),
+ BPF_LD_ABS(BPF_H, 0),
+ BPF_LD_ABS(BPF_W, 0),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_6),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_7),
+ BPF_LD_ABS(BPF_B, 0),
+ BPF_LD_ABS(BPF_H, 0),
+ BPF_LD_ABS(BPF_W, 0),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_2, 1),
+ BPF_MOV64_IMM(BPF_REG_3, 2),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_skb_vlan_push),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "BPF_LD_[ABS|IND] instructions cannot be mixed",
+ .result = REJECT,
+ },
+ {
+ "calls: two calls with bad fallthrough",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_7),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, len)),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "not an exit",
+ .result = REJECT,
+ },
+ {
+ "calls: two calls with stack read",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_7),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .result = ACCEPT,
+ },
+ {
+ "calls: two calls with stack write",
+ .insns = {
+ /* main prog */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -16),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 7),
+ BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_8, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_8),
+ /* write into stack frame of main prog */
+ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 2 */
+ /* read from stack frame of main prog */
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .result = ACCEPT,
+ },
+ {
+ "calls: stack overflow using two frames (pre-call access)",
+ .insns = {
+ /* prog 1 */
+ BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0),
+ BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+
+ /* prog 2 */
+ BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .errstr = "combined stack size",
+ .result = REJECT,
+ },
+ {
+ "calls: stack overflow using two frames (post-call access)",
+ .insns = {
+ /* prog 1 */
+ BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 2),
+ BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0),
+ BPF_EXIT_INSN(),
+
+ /* prog 2 */
+ BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .errstr = "combined stack size",
+ .result = REJECT,
+ },
+ {
+ "calls: stack depth check using three frames. test1",
+ .insns = {
+ /* main */
+ BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 4), /* call A */
+ BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 5), /* call B */
+ BPF_ST_MEM(BPF_B, BPF_REG_10, -32, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ /* A */
+ BPF_ST_MEM(BPF_B, BPF_REG_10, -256, 0),
+ BPF_EXIT_INSN(),
+ /* B */
+ BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, -3), /* call A */
+ BPF_ST_MEM(BPF_B, BPF_REG_10, -64, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_XDP,
+ /* stack_main=32, stack_A=256, stack_B=64
+ * and max(main+A, main+A+B) < 512
+ */
+ .result = ACCEPT,
+ },
+ {
+ "calls: stack depth check using three frames. test2",
+ .insns = {
+ /* main */
+ BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 4), /* call A */
+ BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 5), /* call B */
+ BPF_ST_MEM(BPF_B, BPF_REG_10, -32, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ /* A */
+ BPF_ST_MEM(BPF_B, BPF_REG_10, -64, 0),
+ BPF_EXIT_INSN(),
+ /* B */
+ BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, -3), /* call A */
+ BPF_ST_MEM(BPF_B, BPF_REG_10, -256, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_XDP,
+ /* stack_main=32, stack_A=64, stack_B=256
+ * and max(main+A, main+A+B) < 512
+ */
+ .result = ACCEPT,
+ },
+ {
+ "calls: stack depth check using three frames. test3",
+ .insns = {
+ /* main */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 6), /* call A */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 8), /* call B */
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_6, 0, 1),
+ BPF_ST_MEM(BPF_B, BPF_REG_10, -64, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ /* A */
+ BPF_JMP_IMM(BPF_JLT, BPF_REG_1, 10, 1),
+ BPF_EXIT_INSN(),
+ BPF_ST_MEM(BPF_B, BPF_REG_10, -224, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, -3),
+ /* B */
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 2, 1),
+ BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, -6), /* call A */
+ BPF_ST_MEM(BPF_B, BPF_REG_10, -256, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_XDP,
+ /* stack_main=64, stack_A=224, stack_B=256
+ * and max(main+A, main+A+B) > 512
+ */
+ .errstr = "combined stack",
+ .result = REJECT,
+ },
+ {
+ "calls: stack depth check using three frames. test4",
+ /* void main(void) {
+ * func1(0);
+ * func1(1);
+ * func2(1);
+ * }
+ * void func1(int alloc_or_recurse) {
+ * if (alloc_or_recurse) {
+ * frame_pointer[-300] = 1;
+ * } else {
+ * func2(alloc_or_recurse);
+ * }
+ * }
+ * void func2(int alloc_or_recurse) {
+ * if (alloc_or_recurse) {
+ * frame_pointer[-300] = 1;
+ * }
+ * }
+ */
+ .insns = {
+ /* main */
+ BPF_MOV64_IMM(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 6), /* call A */
+ BPF_MOV64_IMM(BPF_REG_1, 1),
+ BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 4), /* call A */
+ BPF_MOV64_IMM(BPF_REG_1, 1),
+ BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 7), /* call B */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ /* A */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 2),
+ BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0),
+ BPF_EXIT_INSN(),
+ BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call B */
+ BPF_EXIT_INSN(),
+ /* B */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+ BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .result = REJECT,
+ .errstr = "combined stack",
+ },
+ {
+ "calls: stack depth check using three frames. test5",
+ .insns = {
+ /* main */
+ BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call A */
+ BPF_EXIT_INSN(),
+ /* A */
+ BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call B */
+ BPF_EXIT_INSN(),
+ /* B */
+ BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call C */
+ BPF_EXIT_INSN(),
+ /* C */
+ BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call D */
+ BPF_EXIT_INSN(),
+ /* D */
+ BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call E */
+ BPF_EXIT_INSN(),
+ /* E */
+ BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call F */
+ BPF_EXIT_INSN(),
+ /* F */
+ BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call G */
+ BPF_EXIT_INSN(),
+ /* G */
+ BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call H */
+ BPF_EXIT_INSN(),
+ /* H */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .errstr = "call stack",
+ .result = REJECT,
+ },
+ {
+ "calls: spill into caller stack frame",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .errstr = "cannot spill",
+ .result = REJECT,
+ },
+ {
+ "calls: write into caller stack frame",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+ BPF_EXIT_INSN(),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .result = ACCEPT,
+ .retval = 42,
+ },
+ {
+ "calls: write into callee stack frame",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, -8),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .errstr = "cannot return stack pointer",
+ .result = REJECT,
+ },
+ {
+ "calls: two calls with stack write and void return",
+ .insns = {
+ /* main prog */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -16),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+
+ /* subprog 2 */
+ /* write into stack frame of main prog */
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 0),
+ BPF_EXIT_INSN(), /* void return */
+ },
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .result = ACCEPT,
+ },
+ {
+ "calls: ambiguous return value",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "allowed for root only",
+ .result_unpriv = REJECT,
+ .errstr = "R0 !read_ok",
+ .result = REJECT,
+ },
+ {
+ "calls: two calls that return map_value",
+ .insns = {
+ /* main prog */
+ /* pass fp-16, fp-8 into a function */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8),
+
+ /* fetch map_value_ptr from the stack of this function */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ /* write into map value */
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+ /* fetch secound map_value_ptr from the stack */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -16),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ /* write into map value */
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ /* call 3rd function twice */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+ /* first time with fp-8 */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+ /* second time with fp-16 */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+
+ /* subprog 2 */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ /* lookup from map */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ /* write map_value_ptr into stack frame of main prog */
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(), /* return 0 */
+ },
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .fixup_map1 = { 23 },
+ .result = ACCEPT,
+ },
+ {
+ "calls: two calls that return map_value with bool condition",
+ .insns = {
+ /* main prog */
+ /* pass fp-16, fp-8 into a function */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ /* call 3rd function twice */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+ /* first time with fp-8 */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 9),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
+ /* fetch map_value_ptr from the stack of this function */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+ /* write into map value */
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+ /* second time with fp-16 */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
+ /* fetch secound map_value_ptr from the stack */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
+ /* write into map value */
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 2 */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ /* lookup from map */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(), /* return 0 */
+ /* write map_value_ptr into stack frame of main prog */
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(), /* return 1 */
+ },
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .fixup_map1 = { 23 },
+ .result = ACCEPT,
+ },
+ {
+ "calls: two calls that return map_value with incorrect bool check",
+ .insns = {
+ /* main prog */
+ /* pass fp-16, fp-8 into a function */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ /* call 3rd function twice */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+ /* first time with fp-8 */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 9),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
+ /* fetch map_value_ptr from the stack of this function */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+ /* write into map value */
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+ /* second time with fp-16 */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ /* fetch secound map_value_ptr from the stack */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
+ /* write into map value */
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 2 */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ /* lookup from map */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(), /* return 0 */
+ /* write map_value_ptr into stack frame of main prog */
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(), /* return 1 */
+ },
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .fixup_map1 = { 23 },
+ .result = REJECT,
+ .errstr = "invalid read from stack off -16+0 size 8",
+ },
+ {
+ "calls: two calls that receive map_value via arg=ptr_stack_of_caller. test1",
+ .insns = {
+ /* main prog */
+ /* pass fp-16, fp-8 into a function */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+ /* 1st lookup from map */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_8, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ /* write map_value_ptr into stack frame of main prog at fp-8 */
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_8, 1),
+
+ /* 2nd lookup from map */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), /* 20 */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, /* 24 */
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_9, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ /* write map_value_ptr into stack frame of main prog at fp-16 */
+ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_9, 1),
+
+ /* call 3rd func with fp-8, 0|1, fp-16, 0|1 */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), /* 30 */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_8),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_7),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_9),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), /* 34 */
+ BPF_EXIT_INSN(),
+
+ /* subprog 2 */
+ /* if arg2 == 1 do *arg1 = 0 */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2),
+ /* fetch map_value_ptr from the stack of this function */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
+ /* write into map value */
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+
+ /* if arg4 == 1 do *arg3 = 0 */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 1, 2),
+ /* fetch map_value_ptr from the stack of this function */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0),
+ /* write into map value */
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 2, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map1 = { 12, 22 },
+ .result = REJECT,
+ .errstr = "invalid access to map value, value_size=8 off=2 size=8",
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "calls: two calls that receive map_value via arg=ptr_stack_of_caller. test2",
+ .insns = {
+ /* main prog */
+ /* pass fp-16, fp-8 into a function */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+ /* 1st lookup from map */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_8, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ /* write map_value_ptr into stack frame of main prog at fp-8 */
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_8, 1),
+
+ /* 2nd lookup from map */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), /* 20 */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, /* 24 */
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_9, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ /* write map_value_ptr into stack frame of main prog at fp-16 */
+ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_9, 1),
+
+ /* call 3rd func with fp-8, 0|1, fp-16, 0|1 */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), /* 30 */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_8),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_7),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_9),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), /* 34 */
+ BPF_EXIT_INSN(),
+
+ /* subprog 2 */
+ /* if arg2 == 1 do *arg1 = 0 */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2),
+ /* fetch map_value_ptr from the stack of this function */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
+ /* write into map value */
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+
+ /* if arg4 == 1 do *arg3 = 0 */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 1, 2),
+ /* fetch map_value_ptr from the stack of this function */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0),
+ /* write into map value */
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map1 = { 12, 22 },
+ .result = ACCEPT,
+ },
+ {
+ "calls: two jumps that receive map_value via arg=ptr_stack_of_jumper. test3",
+ .insns = {
+ /* main prog */
+ /* pass fp-16, fp-8 into a function */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+ /* 1st lookup from map */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -24, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -24),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_8, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ /* write map_value_ptr into stack frame of main prog at fp-8 */
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_8, 1),
+
+ /* 2nd lookup from map */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -24),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_9, 0), // 26
+ BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ /* write map_value_ptr into stack frame of main prog at fp-16 */
+ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_9, 1),
+
+ /* call 3rd func with fp-8, 0|1, fp-16, 0|1 */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), // 30
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_8),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_7),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_9),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), // 34
+ BPF_JMP_IMM(BPF_JA, 0, 0, -30),
+
+ /* subprog 2 */
+ /* if arg2 == 1 do *arg1 = 0 */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2),
+ /* fetch map_value_ptr from the stack of this function */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
+ /* write into map value */
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+
+ /* if arg4 == 1 do *arg3 = 0 */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 1, 2),
+ /* fetch map_value_ptr from the stack of this function */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0),
+ /* write into map value */
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 2, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, -8),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map1 = { 12, 22 },
+ .result = REJECT,
+ .errstr = "invalid access to map value, value_size=8 off=2 size=8",
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "calls: two calls that receive map_value_ptr_or_null via arg. test1",
+ .insns = {
+ /* main prog */
+ /* pass fp-16, fp-8 into a function */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+ /* 1st lookup from map */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ /* write map_value_ptr_or_null into stack frame of main prog at fp-8 */
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_8, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_MOV64_IMM(BPF_REG_8, 1),
+
+ /* 2nd lookup from map */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ /* write map_value_ptr_or_null into stack frame of main prog at fp-16 */
+ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_9, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_MOV64_IMM(BPF_REG_9, 1),
+
+ /* call 3rd func with fp-8, 0|1, fp-16, 0|1 */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_8),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_7),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_9),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+
+ /* subprog 2 */
+ /* if arg2 == 1 do *arg1 = 0 */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2),
+ /* fetch map_value_ptr from the stack of this function */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
+ /* write into map value */
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+
+ /* if arg4 == 1 do *arg3 = 0 */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 1, 2),
+ /* fetch map_value_ptr from the stack of this function */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0),
+ /* write into map value */
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map1 = { 12, 22 },
+ .result = ACCEPT,
+ },
+ {
+ "calls: two calls that receive map_value_ptr_or_null via arg. test2",
+ .insns = {
+ /* main prog */
+ /* pass fp-16, fp-8 into a function */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+ /* 1st lookup from map */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ /* write map_value_ptr_or_null into stack frame of main prog at fp-8 */
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_8, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_MOV64_IMM(BPF_REG_8, 1),
+
+ /* 2nd lookup from map */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ /* write map_value_ptr_or_null into stack frame of main prog at fp-16 */
+ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_9, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_MOV64_IMM(BPF_REG_9, 1),
+
+ /* call 3rd func with fp-8, 0|1, fp-16, 0|1 */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_8),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_7),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_9),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+
+ /* subprog 2 */
+ /* if arg2 == 1 do *arg1 = 0 */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2),
+ /* fetch map_value_ptr from the stack of this function */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
+ /* write into map value */
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+
+ /* if arg4 == 0 do *arg3 = 0 */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 0, 2),
+ /* fetch map_value_ptr from the stack of this function */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0),
+ /* write into map value */
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map1 = { 12, 22 },
+ .result = REJECT,
+ .errstr = "R0 invalid mem access 'inv'",
+ },
+ {
+ "calls: pkt_ptr spill into caller stack",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ /* spill unchecked pkt_ptr into stack of caller */
+ BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2),
+ /* now the pkt range is verified, read pkt_ptr from stack */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_4, 0),
+ /* write 4 bytes into packet */
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .retval = POINTER_VALUE,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "calls: pkt_ptr spill into caller stack 2",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ /* Marking is still kept, but not in all cases safe. */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+ BPF_ST_MEM(BPF_W, BPF_REG_4, 0, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ /* spill unchecked pkt_ptr into stack of caller */
+ BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2),
+ /* now the pkt range is verified, read pkt_ptr from stack */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_4, 0),
+ /* write 4 bytes into packet */
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "invalid access to packet",
+ .result = REJECT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "calls: pkt_ptr spill into caller stack 3",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ /* Marking is still kept and safe here. */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+ BPF_ST_MEM(BPF_W, BPF_REG_4, 0, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ /* spill unchecked pkt_ptr into stack of caller */
+ BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3),
+ BPF_MOV64_IMM(BPF_REG_5, 1),
+ /* now the pkt range is verified, read pkt_ptr from stack */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_4, 0),
+ /* write 4 bytes into packet */
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 1,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "calls: pkt_ptr spill into caller stack 4",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ /* Check marking propagated. */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+ BPF_ST_MEM(BPF_W, BPF_REG_4, 0, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ /* spill unchecked pkt_ptr into stack of caller */
+ BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2),
+ BPF_MOV64_IMM(BPF_REG_5, 1),
+ /* don't read back pkt_ptr from stack here */
+ /* write 4 bytes into packet */
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 1,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "calls: pkt_ptr spill into caller stack 5",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3),
+ /* spill checked pkt_ptr into stack of caller */
+ BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 1),
+ /* don't read back pkt_ptr from stack here */
+ /* write 4 bytes into packet */
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "same insn cannot be used with different",
+ .result = REJECT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "calls: pkt_ptr spill into caller stack 6",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3),
+ /* spill checked pkt_ptr into stack of caller */
+ BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 1),
+ /* don't read back pkt_ptr from stack here */
+ /* write 4 bytes into packet */
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "R4 invalid mem access",
+ .result = REJECT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "calls: pkt_ptr spill into caller stack 7",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3),
+ /* spill checked pkt_ptr into stack of caller */
+ BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 1),
+ /* don't read back pkt_ptr from stack here */
+ /* write 4 bytes into packet */
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "R4 invalid mem access",
+ .result = REJECT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "calls: pkt_ptr spill into caller stack 8",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JLE, BPF_REG_0, BPF_REG_3, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3),
+ /* spill checked pkt_ptr into stack of caller */
+ BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 1),
+ /* don't read back pkt_ptr from stack here */
+ /* write 4 bytes into packet */
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "calls: pkt_ptr spill into caller stack 9",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JLE, BPF_REG_0, BPF_REG_3, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ /* spill unchecked pkt_ptr into stack of caller */
+ BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2),
+ BPF_MOV64_IMM(BPF_REG_5, 1),
+ /* don't read back pkt_ptr from stack here */
+ /* write 4 bytes into packet */
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "invalid access to packet",
+ .result = REJECT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "calls: caller stack init to zero or map_value_or_null",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+ /* fetch map_value_or_null or const_zero from stack */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ /* store into map_value */
+ BPF_ST_MEM(BPF_W, BPF_REG_0, 0, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ /* if (ctx == 0) return; */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 8),
+ /* else bpf_map_lookup() and *(fp - 8) = r0 */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_2),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ /* write map_value_ptr_or_null into stack frame of main prog at fp-8 */
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 13 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ },
+ {
+ "calls: stack init to zero and pruning",
+ .insns = {
+ /* first make allocated_stack 16 byte */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0),
+ /* now fork the execution such that the false branch
+ * of JGT insn will be verified second and it skisp zero
+ * init of fp-8 stack slot. If stack liveness marking
+ * is missing live_read marks from call map_lookup
+ * processing then pruning will incorrectly assume
+ * that fp-8 stack slot was unused in the fall-through
+ * branch and will accept the program incorrectly
+ */
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 2, 2),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 6 },
+ .errstr = "invalid indirect read from stack off -8+0 size 8",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ },
+ {
+ "calls: two calls returning different map pointers for lookup (hash, array)",
+ .insns = {
+ /* main prog */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
+ BPF_CALL_REL(11),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_CALL_REL(12),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0,
+ offsetof(struct test_val, foo)),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ /* subprog 1 */
+ BPF_LD_MAP_FD(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ /* subprog 2 */
+ BPF_LD_MAP_FD(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map2 = { 13 },
+ .fixup_map4 = { 16 },
+ .result = ACCEPT,
+ .retval = 1,
+ },
+ {
+ "calls: two calls returning different map pointers for lookup (hash, map in map)",
+ .insns = {
+ /* main prog */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
+ BPF_CALL_REL(11),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_CALL_REL(12),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0,
+ offsetof(struct test_val, foo)),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ /* subprog 1 */
+ BPF_LD_MAP_FD(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ /* subprog 2 */
+ BPF_LD_MAP_FD(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_in_map = { 16 },
+ .fixup_map4 = { 13 },
+ .result = REJECT,
+ .errstr = "R0 invalid mem access 'map_ptr'",
+ },
+ {
+ "cond: two branches returning different map pointers for lookup (tail, tail)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 0, 3),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_3, 7),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_prog1 = { 5 },
+ .fixup_prog2 = { 2 },
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "tail_call abusing map_ptr",
+ .result = ACCEPT,
+ .retval = 42,
+ },
+ {
+ "cond: two branches returning same map pointers for lookup (tail, tail)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 3),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_3, 7),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_prog2 = { 2, 5 },
+ .result_unpriv = ACCEPT,
+ .result = ACCEPT,
+ .retval = 42,
+ },
+ {
+ "search pruning: all branches should be verified (nop operation)",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 0xbeef, 2),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_JMP_A(1),
+ BPF_MOV64_IMM(BPF_REG_4, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_4, -16),
+ BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -16),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_5, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_6, 0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_6, 0, 0xdead),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .errstr = "R6 invalid mem access 'inv'",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "search pruning: all branches should be verified (invalid stack access)",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 0xbeef, 2),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_4, -16),
+ BPF_JMP_A(1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_4, -24),
+ BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -16),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .errstr = "invalid read from stack off -16+0 size 8",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "jit: lsh, rsh, arsh by 1",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_MOV64_IMM(BPF_REG_1, 0xff),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 1),
+ BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x3fc, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 1),
+ BPF_ALU32_IMM(BPF_RSH, BPF_REG_1, 1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0xff, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_IMM(BPF_ARSH, BPF_REG_1, 1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x7f, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = 2,
+ },
+ {
+ "jit: mov32 for ldimm64, 1",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_LD_IMM64(BPF_REG_1, 0xfeffffffffffffffULL),
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 32),
+ BPF_LD_IMM64(BPF_REG_2, 0xfeffffffULL),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = 2,
+ },
+ {
+ "jit: mov32 for ldimm64, 2",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_IMM64(BPF_REG_1, 0x1ffffffffULL),
+ BPF_LD_IMM64(BPF_REG_2, 0xffffffffULL),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = 2,
+ },
+ {
+ "jit: various mul tests",
+ .insns = {
+ BPF_LD_IMM64(BPF_REG_2, 0xeeff0d413122ULL),
+ BPF_LD_IMM64(BPF_REG_0, 0xfefefeULL),
+ BPF_LD_IMM64(BPF_REG_1, 0xefefefULL),
+ BPF_ALU64_REG(BPF_MUL, BPF_REG_0, BPF_REG_1),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LD_IMM64(BPF_REG_3, 0xfefefeULL),
+ BPF_ALU64_REG(BPF_MUL, BPF_REG_3, BPF_REG_1),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV32_REG(BPF_REG_2, BPF_REG_2),
+ BPF_LD_IMM64(BPF_REG_0, 0xfefefeULL),
+ BPF_ALU32_REG(BPF_MUL, BPF_REG_0, BPF_REG_1),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LD_IMM64(BPF_REG_3, 0xfefefeULL),
+ BPF_ALU32_REG(BPF_MUL, BPF_REG_3, BPF_REG_1),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LD_IMM64(BPF_REG_0, 0x952a7bbcULL),
+ BPF_LD_IMM64(BPF_REG_1, 0xfefefeULL),
+ BPF_LD_IMM64(BPF_REG_2, 0xeeff0d413122ULL),
+ BPF_ALU32_REG(BPF_MUL, BPF_REG_2, BPF_REG_1),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_2, BPF_REG_0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = 2,
+ },
+ {
+ "xadd/w check unaligned stack",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+ BPF_STX_XADD(BPF_W, BPF_REG_10, BPF_REG_0, -7),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "misaligned stack access off",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "xadd/w check unaligned map",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_1, 1),
+ BPF_STX_XADD(BPF_W, BPF_REG_0, BPF_REG_1, 3),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 3),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .result = REJECT,
+ .errstr = "misaligned value access off",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "xadd/w check unaligned pkt",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 99),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 6),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 3, 0),
+ BPF_STX_XADD(BPF_W, BPF_REG_2, BPF_REG_0, 1),
+ BPF_STX_XADD(BPF_W, BPF_REG_2, BPF_REG_0, 2),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 1),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "BPF_XADD stores into R2 packet",
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "xadd/w check whether src/dst got mangled, 1",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+ BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+ BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+ BPF_JMP_REG(BPF_JNE, BPF_REG_6, BPF_REG_0, 3),
+ BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_10, 2),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 42),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .retval = 3,
+ },
+ {
+ "xadd/w check whether src/dst got mangled, 2",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -8),
+ BPF_STX_XADD(BPF_W, BPF_REG_10, BPF_REG_0, -8),
+ BPF_STX_XADD(BPF_W, BPF_REG_10, BPF_REG_0, -8),
+ BPF_JMP_REG(BPF_JNE, BPF_REG_6, BPF_REG_0, 3),
+ BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_10, 2),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -8),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 42),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .retval = 3,
+ },
+ {
+ "bpf_get_stack return R0 within range",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 28),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_9, sizeof(struct test_val)/2),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
+ BPF_MOV64_IMM(BPF_REG_3, sizeof(struct test_val)/2),
+ BPF_MOV64_IMM(BPF_REG_4, 256),
+ BPF_EMIT_CALL(BPF_FUNC_get_stack),
+ BPF_MOV64_IMM(BPF_REG_1, 0),
+ BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_8, 32),
+ BPF_ALU64_IMM(BPF_ARSH, BPF_REG_8, 32),
+ BPF_JMP_REG(BPF_JSLT, BPF_REG_8, BPF_REG_1, 16),
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_9, BPF_REG_8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_8),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_9),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 32),
+ BPF_ALU64_IMM(BPF_ARSH, BPF_REG_1, 32),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_1),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+ BPF_MOV64_IMM(BPF_REG_5, sizeof(struct test_val)/2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_5),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_9),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_EMIT_CALL(BPF_FUNC_get_stack),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 4 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "ld_abs: invalid op 1",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_LD_ABS(BPF_DW, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = REJECT,
+ .errstr = "unknown opcode",
+ },
+ {
+ "ld_abs: invalid op 2",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_0, 256),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_LD_IND(BPF_DW, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = REJECT,
+ .errstr = "unknown opcode",
+ },
+ {
+ "ld_abs: nmap reduced",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_LD_ABS(BPF_H, 12),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0x806, 28),
+ BPF_LD_ABS(BPF_H, 12),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0x806, 26),
+ BPF_MOV32_IMM(BPF_REG_0, 18),
+ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -64),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_10, -64),
+ BPF_LD_IND(BPF_W, BPF_REG_7, 14),
+ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -60),
+ BPF_MOV32_IMM(BPF_REG_0, 280971478),
+ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -56),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_10, -56),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -60),
+ BPF_ALU32_REG(BPF_SUB, BPF_REG_0, BPF_REG_7),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 15),
+ BPF_LD_ABS(BPF_H, 12),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0x806, 13),
+ BPF_MOV32_IMM(BPF_REG_0, 22),
+ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -56),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_10, -56),
+ BPF_LD_IND(BPF_H, BPF_REG_7, 14),
+ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -52),
+ BPF_MOV32_IMM(BPF_REG_0, 17366),
+ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -48),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_10, -48),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -52),
+ BPF_ALU32_REG(BPF_SUB, BPF_REG_0, BPF_REG_7),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ BPF_MOV32_IMM(BPF_REG_0, 256),
+ BPF_EXIT_INSN(),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .data = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x06, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0x10, 0xbf, 0x48, 0xd6, 0x43, 0xd6,
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 256,
+ },
+ {
+ "ld_abs: div + abs, test 1",
+ .insns = {
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1),
+ BPF_LD_ABS(BPF_B, 3),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_2, 2),
+ BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_8, BPF_REG_0),
+ BPF_LD_ABS(BPF_B, 4),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_8, BPF_REG_0),
+ BPF_LD_IND(BPF_B, BPF_REG_8, -70),
+ BPF_EXIT_INSN(),
+ },
+ .data = {
+ 10, 20, 30, 40, 50,
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 10,
+ },
+ {
+ "ld_abs: div + abs, test 2",
+ .insns = {
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1),
+ BPF_LD_ABS(BPF_B, 3),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_2, 2),
+ BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_8, BPF_REG_0),
+ BPF_LD_ABS(BPF_B, 128),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_8, BPF_REG_0),
+ BPF_LD_IND(BPF_B, BPF_REG_8, -70),
+ BPF_EXIT_INSN(),
+ },
+ .data = {
+ 10, 20, 30, 40, 50,
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 0,
+ },
+ {
+ "ld_abs: div + abs, test 3",
+ .insns = {
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_7, 0),
+ BPF_LD_ABS(BPF_B, 3),
+ BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_7),
+ BPF_EXIT_INSN(),
+ },
+ .data = {
+ 10, 20, 30, 40, 50,
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 0,
+ },
+ {
+ "ld_abs: div + abs, test 4",
+ .insns = {
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_7, 0),
+ BPF_LD_ABS(BPF_B, 256),
+ BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_7),
+ BPF_EXIT_INSN(),
+ },
+ .data = {
+ 10, 20, 30, 40, 50,
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 0,
+ },
+ {
+ "ld_abs: vlan + abs, test 1",
+ .insns = { },
+ .data = {
+ 0x34,
+ },
+ .fill_helper = bpf_fill_ld_abs_vlan_push_pop,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 0xbef,
+ },
+ {
+ "ld_abs: vlan + abs, test 2",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_LD_ABS(BPF_B, 0),
+ BPF_LD_ABS(BPF_H, 0),
+ BPF_LD_ABS(BPF_W, 0),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_6),
+ BPF_MOV64_IMM(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+ BPF_MOV64_IMM(BPF_REG_2, 1),
+ BPF_MOV64_IMM(BPF_REG_3, 2),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_skb_vlan_push),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_7),
+ BPF_LD_ABS(BPF_B, 0),
+ BPF_LD_ABS(BPF_H, 0),
+ BPF_LD_ABS(BPF_W, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 42),
+ BPF_EXIT_INSN(),
+ },
+ .data = {
+ 0x34,
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 42,
+ },
+ {
+ "ld_abs: jump around ld_abs",
+ .insns = { },
+ .data = {
+ 10, 11,
+ },
+ .fill_helper = bpf_fill_jump_around_ld_abs,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 10,
+ },
+ {
+ "ld_dw: xor semi-random 64 bit imms, test 1",
+ .insns = { },
+ .data = { },
+ .fill_helper = bpf_fill_rand_ld_dw,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 4090,
+ },
+ {
+ "ld_dw: xor semi-random 64 bit imms, test 2",
+ .insns = { },
+ .data = { },
+ .fill_helper = bpf_fill_rand_ld_dw,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 2047,
+ },
+ {
+ "ld_dw: xor semi-random 64 bit imms, test 3",
+ .insns = { },
+ .data = { },
+ .fill_helper = bpf_fill_rand_ld_dw,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 511,
+ },
+ {
+ "ld_dw: xor semi-random 64 bit imms, test 4",
+ .insns = { },
+ .data = { },
+ .fill_helper = bpf_fill_rand_ld_dw,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 5,
+ },
+ {
+ "pass unmodified ctx pointer to helper",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_csum_update),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ },
+ {
+ "pass modified ctx pointer to helper, 1",
+ .insns = {
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -612),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_csum_update),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = REJECT,
+ .errstr = "dereference of modified ctx ptr",
+ },
+ {
+ "pass modified ctx pointer to helper, 2",
+ .insns = {
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -612),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_socket_cookie),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result_unpriv = REJECT,
+ .result = REJECT,
+ .errstr_unpriv = "dereference of modified ctx ptr",
+ .errstr = "dereference of modified ctx ptr",
+ },
+ {
+ "pass modified ctx pointer to helper, 3",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, 0),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_3, 4),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_csum_update),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = REJECT,
+ .errstr = "variable ctx access var_off=(0x0; 0x4)",
+ },
+ {
+ "mov64 src == dst",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_2),
+ // Check bounds are OK
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ },
+ {
+ "mov64 src != dst",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_3),
+ // Check bounds are OK
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ },
+ {
+ "calls: ctx read at start of subprog",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5),
+ BPF_JMP_REG(BPF_JSGT, BPF_REG_0, BPF_REG_0, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_B, BPF_REG_9, BPF_REG_1, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
+ .errstr_unpriv = "function calls to other bpf functions are allowed for root only",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+};
+
+static int probe_filter_length(const struct bpf_insn *fp)
+{
+ int len;
+
+ for (len = MAX_INSNS - 1; len > 0; --len)
+ if (fp[len].code != 0 || fp[len].imm != 0)
+ break;
+ return len + 1;
+}
+
+static int create_map(uint32_t type, uint32_t size_key,
+ uint32_t size_value, uint32_t max_elem)
+{
+ int fd;
+
+ fd = bpf_create_map(type, size_key, size_value, max_elem,
+ type == BPF_MAP_TYPE_HASH ? BPF_F_NO_PREALLOC : 0);
+ if (fd < 0)
+ printf("Failed to create hash map '%s'!\n", strerror(errno));
+
+ return fd;
+}
+
+static int create_prog_dummy1(enum bpf_map_type prog_type)
+{
+ struct bpf_insn prog[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 42),
+ BPF_EXIT_INSN(),
+ };
+
+ return bpf_load_program(prog_type, prog,
+ ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
+}
+
+static int create_prog_dummy2(enum bpf_map_type prog_type, int mfd, int idx)
+{
+ struct bpf_insn prog[] = {
+ BPF_MOV64_IMM(BPF_REG_3, idx),
+ BPF_LD_MAP_FD(BPF_REG_2, mfd),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 41),
+ BPF_EXIT_INSN(),
+ };
+
+ return bpf_load_program(prog_type, prog,
+ ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
+}
+
+static int create_prog_array(enum bpf_map_type prog_type, uint32_t max_elem,
+ int p1key)
+{
+ int p2key = 1;
+ int mfd, p1fd, p2fd;
+
+ mfd = bpf_create_map(BPF_MAP_TYPE_PROG_ARRAY, sizeof(int),
+ sizeof(int), max_elem, 0);
+ if (mfd < 0) {
+ printf("Failed to create prog array '%s'!\n", strerror(errno));
+ return -1;
+ }
+
+ p1fd = create_prog_dummy1(prog_type);
+ p2fd = create_prog_dummy2(prog_type, mfd, p2key);
+ if (p1fd < 0 || p2fd < 0)
+ goto out;
+ if (bpf_map_update_elem(mfd, &p1key, &p1fd, BPF_ANY) < 0)
+ goto out;
+ if (bpf_map_update_elem(mfd, &p2key, &p2fd, BPF_ANY) < 0)
+ goto out;
+ close(p2fd);
+ close(p1fd);
+
+ return mfd;
+out:
+ close(p2fd);
+ close(p1fd);
+ close(mfd);
+ return -1;
+}
+
+static int create_map_in_map(void)
+{
+ int inner_map_fd, outer_map_fd;
+
+ inner_map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(int),
+ sizeof(int), 1, 0);
+ if (inner_map_fd < 0) {
+ printf("Failed to create array '%s'!\n", strerror(errno));
+ return inner_map_fd;
+ }
+
+ outer_map_fd = bpf_create_map_in_map(BPF_MAP_TYPE_ARRAY_OF_MAPS, NULL,
+ sizeof(int), inner_map_fd, 1, 0);
+ if (outer_map_fd < 0)
+ printf("Failed to create array of maps '%s'!\n",
+ strerror(errno));
+
+ close(inner_map_fd);
+
+ return outer_map_fd;
+}
+
+static int create_cgroup_storage(void)
+{
+ int fd;
+
+ fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE,
+ sizeof(struct bpf_cgroup_storage_key),
+ TEST_DATA_LEN, 0, 0);
+ if (fd < 0)
+ printf("Failed to create array '%s'!\n", strerror(errno));
+
+ return fd;
+}
+
+static char bpf_vlog[UINT_MAX >> 8];
+
+static void do_test_fixup(struct bpf_test *test, enum bpf_map_type prog_type,
+ struct bpf_insn *prog, int *map_fds)
+{
+ int *fixup_map1 = test->fixup_map1;
+ int *fixup_map2 = test->fixup_map2;
+ int *fixup_map3 = test->fixup_map3;
+ int *fixup_map4 = test->fixup_map4;
+ int *fixup_prog1 = test->fixup_prog1;
+ int *fixup_prog2 = test->fixup_prog2;
+ int *fixup_map_in_map = test->fixup_map_in_map;
+ int *fixup_cgroup_storage = test->fixup_cgroup_storage;
+
+ if (test->fill_helper)
+ test->fill_helper(test);
+
+ /* Allocating HTs with 1 elem is fine here, since we only test
+ * for verifier and not do a runtime lookup, so the only thing
+ * that really matters is value size in this case.
+ */
+ if (*fixup_map1) {
+ map_fds[0] = create_map(BPF_MAP_TYPE_HASH, sizeof(long long),
+ sizeof(long long), 1);
+ do {
+ prog[*fixup_map1].imm = map_fds[0];
+ fixup_map1++;
+ } while (*fixup_map1);
+ }
+
+ if (*fixup_map2) {
+ map_fds[1] = create_map(BPF_MAP_TYPE_HASH, sizeof(long long),
+ sizeof(struct test_val), 1);
+ do {
+ prog[*fixup_map2].imm = map_fds[1];
+ fixup_map2++;
+ } while (*fixup_map2);
+ }
+
+ if (*fixup_map3) {
+ map_fds[2] = create_map(BPF_MAP_TYPE_HASH, sizeof(long long),
+ sizeof(struct other_val), 1);
+ do {
+ prog[*fixup_map3].imm = map_fds[2];
+ fixup_map3++;
+ } while (*fixup_map3);
+ }
+
+ if (*fixup_map4) {
+ map_fds[3] = create_map(BPF_MAP_TYPE_ARRAY, sizeof(int),
+ sizeof(struct test_val), 1);
+ do {
+ prog[*fixup_map4].imm = map_fds[3];
+ fixup_map4++;
+ } while (*fixup_map4);
+ }
+
+ if (*fixup_prog1) {
+ map_fds[4] = create_prog_array(prog_type, 4, 0);
+ do {
+ prog[*fixup_prog1].imm = map_fds[4];
+ fixup_prog1++;
+ } while (*fixup_prog1);
+ }
+
+ if (*fixup_prog2) {
+ map_fds[5] = create_prog_array(prog_type, 8, 7);
+ do {
+ prog[*fixup_prog2].imm = map_fds[5];
+ fixup_prog2++;
+ } while (*fixup_prog2);
+ }
+
+ if (*fixup_map_in_map) {
+ map_fds[6] = create_map_in_map();
+ do {
+ prog[*fixup_map_in_map].imm = map_fds[6];
+ fixup_map_in_map++;
+ } while (*fixup_map_in_map);
+ }
+
+ if (*fixup_cgroup_storage) {
+ map_fds[7] = create_cgroup_storage();
+ do {
+ prog[*fixup_cgroup_storage].imm = map_fds[7];
+ fixup_cgroup_storage++;
+ } while (*fixup_cgroup_storage);
+ }
+}
+
+static int set_admin(bool admin)
+{
+ cap_t caps;
+ const cap_value_t cap_val = CAP_SYS_ADMIN;
+ int ret = -1;
+
+ caps = cap_get_proc();
+ if (!caps) {
+ perror("cap_get_proc");
+ return -1;
+ }
+ if (cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_val,
+ admin ? CAP_SET : CAP_CLEAR)) {
+ perror("cap_set_flag");
+ goto out;
+ }
+ if (cap_set_proc(caps)) {
+ perror("cap_set_proc");
+ goto out;
+ }
+ ret = 0;
+out:
+ if (cap_free(caps))
+ perror("cap_free");
+ return ret;
+}
+
+static void do_test_single(struct bpf_test *test, bool unpriv,
+ int *passes, int *errors)
+{
+ int fd_prog, expected_ret, alignment_prevented_execution;
+ int prog_len, prog_type = test->prog_type;
+ struct bpf_insn *prog = test->insns;
+ int map_fds[MAX_NR_MAPS];
+ const char *expected_err;
+ uint32_t expected_val;
+ uint32_t retval;
+ __u32 pflags;
+ int i, err;
+
+ for (i = 0; i < MAX_NR_MAPS; i++)
+ map_fds[i] = -1;
+
+ if (!prog_type)
+ prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+ do_test_fixup(test, prog_type, prog, map_fds);
+ prog_len = probe_filter_length(prog);
+
+ pflags = 0;
+ if (test->flags & F_LOAD_WITH_STRICT_ALIGNMENT)
+ pflags |= BPF_F_STRICT_ALIGNMENT;
+ if (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS)
+ pflags |= BPF_F_ANY_ALIGNMENT;
+ fd_prog = bpf_verify_program(prog_type, prog, prog_len, pflags,
+ "GPL", 0, bpf_vlog, sizeof(bpf_vlog), 1);
+
+ expected_ret = unpriv && test->result_unpriv != UNDEF ?
+ test->result_unpriv : test->result;
+ expected_err = unpriv && test->errstr_unpriv ?
+ test->errstr_unpriv : test->errstr;
+ expected_val = unpriv && test->retval_unpriv ?
+ test->retval_unpriv : test->retval;
+
+ alignment_prevented_execution = 0;
+
+ if (expected_ret == ACCEPT) {
+ if (fd_prog < 0) {
+ printf("FAIL\nFailed to load prog '%s'!\n",
+ strerror(errno));
+ goto fail_log;
+ }
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+ if (fd_prog >= 0 &&
+ (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS)) {
+ alignment_prevented_execution = 1;
+ goto test_ok;
+ }
+#endif
+ } else {
+ if (fd_prog >= 0) {
+ printf("FAIL\nUnexpected success to load!\n");
+ goto fail_log;
+ }
+ if (!strstr(bpf_vlog, expected_err)) {
+ printf("FAIL\nUnexpected error message!\n\tEXP: %s\n\tRES: %s\n",
+ expected_err, bpf_vlog);
+ goto fail_log;
+ }
+ }
+
+ if (fd_prog >= 0) {
+ __u8 tmp[TEST_DATA_LEN << 2];
+ __u32 size_tmp = sizeof(tmp);
+
+ if (unpriv)
+ set_admin(true);
+ err = bpf_prog_test_run(fd_prog, 1, test->data,
+ sizeof(test->data), tmp, &size_tmp,
+ &retval, NULL);
+ if (unpriv)
+ set_admin(false);
+ if (err && errno != 524/*ENOTSUPP*/ && errno != EPERM) {
+ printf("Unexpected bpf_prog_test_run error\n");
+ goto fail_log;
+ }
+ if (!err && retval != expected_val &&
+ expected_val != POINTER_VALUE) {
+ printf("FAIL retval %d != %d\n", retval, expected_val);
+ goto fail_log;
+ }
+ }
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+test_ok:
+#endif
+ (*passes)++;
+ printf("OK%s\n", alignment_prevented_execution ?
+ " (NOTE: not executed due to unknown alignment)" : "");
+close_fds:
+ close(fd_prog);
+ for (i = 0; i < MAX_NR_MAPS; i++)
+ close(map_fds[i]);
+ sched_yield();
+ return;
+fail_log:
+ (*errors)++;
+ printf("%s", bpf_vlog);
+ goto close_fds;
+}
+
+static bool is_admin(void)
+{
+ cap_t caps;
+ cap_flag_value_t sysadmin = CAP_CLEAR;
+ const cap_value_t cap_val = CAP_SYS_ADMIN;
+
+#ifdef CAP_IS_SUPPORTED
+ if (!CAP_IS_SUPPORTED(CAP_SETFCAP)) {
+ perror("cap_get_flag");
+ return false;
+ }
+#endif
+ caps = cap_get_proc();
+ if (!caps) {
+ perror("cap_get_proc");
+ return false;
+ }
+ if (cap_get_flag(caps, cap_val, CAP_EFFECTIVE, &sysadmin))
+ perror("cap_get_flag");
+ if (cap_free(caps))
+ perror("cap_free");
+ return (sysadmin == CAP_SET);
+}
+
+static void get_unpriv_disabled()
+{
+ char buf[2];
+ FILE *fd;
+
+ fd = fopen("/proc/sys/"UNPRIV_SYSCTL, "r");
+ if (!fd) {
+ perror("fopen /proc/sys/"UNPRIV_SYSCTL);
+ unpriv_disabled = true;
+ return;
+ }
+ if (fgets(buf, 2, fd) == buf && atoi(buf))
+ unpriv_disabled = true;
+ fclose(fd);
+}
+
+static bool test_as_unpriv(struct bpf_test *test)
+{
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+ /* Some architectures have strict alignment requirements. In
+ * that case, the BPF verifier detects if a program has
+ * unaligned accesses and rejects them. A user can pass
+ * BPF_F_ANY_ALIGNMENT to a program to override this
+ * check. That, however, will only work when a privileged user
+ * loads a program. An unprivileged user loading a program
+ * with this flag will be rejected prior entering the
+ * verifier.
+ */
+ if (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS)
+ return false;
+#endif
+ return !test->prog_type ||
+ test->prog_type == BPF_PROG_TYPE_SOCKET_FILTER ||
+ test->prog_type == BPF_PROG_TYPE_CGROUP_SKB;
+}
+
+static int do_test(bool unpriv, unsigned int from, unsigned int to)
+{
+ int i, passes = 0, errors = 0, skips = 0;
+
+ for (i = from; i < to; i++) {
+ struct bpf_test *test = &tests[i];
+
+ /* Program types that are not supported by non-root we
+ * skip right away.
+ */
+ if (test_as_unpriv(test) && unpriv_disabled) {
+ printf("#%d/u %s SKIP\n", i, test->descr);
+ skips++;
+ } else if (test_as_unpriv(test)) {
+ if (!unpriv)
+ set_admin(false);
+ printf("#%d/u %s ", i, test->descr);
+ do_test_single(test, true, &passes, &errors);
+ if (!unpriv)
+ set_admin(true);
+ }
+
+ if (unpriv) {
+ printf("#%d/p %s SKIP\n", i, test->descr);
+ skips++;
+ } else {
+ printf("#%d/p %s ", i, test->descr);
+ do_test_single(test, false, &passes, &errors);
+ }
+ }
+
+ printf("Summary: %d PASSED, %d SKIPPED, %d FAILED\n", passes,
+ skips, errors);
+ return errors ? EXIT_FAILURE : EXIT_SUCCESS;
+}
+
+int main(int argc, char **argv)
+{
+ unsigned int from = 0, to = ARRAY_SIZE(tests);
+ bool unpriv = !is_admin();
+
+ if (argc == 3) {
+ unsigned int l = atoi(argv[argc - 2]);
+ unsigned int u = atoi(argv[argc - 1]);
+
+ if (l < to && u < to) {
+ from = l;
+ to = u + 1;
+ }
+ } else if (argc == 2) {
+ unsigned int t = atoi(argv[argc - 1]);
+
+ if (t < to) {
+ from = t;
+ to = t + 1;
+ }
+ }
+
+ get_unpriv_disabled();
+ if (unpriv && unpriv_disabled) {
+ printf("Cannot run as unprivileged user with sysctl %s.\n",
+ UNPRIV_SYSCTL);
+ return EXIT_FAILURE;
+ }
+
+ bpf_semi_rand_init();
+ return do_test(unpriv, from, to);
+}
diff --git a/tools/testing/selftests/bpf/test_verifier_log.c b/tools/testing/selftests/bpf/test_verifier_log.c
new file mode 100644
index 000000000..8d6918c3b
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_verifier_log.c
@@ -0,0 +1,174 @@
+#include <errno.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/time.h>
+
+#include <linux/bpf.h>
+#include <linux/filter.h>
+#include <linux/unistd.h>
+
+#include <bpf/bpf.h>
+
+#include "bpf_rlimit.h"
+
+#define LOG_SIZE (1 << 20)
+
+#define err(str...) printf("ERROR: " str)
+
+static const struct bpf_insn code_sample[] = {
+ /* We need a few instructions to pass the min log length */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_EXIT_INSN(),
+};
+
+static inline __u64 ptr_to_u64(const void *ptr)
+{
+ return (__u64) (unsigned long) ptr;
+}
+
+static int load(char *log, size_t log_len, int log_level)
+{
+ union bpf_attr attr;
+
+ bzero(&attr, sizeof(attr));
+ attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+ attr.insn_cnt = (__u32)(sizeof(code_sample) / sizeof(struct bpf_insn));
+ attr.insns = ptr_to_u64(code_sample);
+ attr.license = ptr_to_u64("GPL");
+ attr.log_buf = ptr_to_u64(log);
+ attr.log_size = log_len;
+ attr.log_level = log_level;
+
+ return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
+}
+
+static void check_ret(int ret, int exp_errno)
+{
+ if (ret > 0) {
+ close(ret);
+ err("broken sample loaded successfully!?\n");
+ exit(1);
+ }
+
+ if (!ret || errno != exp_errno) {
+ err("Program load returned: ret:%d/errno:%d, expected ret:%d/errno:%d\n",
+ ret, errno, -1, exp_errno);
+ exit(1);
+ }
+}
+
+static void check_ones(const char *buf, size_t len, const char *msg)
+{
+ while (len--)
+ if (buf[len] != 1) {
+ err("%s", msg);
+ exit(1);
+ }
+}
+
+static void test_log_good(char *log, size_t buf_len, size_t log_len,
+ size_t exp_len, int exp_errno, const char *full_log)
+{
+ size_t len;
+ int ret;
+
+ memset(log, 1, buf_len);
+
+ ret = load(log, log_len, 1);
+ check_ret(ret, exp_errno);
+
+ len = strnlen(log, buf_len);
+ if (len == buf_len) {
+ err("verifier did not NULL terminate the log\n");
+ exit(1);
+ }
+ if (exp_len && len != exp_len) {
+ err("incorrect log length expected:%zd have:%zd\n",
+ exp_len, len);
+ exit(1);
+ }
+
+ if (strchr(log, 1)) {
+ err("verifier leaked a byte through\n");
+ exit(1);
+ }
+
+ check_ones(log + len + 1, buf_len - len - 1,
+ "verifier wrote bytes past NULL termination\n");
+
+ if (memcmp(full_log, log, LOG_SIZE)) {
+ err("log did not match expected output\n");
+ exit(1);
+ }
+}
+
+static void test_log_bad(char *log, size_t log_len, int log_level)
+{
+ int ret;
+
+ ret = load(log, log_len, log_level);
+ check_ret(ret, EINVAL);
+ if (log)
+ check_ones(log, LOG_SIZE,
+ "verifier touched log with bad parameters\n");
+}
+
+int main(int argc, char **argv)
+{
+ char full_log[LOG_SIZE];
+ char log[LOG_SIZE];
+ size_t want_len;
+ int i;
+
+ memset(log, 1, LOG_SIZE);
+
+ /* Test incorrect attr */
+ printf("Test log_level 0...\n");
+ test_log_bad(log, LOG_SIZE, 0);
+
+ printf("Test log_size < 128...\n");
+ test_log_bad(log, 15, 1);
+
+ printf("Test log_buff = NULL...\n");
+ test_log_bad(NULL, LOG_SIZE, 1);
+
+ /* Test with log big enough */
+ printf("Test oversized buffer...\n");
+ test_log_good(full_log, LOG_SIZE, LOG_SIZE, 0, EACCES, full_log);
+
+ want_len = strlen(full_log);
+
+ printf("Test exact buffer...\n");
+ test_log_good(log, LOG_SIZE, want_len + 2, want_len, EACCES, full_log);
+
+ printf("Test undersized buffers...\n");
+ for (i = 0; i < 64; i++) {
+ full_log[want_len - i + 1] = 1;
+ full_log[want_len - i] = 0;
+
+ test_log_good(log, LOG_SIZE, want_len + 1 - i, want_len - i,
+ ENOSPC, full_log);
+ }
+
+ printf("test_verifier_log: OK\n");
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/test_xdp.c b/tools/testing/selftests/bpf/test_xdp.c
new file mode 100644
index 000000000..5e7df8bb5
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_xdp.c
@@ -0,0 +1,235 @@
+/* Copyright (c) 2016,2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stddef.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/in.h>
+#include <linux/udp.h>
+#include <linux/tcp.h>
+#include <linux/pkt_cls.h>
+#include <sys/socket.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+#include "test_iptunnel_common.h"
+
+int _version SEC("version") = 1;
+
+struct bpf_map_def SEC("maps") rxcnt = {
+ .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u64),
+ .max_entries = 256,
+};
+
+struct bpf_map_def SEC("maps") vip2tnl = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(struct vip),
+ .value_size = sizeof(struct iptnl_info),
+ .max_entries = MAX_IPTNL_ENTRIES,
+};
+
+static __always_inline void count_tx(__u32 protocol)
+{
+ __u64 *rxcnt_count;
+
+ rxcnt_count = bpf_map_lookup_elem(&rxcnt, &protocol);
+ if (rxcnt_count)
+ *rxcnt_count += 1;
+}
+
+static __always_inline int get_dport(void *trans_data, void *data_end,
+ __u8 protocol)
+{
+ struct tcphdr *th;
+ struct udphdr *uh;
+
+ switch (protocol) {
+ case IPPROTO_TCP:
+ th = (struct tcphdr *)trans_data;
+ if (th + 1 > data_end)
+ return -1;
+ return th->dest;
+ case IPPROTO_UDP:
+ uh = (struct udphdr *)trans_data;
+ if (uh + 1 > data_end)
+ return -1;
+ return uh->dest;
+ default:
+ return 0;
+ }
+}
+
+static __always_inline void set_ethhdr(struct ethhdr *new_eth,
+ const struct ethhdr *old_eth,
+ const struct iptnl_info *tnl,
+ __be16 h_proto)
+{
+ memcpy(new_eth->h_source, old_eth->h_dest, sizeof(new_eth->h_source));
+ memcpy(new_eth->h_dest, tnl->dmac, sizeof(new_eth->h_dest));
+ new_eth->h_proto = h_proto;
+}
+
+static __always_inline int handle_ipv4(struct xdp_md *xdp)
+{
+ void *data_end = (void *)(long)xdp->data_end;
+ void *data = (void *)(long)xdp->data;
+ struct iptnl_info *tnl;
+ struct ethhdr *new_eth;
+ struct ethhdr *old_eth;
+ struct iphdr *iph = data + sizeof(struct ethhdr);
+ __u16 *next_iph;
+ __u16 payload_len;
+ struct vip vip = {};
+ int dport;
+ __u32 csum = 0;
+ int i;
+
+ if (iph + 1 > data_end)
+ return XDP_DROP;
+
+ dport = get_dport(iph + 1, data_end, iph->protocol);
+ if (dport == -1)
+ return XDP_DROP;
+
+ vip.protocol = iph->protocol;
+ vip.family = AF_INET;
+ vip.daddr.v4 = iph->daddr;
+ vip.dport = dport;
+ payload_len = bpf_ntohs(iph->tot_len);
+
+ tnl = bpf_map_lookup_elem(&vip2tnl, &vip);
+ /* It only does v4-in-v4 */
+ if (!tnl || tnl->family != AF_INET)
+ return XDP_PASS;
+
+ if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct iphdr)))
+ return XDP_DROP;
+
+ data = (void *)(long)xdp->data;
+ data_end = (void *)(long)xdp->data_end;
+
+ new_eth = data;
+ iph = data + sizeof(*new_eth);
+ old_eth = data + sizeof(*iph);
+
+ if (new_eth + 1 > data_end ||
+ old_eth + 1 > data_end ||
+ iph + 1 > data_end)
+ return XDP_DROP;
+
+ set_ethhdr(new_eth, old_eth, tnl, bpf_htons(ETH_P_IP));
+
+ iph->version = 4;
+ iph->ihl = sizeof(*iph) >> 2;
+ iph->frag_off = 0;
+ iph->protocol = IPPROTO_IPIP;
+ iph->check = 0;
+ iph->tos = 0;
+ iph->tot_len = bpf_htons(payload_len + sizeof(*iph));
+ iph->daddr = tnl->daddr.v4;
+ iph->saddr = tnl->saddr.v4;
+ iph->ttl = 8;
+
+ next_iph = (__u16 *)iph;
+#pragma clang loop unroll(full)
+ for (i = 0; i < sizeof(*iph) >> 1; i++)
+ csum += *next_iph++;
+
+ iph->check = ~((csum & 0xffff) + (csum >> 16));
+
+ count_tx(vip.protocol);
+
+ return XDP_TX;
+}
+
+static __always_inline int handle_ipv6(struct xdp_md *xdp)
+{
+ void *data_end = (void *)(long)xdp->data_end;
+ void *data = (void *)(long)xdp->data;
+ struct iptnl_info *tnl;
+ struct ethhdr *new_eth;
+ struct ethhdr *old_eth;
+ struct ipv6hdr *ip6h = data + sizeof(struct ethhdr);
+ __u16 payload_len;
+ struct vip vip = {};
+ int dport;
+
+ if (ip6h + 1 > data_end)
+ return XDP_DROP;
+
+ dport = get_dport(ip6h + 1, data_end, ip6h->nexthdr);
+ if (dport == -1)
+ return XDP_DROP;
+
+ vip.protocol = ip6h->nexthdr;
+ vip.family = AF_INET6;
+ memcpy(vip.daddr.v6, ip6h->daddr.s6_addr32, sizeof(vip.daddr));
+ vip.dport = dport;
+ payload_len = ip6h->payload_len;
+
+ tnl = bpf_map_lookup_elem(&vip2tnl, &vip);
+ /* It only does v6-in-v6 */
+ if (!tnl || tnl->family != AF_INET6)
+ return XDP_PASS;
+
+ if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct ipv6hdr)))
+ return XDP_DROP;
+
+ data = (void *)(long)xdp->data;
+ data_end = (void *)(long)xdp->data_end;
+
+ new_eth = data;
+ ip6h = data + sizeof(*new_eth);
+ old_eth = data + sizeof(*ip6h);
+
+ if (new_eth + 1 > data_end || old_eth + 1 > data_end ||
+ ip6h + 1 > data_end)
+ return XDP_DROP;
+
+ set_ethhdr(new_eth, old_eth, tnl, bpf_htons(ETH_P_IPV6));
+
+ ip6h->version = 6;
+ ip6h->priority = 0;
+ memset(ip6h->flow_lbl, 0, sizeof(ip6h->flow_lbl));
+ ip6h->payload_len = bpf_htons(bpf_ntohs(payload_len) + sizeof(*ip6h));
+ ip6h->nexthdr = IPPROTO_IPV6;
+ ip6h->hop_limit = 8;
+ memcpy(ip6h->saddr.s6_addr32, tnl->saddr.v6, sizeof(tnl->saddr.v6));
+ memcpy(ip6h->daddr.s6_addr32, tnl->daddr.v6, sizeof(tnl->daddr.v6));
+
+ count_tx(vip.protocol);
+
+ return XDP_TX;
+}
+
+SEC("xdp_tx_iptunnel")
+int _xdp_tx_iptunnel(struct xdp_md *xdp)
+{
+ void *data_end = (void *)(long)xdp->data_end;
+ void *data = (void *)(long)xdp->data;
+ struct ethhdr *eth = data;
+ __u16 h_proto;
+
+ if (eth + 1 > data_end)
+ return XDP_DROP;
+
+ h_proto = eth->h_proto;
+
+ if (h_proto == bpf_htons(ETH_P_IP))
+ return handle_ipv4(xdp);
+ else if (h_proto == bpf_htons(ETH_P_IPV6))
+
+ return handle_ipv6(xdp);
+ else
+ return XDP_DROP;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_xdp_meta.c b/tools/testing/selftests/bpf/test_xdp_meta.c
new file mode 100644
index 000000000..8d0182650
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_xdp_meta.c
@@ -0,0 +1,53 @@
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/pkt_cls.h>
+
+#include "bpf_helpers.h"
+
+#define __round_mask(x, y) ((__typeof__(x))((y) - 1))
+#define round_up(x, y) ((((x) - 1) | __round_mask(x, y)) + 1)
+#define ctx_ptr(ctx, mem) (void *)(unsigned long)ctx->mem
+
+SEC("t")
+int ing_cls(struct __sk_buff *ctx)
+{
+ __u8 *data, *data_meta, *data_end;
+ __u32 diff = 0;
+
+ data_meta = ctx_ptr(ctx, data_meta);
+ data_end = ctx_ptr(ctx, data_end);
+ data = ctx_ptr(ctx, data);
+
+ if (data + ETH_ALEN > data_end ||
+ data_meta + round_up(ETH_ALEN, 4) > data)
+ return TC_ACT_SHOT;
+
+ diff |= ((__u32 *)data_meta)[0] ^ ((__u32 *)data)[0];
+ diff |= ((__u16 *)data_meta)[2] ^ ((__u16 *)data)[2];
+
+ return diff ? TC_ACT_SHOT : TC_ACT_OK;
+}
+
+SEC("x")
+int ing_xdp(struct xdp_md *ctx)
+{
+ __u8 *data, *data_meta, *data_end;
+ int ret;
+
+ ret = bpf_xdp_adjust_meta(ctx, -round_up(ETH_ALEN, 4));
+ if (ret < 0)
+ return XDP_DROP;
+
+ data_meta = ctx_ptr(ctx, data_meta);
+ data_end = ctx_ptr(ctx, data_end);
+ data = ctx_ptr(ctx, data);
+
+ if (data + ETH_ALEN > data_end ||
+ data_meta + round_up(ETH_ALEN, 4) > data)
+ return XDP_DROP;
+
+ __builtin_memcpy(data_meta, data, ETH_ALEN);
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_xdp_meta.sh b/tools/testing/selftests/bpf/test_xdp_meta.sh
new file mode 100755
index 000000000..637fcf4fe
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_xdp_meta.sh
@@ -0,0 +1,52 @@
+#!/bin/sh
+
+cleanup()
+{
+ if [ "$?" = "0" ]; then
+ echo "selftests: test_xdp_meta [PASS]";
+ else
+ echo "selftests: test_xdp_meta [FAILED]";
+ fi
+
+ set +e
+ ip link del veth1 2> /dev/null
+ ip netns del ns1 2> /dev/null
+ ip netns del ns2 2> /dev/null
+}
+
+ip link set dev lo xdp off 2>/dev/null > /dev/null
+if [ $? -ne 0 ];then
+ echo "selftests: [SKIP] Could not run test without the ip xdp support"
+ exit 0
+fi
+set -e
+
+ip netns add ns1
+ip netns add ns2
+
+trap cleanup 0 2 3 6 9
+
+ip link add veth1 type veth peer name veth2
+
+ip link set veth1 netns ns1
+ip link set veth2 netns ns2
+
+ip netns exec ns1 ip addr add 10.1.1.11/24 dev veth1
+ip netns exec ns2 ip addr add 10.1.1.22/24 dev veth2
+
+ip netns exec ns1 tc qdisc add dev veth1 clsact
+ip netns exec ns2 tc qdisc add dev veth2 clsact
+
+ip netns exec ns1 tc filter add dev veth1 ingress bpf da obj test_xdp_meta.o sec t
+ip netns exec ns2 tc filter add dev veth2 ingress bpf da obj test_xdp_meta.o sec t
+
+ip netns exec ns1 ip link set dev veth1 xdp obj test_xdp_meta.o sec x
+ip netns exec ns2 ip link set dev veth2 xdp obj test_xdp_meta.o sec x
+
+ip netns exec ns1 ip link set dev veth1 up
+ip netns exec ns2 ip link set dev veth2 up
+
+ip netns exec ns1 ping -c 1 10.1.1.22
+ip netns exec ns2 ping -c 1 10.1.1.11
+
+exit 0
diff --git a/tools/testing/selftests/bpf/test_xdp_noinline.c b/tools/testing/selftests/bpf/test_xdp_noinline.c
new file mode 100644
index 000000000..5e4aac74f
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_xdp_noinline.c
@@ -0,0 +1,833 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017 Facebook
+#include <stddef.h>
+#include <stdbool.h>
+#include <string.h>
+#include <linux/pkt_cls.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include "bpf_helpers.h"
+
+#define bpf_printk(fmt, ...) \
+({ \
+ char ____fmt[] = fmt; \
+ bpf_trace_printk(____fmt, sizeof(____fmt), \
+ ##__VA_ARGS__); \
+})
+
+static __u32 rol32(__u32 word, unsigned int shift)
+{
+ return (word << shift) | (word >> ((-shift) & 31));
+}
+
+/* copy paste of jhash from kernel sources to make sure llvm
+ * can compile it into valid sequence of bpf instructions
+ */
+#define __jhash_mix(a, b, c) \
+{ \
+ a -= c; a ^= rol32(c, 4); c += b; \
+ b -= a; b ^= rol32(a, 6); a += c; \
+ c -= b; c ^= rol32(b, 8); b += a; \
+ a -= c; a ^= rol32(c, 16); c += b; \
+ b -= a; b ^= rol32(a, 19); a += c; \
+ c -= b; c ^= rol32(b, 4); b += a; \
+}
+
+#define __jhash_final(a, b, c) \
+{ \
+ c ^= b; c -= rol32(b, 14); \
+ a ^= c; a -= rol32(c, 11); \
+ b ^= a; b -= rol32(a, 25); \
+ c ^= b; c -= rol32(b, 16); \
+ a ^= c; a -= rol32(c, 4); \
+ b ^= a; b -= rol32(a, 14); \
+ c ^= b; c -= rol32(b, 24); \
+}
+
+#define JHASH_INITVAL 0xdeadbeef
+
+typedef unsigned int u32;
+
+static __attribute__ ((noinline))
+u32 jhash(const void *key, u32 length, u32 initval)
+{
+ u32 a, b, c;
+ const unsigned char *k = key;
+
+ a = b = c = JHASH_INITVAL + length + initval;
+
+ while (length > 12) {
+ a += *(u32 *)(k);
+ b += *(u32 *)(k + 4);
+ c += *(u32 *)(k + 8);
+ __jhash_mix(a, b, c);
+ length -= 12;
+ k += 12;
+ }
+ switch (length) {
+ case 12: c += (u32)k[11]<<24;
+ case 11: c += (u32)k[10]<<16;
+ case 10: c += (u32)k[9]<<8;
+ case 9: c += k[8];
+ case 8: b += (u32)k[7]<<24;
+ case 7: b += (u32)k[6]<<16;
+ case 6: b += (u32)k[5]<<8;
+ case 5: b += k[4];
+ case 4: a += (u32)k[3]<<24;
+ case 3: a += (u32)k[2]<<16;
+ case 2: a += (u32)k[1]<<8;
+ case 1: a += k[0];
+ __jhash_final(a, b, c);
+ case 0: /* Nothing left to add */
+ break;
+ }
+
+ return c;
+}
+
+static __attribute__ ((noinline))
+u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
+{
+ a += initval;
+ b += initval;
+ c += initval;
+ __jhash_final(a, b, c);
+ return c;
+}
+
+static __attribute__ ((noinline))
+u32 jhash_2words(u32 a, u32 b, u32 initval)
+{
+ return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2));
+}
+
+struct flow_key {
+ union {
+ __be32 src;
+ __be32 srcv6[4];
+ };
+ union {
+ __be32 dst;
+ __be32 dstv6[4];
+ };
+ union {
+ __u32 ports;
+ __u16 port16[2];
+ };
+ __u8 proto;
+};
+
+struct packet_description {
+ struct flow_key flow;
+ __u8 flags;
+};
+
+struct ctl_value {
+ union {
+ __u64 value;
+ __u32 ifindex;
+ __u8 mac[6];
+ };
+};
+
+struct vip_definition {
+ union {
+ __be32 vip;
+ __be32 vipv6[4];
+ };
+ __u16 port;
+ __u16 family;
+ __u8 proto;
+};
+
+struct vip_meta {
+ __u32 flags;
+ __u32 vip_num;
+};
+
+struct real_pos_lru {
+ __u32 pos;
+ __u64 atime;
+};
+
+struct real_definition {
+ union {
+ __be32 dst;
+ __be32 dstv6[4];
+ };
+ __u8 flags;
+};
+
+struct lb_stats {
+ __u64 v2;
+ __u64 v1;
+};
+
+struct bpf_map_def __attribute__ ((section("maps"), used)) vip_map = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(struct vip_definition),
+ .value_size = sizeof(struct vip_meta),
+ .max_entries = 512,
+ .map_flags = 0,
+};
+
+struct bpf_map_def __attribute__ ((section("maps"), used)) lru_cache = {
+ .type = BPF_MAP_TYPE_LRU_HASH,
+ .key_size = sizeof(struct flow_key),
+ .value_size = sizeof(struct real_pos_lru),
+ .max_entries = 300,
+ .map_flags = 1U << 1,
+};
+
+struct bpf_map_def __attribute__ ((section("maps"), used)) ch_rings = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u32),
+ .max_entries = 12 * 655,
+ .map_flags = 0,
+};
+
+struct bpf_map_def __attribute__ ((section("maps"), used)) reals = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct real_definition),
+ .max_entries = 40,
+ .map_flags = 0,
+};
+
+struct bpf_map_def __attribute__ ((section("maps"), used)) stats = {
+ .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct lb_stats),
+ .max_entries = 515,
+ .map_flags = 0,
+};
+
+struct bpf_map_def __attribute__ ((section("maps"), used)) ctl_array = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct ctl_value),
+ .max_entries = 16,
+ .map_flags = 0,
+};
+
+struct eth_hdr {
+ unsigned char eth_dest[6];
+ unsigned char eth_source[6];
+ unsigned short eth_proto;
+};
+
+static inline __u64 calc_offset(bool is_ipv6, bool is_icmp)
+{
+ __u64 off = sizeof(struct eth_hdr);
+ if (is_ipv6) {
+ off += sizeof(struct ipv6hdr);
+ if (is_icmp)
+ off += sizeof(struct icmp6hdr) + sizeof(struct ipv6hdr);
+ } else {
+ off += sizeof(struct iphdr);
+ if (is_icmp)
+ off += sizeof(struct icmphdr) + sizeof(struct iphdr);
+ }
+ return off;
+}
+
+static __attribute__ ((noinline))
+bool parse_udp(void *data, void *data_end,
+ bool is_ipv6, struct packet_description *pckt)
+{
+
+ bool is_icmp = !((pckt->flags & (1 << 0)) == 0);
+ __u64 off = calc_offset(is_ipv6, is_icmp);
+ struct udphdr *udp;
+ udp = data + off;
+
+ if (udp + 1 > data_end)
+ return 0;
+ if (!is_icmp) {
+ pckt->flow.port16[0] = udp->source;
+ pckt->flow.port16[1] = udp->dest;
+ } else {
+ pckt->flow.port16[0] = udp->dest;
+ pckt->flow.port16[1] = udp->source;
+ }
+ return 1;
+}
+
+static __attribute__ ((noinline))
+bool parse_tcp(void *data, void *data_end,
+ bool is_ipv6, struct packet_description *pckt)
+{
+
+ bool is_icmp = !((pckt->flags & (1 << 0)) == 0);
+ __u64 off = calc_offset(is_ipv6, is_icmp);
+ struct tcphdr *tcp;
+
+ tcp = data + off;
+ if (tcp + 1 > data_end)
+ return 0;
+ if (tcp->syn)
+ pckt->flags |= (1 << 1);
+ if (!is_icmp) {
+ pckt->flow.port16[0] = tcp->source;
+ pckt->flow.port16[1] = tcp->dest;
+ } else {
+ pckt->flow.port16[0] = tcp->dest;
+ pckt->flow.port16[1] = tcp->source;
+ }
+ return 1;
+}
+
+static __attribute__ ((noinline))
+bool encap_v6(struct xdp_md *xdp, struct ctl_value *cval,
+ struct packet_description *pckt,
+ struct real_definition *dst, __u32 pkt_bytes)
+{
+ struct eth_hdr *new_eth;
+ struct eth_hdr *old_eth;
+ struct ipv6hdr *ip6h;
+ __u32 ip_suffix;
+ void *data_end;
+ void *data;
+
+ if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct ipv6hdr)))
+ return 0;
+ data = (void *)(long)xdp->data;
+ data_end = (void *)(long)xdp->data_end;
+ new_eth = data;
+ ip6h = data + sizeof(struct eth_hdr);
+ old_eth = data + sizeof(struct ipv6hdr);
+ if (new_eth + 1 > data_end ||
+ old_eth + 1 > data_end || ip6h + 1 > data_end)
+ return 0;
+ memcpy(new_eth->eth_dest, cval->mac, 6);
+ memcpy(new_eth->eth_source, old_eth->eth_dest, 6);
+ new_eth->eth_proto = 56710;
+ ip6h->version = 6;
+ ip6h->priority = 0;
+ memset(ip6h->flow_lbl, 0, sizeof(ip6h->flow_lbl));
+
+ ip6h->nexthdr = IPPROTO_IPV6;
+ ip_suffix = pckt->flow.srcv6[3] ^ pckt->flow.port16[0];
+ ip6h->payload_len =
+ __builtin_bswap16(pkt_bytes + sizeof(struct ipv6hdr));
+ ip6h->hop_limit = 4;
+
+ ip6h->saddr.in6_u.u6_addr32[0] = 1;
+ ip6h->saddr.in6_u.u6_addr32[1] = 2;
+ ip6h->saddr.in6_u.u6_addr32[2] = 3;
+ ip6h->saddr.in6_u.u6_addr32[3] = ip_suffix;
+ memcpy(ip6h->daddr.in6_u.u6_addr32, dst->dstv6, 16);
+ return 1;
+}
+
+static __attribute__ ((noinline))
+bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval,
+ struct packet_description *pckt,
+ struct real_definition *dst, __u32 pkt_bytes)
+{
+
+ __u32 ip_suffix = __builtin_bswap16(pckt->flow.port16[0]);
+ struct eth_hdr *new_eth;
+ struct eth_hdr *old_eth;
+ __u16 *next_iph_u16;
+ struct iphdr *iph;
+ __u32 csum = 0;
+ void *data_end;
+ void *data;
+
+ ip_suffix <<= 15;
+ ip_suffix ^= pckt->flow.src;
+ if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct iphdr)))
+ return 0;
+ data = (void *)(long)xdp->data;
+ data_end = (void *)(long)xdp->data_end;
+ new_eth = data;
+ iph = data + sizeof(struct eth_hdr);
+ old_eth = data + sizeof(struct iphdr);
+ if (new_eth + 1 > data_end ||
+ old_eth + 1 > data_end || iph + 1 > data_end)
+ return 0;
+ memcpy(new_eth->eth_dest, cval->mac, 6);
+ memcpy(new_eth->eth_source, old_eth->eth_dest, 6);
+ new_eth->eth_proto = 8;
+ iph->version = 4;
+ iph->ihl = 5;
+ iph->frag_off = 0;
+ iph->protocol = IPPROTO_IPIP;
+ iph->check = 0;
+ iph->tos = 1;
+ iph->tot_len = __builtin_bswap16(pkt_bytes + sizeof(struct iphdr));
+ /* don't update iph->daddr, since it will overwrite old eth_proto
+ * and multiple iterations of bpf_prog_run() will fail
+ */
+
+ iph->saddr = ((0xFFFF0000 & ip_suffix) | 4268) ^ dst->dst;
+ iph->ttl = 4;
+
+ next_iph_u16 = (__u16 *) iph;
+#pragma clang loop unroll(full)
+ for (int i = 0; i < sizeof(struct iphdr) >> 1; i++)
+ csum += *next_iph_u16++;
+ iph->check = ~((csum & 0xffff) + (csum >> 16));
+ if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr)))
+ return 0;
+ return 1;
+}
+
+static __attribute__ ((noinline))
+bool decap_v6(struct xdp_md *xdp, void **data, void **data_end, bool inner_v4)
+{
+ struct eth_hdr *new_eth;
+ struct eth_hdr *old_eth;
+
+ old_eth = *data;
+ new_eth = *data + sizeof(struct ipv6hdr);
+ memcpy(new_eth->eth_source, old_eth->eth_source, 6);
+ memcpy(new_eth->eth_dest, old_eth->eth_dest, 6);
+ if (inner_v4)
+ new_eth->eth_proto = 8;
+ else
+ new_eth->eth_proto = 56710;
+ if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct ipv6hdr)))
+ return 0;
+ *data = (void *)(long)xdp->data;
+ *data_end = (void *)(long)xdp->data_end;
+ return 1;
+}
+
+static __attribute__ ((noinline))
+bool decap_v4(struct xdp_md *xdp, void **data, void **data_end)
+{
+ struct eth_hdr *new_eth;
+ struct eth_hdr *old_eth;
+
+ old_eth = *data;
+ new_eth = *data + sizeof(struct iphdr);
+ memcpy(new_eth->eth_source, old_eth->eth_source, 6);
+ memcpy(new_eth->eth_dest, old_eth->eth_dest, 6);
+ new_eth->eth_proto = 8;
+ if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr)))
+ return 0;
+ *data = (void *)(long)xdp->data;
+ *data_end = (void *)(long)xdp->data_end;
+ return 1;
+}
+
+static __attribute__ ((noinline))
+int swap_mac_and_send(void *data, void *data_end)
+{
+ unsigned char tmp_mac[6];
+ struct eth_hdr *eth;
+
+ eth = data;
+ memcpy(tmp_mac, eth->eth_source, 6);
+ memcpy(eth->eth_source, eth->eth_dest, 6);
+ memcpy(eth->eth_dest, tmp_mac, 6);
+ return XDP_TX;
+}
+
+static __attribute__ ((noinline))
+int send_icmp_reply(void *data, void *data_end)
+{
+ struct icmphdr *icmp_hdr;
+ __u16 *next_iph_u16;
+ __u32 tmp_addr = 0;
+ struct iphdr *iph;
+ __u32 csum1 = 0;
+ __u32 csum = 0;
+ __u64 off = 0;
+
+ if (data + sizeof(struct eth_hdr)
+ + sizeof(struct iphdr) + sizeof(struct icmphdr) > data_end)
+ return XDP_DROP;
+ off += sizeof(struct eth_hdr);
+ iph = data + off;
+ off += sizeof(struct iphdr);
+ icmp_hdr = data + off;
+ icmp_hdr->type = 0;
+ icmp_hdr->checksum += 0x0007;
+ iph->ttl = 4;
+ tmp_addr = iph->daddr;
+ iph->daddr = iph->saddr;
+ iph->saddr = tmp_addr;
+ iph->check = 0;
+ next_iph_u16 = (__u16 *) iph;
+#pragma clang loop unroll(full)
+ for (int i = 0; i < sizeof(struct iphdr) >> 1; i++)
+ csum += *next_iph_u16++;
+ iph->check = ~((csum & 0xffff) + (csum >> 16));
+ return swap_mac_and_send(data, data_end);
+}
+
+static __attribute__ ((noinline))
+int send_icmp6_reply(void *data, void *data_end)
+{
+ struct icmp6hdr *icmp_hdr;
+ struct ipv6hdr *ip6h;
+ __be32 tmp_addr[4];
+ __u64 off = 0;
+
+ if (data + sizeof(struct eth_hdr)
+ + sizeof(struct ipv6hdr) + sizeof(struct icmp6hdr) > data_end)
+ return XDP_DROP;
+ off += sizeof(struct eth_hdr);
+ ip6h = data + off;
+ off += sizeof(struct ipv6hdr);
+ icmp_hdr = data + off;
+ icmp_hdr->icmp6_type = 129;
+ icmp_hdr->icmp6_cksum -= 0x0001;
+ ip6h->hop_limit = 4;
+ memcpy(tmp_addr, ip6h->saddr.in6_u.u6_addr32, 16);
+ memcpy(ip6h->saddr.in6_u.u6_addr32, ip6h->daddr.in6_u.u6_addr32, 16);
+ memcpy(ip6h->daddr.in6_u.u6_addr32, tmp_addr, 16);
+ return swap_mac_and_send(data, data_end);
+}
+
+static __attribute__ ((noinline))
+int parse_icmpv6(void *data, void *data_end, __u64 off,
+ struct packet_description *pckt)
+{
+ struct icmp6hdr *icmp_hdr;
+ struct ipv6hdr *ip6h;
+
+ icmp_hdr = data + off;
+ if (icmp_hdr + 1 > data_end)
+ return XDP_DROP;
+ if (icmp_hdr->icmp6_type == 128)
+ return send_icmp6_reply(data, data_end);
+ if (icmp_hdr->icmp6_type != 3)
+ return XDP_PASS;
+ off += sizeof(struct icmp6hdr);
+ ip6h = data + off;
+ if (ip6h + 1 > data_end)
+ return XDP_DROP;
+ pckt->flow.proto = ip6h->nexthdr;
+ pckt->flags |= (1 << 0);
+ memcpy(pckt->flow.srcv6, ip6h->daddr.in6_u.u6_addr32, 16);
+ memcpy(pckt->flow.dstv6, ip6h->saddr.in6_u.u6_addr32, 16);
+ return -1;
+}
+
+static __attribute__ ((noinline))
+int parse_icmp(void *data, void *data_end, __u64 off,
+ struct packet_description *pckt)
+{
+ struct icmphdr *icmp_hdr;
+ struct iphdr *iph;
+
+ icmp_hdr = data + off;
+ if (icmp_hdr + 1 > data_end)
+ return XDP_DROP;
+ if (icmp_hdr->type == 8)
+ return send_icmp_reply(data, data_end);
+ if ((icmp_hdr->type != 3) || (icmp_hdr->code != 4))
+ return XDP_PASS;
+ off += sizeof(struct icmphdr);
+ iph = data + off;
+ if (iph + 1 > data_end)
+ return XDP_DROP;
+ if (iph->ihl != 5)
+ return XDP_DROP;
+ pckt->flow.proto = iph->protocol;
+ pckt->flags |= (1 << 0);
+ pckt->flow.src = iph->daddr;
+ pckt->flow.dst = iph->saddr;
+ return -1;
+}
+
+static __attribute__ ((noinline))
+__u32 get_packet_hash(struct packet_description *pckt,
+ bool hash_16bytes)
+{
+ if (hash_16bytes)
+ return jhash_2words(jhash(pckt->flow.srcv6, 16, 12),
+ pckt->flow.ports, 24);
+ else
+ return jhash_2words(pckt->flow.src, pckt->flow.ports,
+ 24);
+}
+
+__attribute__ ((noinline))
+static bool get_packet_dst(struct real_definition **real,
+ struct packet_description *pckt,
+ struct vip_meta *vip_info,
+ bool is_ipv6, void *lru_map)
+{
+ struct real_pos_lru new_dst_lru = { };
+ bool hash_16bytes = is_ipv6;
+ __u32 *real_pos, hash, key;
+ __u64 cur_time;
+
+ if (vip_info->flags & (1 << 2))
+ hash_16bytes = 1;
+ if (vip_info->flags & (1 << 3)) {
+ pckt->flow.port16[0] = pckt->flow.port16[1];
+ memset(pckt->flow.srcv6, 0, 16);
+ }
+ hash = get_packet_hash(pckt, hash_16bytes);
+ if (hash != 0x358459b7 /* jhash of ipv4 packet */ &&
+ hash != 0x2f4bc6bb /* jhash of ipv6 packet */)
+ return 0;
+ key = 2 * vip_info->vip_num + hash % 2;
+ real_pos = bpf_map_lookup_elem(&ch_rings, &key);
+ if (!real_pos)
+ return 0;
+ key = *real_pos;
+ *real = bpf_map_lookup_elem(&reals, &key);
+ if (!(*real))
+ return 0;
+ if (!(vip_info->flags & (1 << 1))) {
+ __u32 conn_rate_key = 512 + 2;
+ struct lb_stats *conn_rate_stats =
+ bpf_map_lookup_elem(&stats, &conn_rate_key);
+
+ if (!conn_rate_stats)
+ return 1;
+ cur_time = bpf_ktime_get_ns();
+ if ((cur_time - conn_rate_stats->v2) >> 32 > 0xffFFFF) {
+ conn_rate_stats->v1 = 1;
+ conn_rate_stats->v2 = cur_time;
+ } else {
+ conn_rate_stats->v1 += 1;
+ if (conn_rate_stats->v1 >= 1)
+ return 1;
+ }
+ if (pckt->flow.proto == IPPROTO_UDP)
+ new_dst_lru.atime = cur_time;
+ new_dst_lru.pos = key;
+ bpf_map_update_elem(lru_map, &pckt->flow, &new_dst_lru, 0);
+ }
+ return 1;
+}
+
+__attribute__ ((noinline))
+static void connection_table_lookup(struct real_definition **real,
+ struct packet_description *pckt,
+ void *lru_map)
+{
+
+ struct real_pos_lru *dst_lru;
+ __u64 cur_time;
+ __u32 key;
+
+ dst_lru = bpf_map_lookup_elem(lru_map, &pckt->flow);
+ if (!dst_lru)
+ return;
+ if (pckt->flow.proto == IPPROTO_UDP) {
+ cur_time = bpf_ktime_get_ns();
+ if (cur_time - dst_lru->atime > 300000)
+ return;
+ dst_lru->atime = cur_time;
+ }
+ key = dst_lru->pos;
+ *real = bpf_map_lookup_elem(&reals, &key);
+}
+
+/* don't believe your eyes!
+ * below function has 6 arguments whereas bpf and llvm allow maximum of 5
+ * but since it's _static_ llvm can optimize one argument away
+ */
+__attribute__ ((noinline))
+static int process_l3_headers_v6(struct packet_description *pckt,
+ __u8 *protocol, __u64 off,
+ __u16 *pkt_bytes, void *data,
+ void *data_end)
+{
+ struct ipv6hdr *ip6h;
+ __u64 iph_len;
+ int action;
+
+ ip6h = data + off;
+ if (ip6h + 1 > data_end)
+ return XDP_DROP;
+ iph_len = sizeof(struct ipv6hdr);
+ *protocol = ip6h->nexthdr;
+ pckt->flow.proto = *protocol;
+ *pkt_bytes = __builtin_bswap16(ip6h->payload_len);
+ off += iph_len;
+ if (*protocol == 45) {
+ return XDP_DROP;
+ } else if (*protocol == 59) {
+ action = parse_icmpv6(data, data_end, off, pckt);
+ if (action >= 0)
+ return action;
+ } else {
+ memcpy(pckt->flow.srcv6, ip6h->saddr.in6_u.u6_addr32, 16);
+ memcpy(pckt->flow.dstv6, ip6h->daddr.in6_u.u6_addr32, 16);
+ }
+ return -1;
+}
+
+__attribute__ ((noinline))
+static int process_l3_headers_v4(struct packet_description *pckt,
+ __u8 *protocol, __u64 off,
+ __u16 *pkt_bytes, void *data,
+ void *data_end)
+{
+ struct iphdr *iph;
+ __u64 iph_len;
+ int action;
+
+ iph = data + off;
+ if (iph + 1 > data_end)
+ return XDP_DROP;
+ if (iph->ihl != 5)
+ return XDP_DROP;
+ *protocol = iph->protocol;
+ pckt->flow.proto = *protocol;
+ *pkt_bytes = __builtin_bswap16(iph->tot_len);
+ off += 20;
+ if (iph->frag_off & 65343)
+ return XDP_DROP;
+ if (*protocol == IPPROTO_ICMP) {
+ action = parse_icmp(data, data_end, off, pckt);
+ if (action >= 0)
+ return action;
+ } else {
+ pckt->flow.src = iph->saddr;
+ pckt->flow.dst = iph->daddr;
+ }
+ return -1;
+}
+
+__attribute__ ((noinline))
+static int process_packet(void *data, __u64 off, void *data_end,
+ bool is_ipv6, struct xdp_md *xdp)
+{
+
+ struct real_definition *dst = NULL;
+ struct packet_description pckt = { };
+ struct vip_definition vip = { };
+ struct lb_stats *data_stats;
+ struct eth_hdr *eth = data;
+ void *lru_map = &lru_cache;
+ struct vip_meta *vip_info;
+ __u32 lru_stats_key = 513;
+ __u32 mac_addr_pos = 0;
+ __u32 stats_key = 512;
+ struct ctl_value *cval;
+ __u16 pkt_bytes;
+ __u64 iph_len;
+ __u8 protocol;
+ __u32 vip_num;
+ int action;
+
+ if (is_ipv6)
+ action = process_l3_headers_v6(&pckt, &protocol, off,
+ &pkt_bytes, data, data_end);
+ else
+ action = process_l3_headers_v4(&pckt, &protocol, off,
+ &pkt_bytes, data, data_end);
+ if (action >= 0)
+ return action;
+ protocol = pckt.flow.proto;
+ if (protocol == IPPROTO_TCP) {
+ if (!parse_tcp(data, data_end, is_ipv6, &pckt))
+ return XDP_DROP;
+ } else if (protocol == IPPROTO_UDP) {
+ if (!parse_udp(data, data_end, is_ipv6, &pckt))
+ return XDP_DROP;
+ } else {
+ return XDP_TX;
+ }
+
+ if (is_ipv6)
+ memcpy(vip.vipv6, pckt.flow.dstv6, 16);
+ else
+ vip.vip = pckt.flow.dst;
+ vip.port = pckt.flow.port16[1];
+ vip.proto = pckt.flow.proto;
+ vip_info = bpf_map_lookup_elem(&vip_map, &vip);
+ if (!vip_info) {
+ vip.port = 0;
+ vip_info = bpf_map_lookup_elem(&vip_map, &vip);
+ if (!vip_info)
+ return XDP_PASS;
+ if (!(vip_info->flags & (1 << 4)))
+ pckt.flow.port16[1] = 0;
+ }
+ if (data_end - data > 1400)
+ return XDP_DROP;
+ data_stats = bpf_map_lookup_elem(&stats, &stats_key);
+ if (!data_stats)
+ return XDP_DROP;
+ data_stats->v1 += 1;
+ if (!dst) {
+ if (vip_info->flags & (1 << 0))
+ pckt.flow.port16[0] = 0;
+ if (!(pckt.flags & (1 << 1)) && !(vip_info->flags & (1 << 1)))
+ connection_table_lookup(&dst, &pckt, lru_map);
+ if (dst)
+ goto out;
+ if (pckt.flow.proto == IPPROTO_TCP) {
+ struct lb_stats *lru_stats =
+ bpf_map_lookup_elem(&stats, &lru_stats_key);
+
+ if (!lru_stats)
+ return XDP_DROP;
+ if (pckt.flags & (1 << 1))
+ lru_stats->v1 += 1;
+ else
+ lru_stats->v2 += 1;
+ }
+ if (!get_packet_dst(&dst, &pckt, vip_info, is_ipv6, lru_map))
+ return XDP_DROP;
+ data_stats->v2 += 1;
+ }
+out:
+ cval = bpf_map_lookup_elem(&ctl_array, &mac_addr_pos);
+ if (!cval)
+ return XDP_DROP;
+ if (dst->flags & (1 << 0)) {
+ if (!encap_v6(xdp, cval, &pckt, dst, pkt_bytes))
+ return XDP_DROP;
+ } else {
+ if (!encap_v4(xdp, cval, &pckt, dst, pkt_bytes))
+ return XDP_DROP;
+ }
+ vip_num = vip_info->vip_num;
+ data_stats = bpf_map_lookup_elem(&stats, &vip_num);
+ if (!data_stats)
+ return XDP_DROP;
+ data_stats->v1 += 1;
+ data_stats->v2 += pkt_bytes;
+
+ data = (void *)(long)xdp->data;
+ data_end = (void *)(long)xdp->data_end;
+ if (data + 4 > data_end)
+ return XDP_DROP;
+ *(u32 *)data = dst->dst;
+ return XDP_DROP;
+}
+
+__attribute__ ((section("xdp-test"), used))
+int balancer_ingress(struct xdp_md *ctx)
+{
+ void *data = (void *)(long)ctx->data;
+ void *data_end = (void *)(long)ctx->data_end;
+ struct eth_hdr *eth = data;
+ __u32 eth_proto;
+ __u32 nh_off;
+
+ nh_off = sizeof(struct eth_hdr);
+ if (data + nh_off > data_end)
+ return XDP_DROP;
+ eth_proto = eth->eth_proto;
+ if (eth_proto == 8)
+ return process_packet(data, nh_off, data_end, 0, ctx);
+ else if (eth_proto == 56710)
+ return process_packet(data, nh_off, data_end, 1, ctx);
+ else
+ return XDP_DROP;
+}
+
+char _license[] __attribute__ ((section("license"), used)) = "GPL";
+int _version __attribute__ ((section("version"), used)) = 1;
diff --git a/tools/testing/selftests/bpf/test_xdp_redirect.c b/tools/testing/selftests/bpf/test_xdp_redirect.c
new file mode 100644
index 000000000..ef9e704be
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_xdp_redirect.c
@@ -0,0 +1,28 @@
+/* Copyright (c) 2017 VMware
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+int _version SEC("version") = 1;
+
+SEC("redirect_to_111")
+int xdp_redirect_to_111(struct xdp_md *xdp)
+{
+ return bpf_redirect(111, 0);
+}
+SEC("redirect_to_222")
+int xdp_redirect_to_222(struct xdp_md *xdp)
+{
+ return bpf_redirect(222, 0);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_xdp_redirect.sh b/tools/testing/selftests/bpf/test_xdp_redirect.sh
new file mode 100755
index 000000000..c4b17e08d
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_xdp_redirect.sh
@@ -0,0 +1,61 @@
+#!/bin/sh
+# Create 2 namespaces with two veth peers, and
+# forward packets in-between using generic XDP
+#
+# NS1(veth11) NS2(veth22)
+# | |
+# | |
+# (veth1, ------ (veth2,
+# id:111) id:222)
+# | xdp forwarding |
+# ------------------
+
+cleanup()
+{
+ if [ "$?" = "0" ]; then
+ echo "selftests: test_xdp_redirect [PASS]";
+ else
+ echo "selftests: test_xdp_redirect [FAILED]";
+ fi
+
+ set +e
+ ip link del veth1 2> /dev/null
+ ip link del veth2 2> /dev/null
+ ip netns del ns1 2> /dev/null
+ ip netns del ns2 2> /dev/null
+}
+
+ip link set dev lo xdpgeneric off 2>/dev/null > /dev/null
+if [ $? -ne 0 ];then
+ echo "selftests: [SKIP] Could not run test without the ip xdpgeneric support"
+ exit 0
+fi
+set -e
+
+ip netns add ns1
+ip netns add ns2
+
+trap cleanup 0 2 3 6 9
+
+ip link add veth1 index 111 type veth peer name veth11
+ip link add veth2 index 222 type veth peer name veth22
+
+ip link set veth11 netns ns1
+ip link set veth22 netns ns2
+
+ip link set veth1 up
+ip link set veth2 up
+
+ip netns exec ns1 ip addr add 10.1.1.11/24 dev veth11
+ip netns exec ns2 ip addr add 10.1.1.22/24 dev veth22
+
+ip netns exec ns1 ip link set dev veth11 up
+ip netns exec ns2 ip link set dev veth22 up
+
+ip link set dev veth1 xdpgeneric obj test_xdp_redirect.o sec redirect_to_222
+ip link set dev veth2 xdpgeneric obj test_xdp_redirect.o sec redirect_to_111
+
+ip netns exec ns1 ping -c 1 10.1.1.22
+ip netns exec ns2 ping -c 1 10.1.1.11
+
+exit 0
diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c
new file mode 100644
index 000000000..82922f13d
--- /dev/null
+++ b/tools/testing/selftests/bpf/trace_helpers.c
@@ -0,0 +1,214 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <errno.h>
+#include <poll.h>
+#include <unistd.h>
+#include <linux/perf_event.h>
+#include <sys/mman.h>
+#include "trace_helpers.h"
+
+#define MAX_SYMS 300000
+static struct ksym syms[MAX_SYMS];
+static int sym_cnt;
+
+static int ksym_cmp(const void *p1, const void *p2)
+{
+ return ((struct ksym *)p1)->addr - ((struct ksym *)p2)->addr;
+}
+
+int load_kallsyms(void)
+{
+ FILE *f = fopen("/proc/kallsyms", "r");
+ char func[256], buf[256];
+ char symbol;
+ void *addr;
+ int i = 0;
+
+ if (!f)
+ return -ENOENT;
+
+ while (!feof(f)) {
+ if (!fgets(buf, sizeof(buf), f))
+ break;
+ if (sscanf(buf, "%p %c %s", &addr, &symbol, func) != 3)
+ break;
+ if (!addr)
+ continue;
+ syms[i].addr = (long) addr;
+ syms[i].name = strdup(func);
+ i++;
+ }
+ fclose(f);
+ sym_cnt = i;
+ qsort(syms, sym_cnt, sizeof(struct ksym), ksym_cmp);
+ return 0;
+}
+
+struct ksym *ksym_search(long key)
+{
+ int start = 0, end = sym_cnt;
+ int result;
+
+ /* kallsyms not loaded. return NULL */
+ if (sym_cnt <= 0)
+ return NULL;
+
+ while (start < end) {
+ size_t mid = start + (end - start) / 2;
+
+ result = key - syms[mid].addr;
+ if (result < 0)
+ end = mid;
+ else if (result > 0)
+ start = mid + 1;
+ else
+ return &syms[mid];
+ }
+
+ if (start >= 1 && syms[start - 1].addr < key &&
+ key < syms[start].addr)
+ /* valid ksym */
+ return &syms[start - 1];
+
+ /* out of range. return _stext */
+ return &syms[0];
+}
+
+long ksym_get_addr(const char *name)
+{
+ int i;
+
+ for (i = 0; i < sym_cnt; i++) {
+ if (strcmp(syms[i].name, name) == 0)
+ return syms[i].addr;
+ }
+
+ return 0;
+}
+
+static int page_size;
+static int page_cnt = 8;
+static struct perf_event_mmap_page *header;
+
+int perf_event_mmap_header(int fd, struct perf_event_mmap_page **header)
+{
+ void *base;
+ int mmap_size;
+
+ page_size = getpagesize();
+ mmap_size = page_size * (page_cnt + 1);
+
+ base = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ if (base == MAP_FAILED) {
+ printf("mmap err\n");
+ return -1;
+ }
+
+ *header = base;
+ return 0;
+}
+
+int perf_event_mmap(int fd)
+{
+ return perf_event_mmap_header(fd, &header);
+}
+
+static int perf_event_poll(int fd)
+{
+ struct pollfd pfd = { .fd = fd, .events = POLLIN };
+
+ return poll(&pfd, 1, 1000);
+}
+
+struct perf_event_sample {
+ struct perf_event_header header;
+ __u32 size;
+ char data[];
+};
+
+static enum bpf_perf_event_ret bpf_perf_event_print(void *event, void *priv)
+{
+ struct perf_event_sample *e = event;
+ perf_event_print_fn fn = priv;
+ int ret;
+
+ if (e->header.type == PERF_RECORD_SAMPLE) {
+ ret = fn(e->data, e->size);
+ if (ret != LIBBPF_PERF_EVENT_CONT)
+ return ret;
+ } else if (e->header.type == PERF_RECORD_LOST) {
+ struct {
+ struct perf_event_header header;
+ __u64 id;
+ __u64 lost;
+ } *lost = (void *) e;
+ printf("lost %lld events\n", lost->lost);
+ } else {
+ printf("unknown event type=%d size=%d\n",
+ e->header.type, e->header.size);
+ }
+
+ return LIBBPF_PERF_EVENT_CONT;
+}
+
+int perf_event_poller(int fd, perf_event_print_fn output_fn)
+{
+ enum bpf_perf_event_ret ret;
+ void *buf = NULL;
+ size_t len = 0;
+
+ for (;;) {
+ perf_event_poll(fd);
+ ret = bpf_perf_event_read_simple(header, page_cnt * page_size,
+ page_size, &buf, &len,
+ bpf_perf_event_print,
+ output_fn);
+ if (ret != LIBBPF_PERF_EVENT_CONT)
+ break;
+ }
+ free(buf);
+
+ return ret;
+}
+
+int perf_event_poller_multi(int *fds, struct perf_event_mmap_page **headers,
+ int num_fds, perf_event_print_fn output_fn)
+{
+ enum bpf_perf_event_ret ret;
+ struct pollfd *pfds;
+ void *buf = NULL;
+ size_t len = 0;
+ int i;
+
+ pfds = calloc(num_fds, sizeof(*pfds));
+ if (!pfds)
+ return LIBBPF_PERF_EVENT_ERROR;
+
+ for (i = 0; i < num_fds; i++) {
+ pfds[i].fd = fds[i];
+ pfds[i].events = POLLIN;
+ }
+
+ for (;;) {
+ poll(pfds, num_fds, 1000);
+ for (i = 0; i < num_fds; i++) {
+ if (!pfds[i].revents)
+ continue;
+
+ ret = bpf_perf_event_read_simple(headers[i],
+ page_cnt * page_size,
+ page_size, &buf, &len,
+ bpf_perf_event_print,
+ output_fn);
+ if (ret != LIBBPF_PERF_EVENT_CONT)
+ break;
+ }
+ }
+ free(buf);
+ free(pfds);
+
+ return ret;
+}
diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h
new file mode 100644
index 000000000..18924f23d
--- /dev/null
+++ b/tools/testing/selftests/bpf/trace_helpers.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __TRACE_HELPER_H
+#define __TRACE_HELPER_H
+
+#include <libbpf.h>
+#include <linux/perf_event.h>
+
+struct ksym {
+ long addr;
+ char *name;
+};
+
+int load_kallsyms(void);
+struct ksym *ksym_search(long key);
+long ksym_get_addr(const char *name);
+
+typedef enum bpf_perf_event_ret (*perf_event_print_fn)(void *data, int size);
+
+int perf_event_mmap(int fd);
+int perf_event_mmap_header(int fd, struct perf_event_mmap_page **header);
+/* return LIBBPF_PERF_EVENT_DONE or LIBBPF_PERF_EVENT_ERROR */
+int perf_event_poller(int fd, perf_event_print_fn output_fn);
+int perf_event_poller_multi(int *fds, struct perf_event_mmap_page **headers,
+ int num_fds, perf_event_print_fn output_fn);
+#endif
diff --git a/tools/testing/selftests/bpf/urandom_read.c b/tools/testing/selftests/bpf/urandom_read.c
new file mode 100644
index 000000000..9de8b7cb4
--- /dev/null
+++ b/tools/testing/selftests/bpf/urandom_read.c
@@ -0,0 +1,28 @@
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdlib.h>
+
+#define BUF_SIZE 256
+
+int main(int argc, char *argv[])
+{
+ int fd = open("/dev/urandom", O_RDONLY);
+ int i;
+ char buf[BUF_SIZE];
+ int count = 4;
+
+ if (fd < 0)
+ return 1;
+
+ if (argc == 2)
+ count = atoi(argv[1]);
+
+ for (i = 0; i < count; ++i)
+ read(fd, buf, BUF_SIZE);
+
+ close(fd);
+ return 0;
+}
diff --git a/tools/testing/selftests/breakpoints/.gitignore b/tools/testing/selftests/breakpoints/.gitignore
new file mode 100644
index 000000000..a23bb4a6f
--- /dev/null
+++ b/tools/testing/selftests/breakpoints/.gitignore
@@ -0,0 +1,2 @@
+breakpoint_test
+step_after_suspend_test
diff --git a/tools/testing/selftests/breakpoints/Makefile b/tools/testing/selftests/breakpoints/Makefile
new file mode 100644
index 000000000..9ec2c78de
--- /dev/null
+++ b/tools/testing/selftests/breakpoints/Makefile
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+# Taken from perf makefile
+uname_M := $(shell uname -m 2>/dev/null || echo not)
+ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
+
+TEST_GEN_PROGS := step_after_suspend_test
+
+ifeq ($(ARCH),x86)
+TEST_GEN_PROGS += breakpoint_test
+endif
+ifneq (,$(filter $(ARCH),aarch64 arm64))
+TEST_GEN_PROGS += breakpoint_test_arm64
+endif
+
+include ../lib.mk
+
diff --git a/tools/testing/selftests/breakpoints/breakpoint_test.c b/tools/testing/selftests/breakpoints/breakpoint_test.c
new file mode 100644
index 000000000..901b85ea6
--- /dev/null
+++ b/tools/testing/selftests/breakpoints/breakpoint_test.c
@@ -0,0 +1,401 @@
+/*
+ * Copyright (C) 2011 Red Hat, Inc., Frederic Weisbecker <fweisbec@redhat.com>
+ *
+ * Licensed under the terms of the GNU GPL License version 2
+ *
+ * Selftests for breakpoints (and more generally the do_debug() path) in x86.
+ */
+
+
+#include <sys/ptrace.h>
+#include <unistd.h>
+#include <stddef.h>
+#include <sys/user.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <errno.h>
+#include <string.h>
+
+#include "../kselftest.h"
+
+
+/* Breakpoint access modes */
+enum {
+ BP_X = 1,
+ BP_RW = 2,
+ BP_W = 4,
+};
+
+static pid_t child_pid;
+
+/*
+ * Ensures the child and parent are always "talking" about
+ * the same test sequence. (ie: that we haven't forgotten
+ * to call check_trapped() somewhere).
+ */
+static int nr_tests;
+
+static void set_breakpoint_addr(void *addr, int n)
+{
+ int ret;
+
+ ret = ptrace(PTRACE_POKEUSER, child_pid,
+ offsetof(struct user, u_debugreg[n]), addr);
+ if (ret)
+ ksft_exit_fail_msg("Can't set breakpoint addr: %s\n",
+ strerror(errno));
+}
+
+static void toggle_breakpoint(int n, int type, int len,
+ int local, int global, int set)
+{
+ int ret;
+
+ int xtype, xlen;
+ unsigned long vdr7, dr7;
+
+ switch (type) {
+ case BP_X:
+ xtype = 0;
+ break;
+ case BP_W:
+ xtype = 1;
+ break;
+ case BP_RW:
+ xtype = 3;
+ break;
+ }
+
+ switch (len) {
+ case 1:
+ xlen = 0;
+ break;
+ case 2:
+ xlen = 4;
+ break;
+ case 4:
+ xlen = 0xc;
+ break;
+ case 8:
+ xlen = 8;
+ break;
+ }
+
+ dr7 = ptrace(PTRACE_PEEKUSER, child_pid,
+ offsetof(struct user, u_debugreg[7]), 0);
+
+ vdr7 = (xlen | xtype) << 16;
+ vdr7 <<= 4 * n;
+
+ if (local) {
+ vdr7 |= 1 << (2 * n);
+ vdr7 |= 1 << 8;
+ }
+ if (global) {
+ vdr7 |= 2 << (2 * n);
+ vdr7 |= 1 << 9;
+ }
+
+ if (set)
+ dr7 |= vdr7;
+ else
+ dr7 &= ~vdr7;
+
+ ret = ptrace(PTRACE_POKEUSER, child_pid,
+ offsetof(struct user, u_debugreg[7]), dr7);
+ if (ret) {
+ ksft_print_msg("Can't set dr7: %s\n", strerror(errno));
+ exit(-1);
+ }
+}
+
+/* Dummy variables to test read/write accesses */
+static unsigned long long dummy_var[4];
+
+/* Dummy functions to test execution accesses */
+static void dummy_func(void) { }
+static void dummy_func1(void) { }
+static void dummy_func2(void) { }
+static void dummy_func3(void) { }
+
+static void (*dummy_funcs[])(void) = {
+ dummy_func,
+ dummy_func1,
+ dummy_func2,
+ dummy_func3,
+};
+
+static int trapped;
+
+static void check_trapped(void)
+{
+ /*
+ * If we haven't trapped, wake up the parent
+ * so that it notices the failure.
+ */
+ if (!trapped)
+ kill(getpid(), SIGUSR1);
+ trapped = 0;
+
+ nr_tests++;
+}
+
+static void write_var(int len)
+{
+ char *pcval; short *psval; int *pival; long long *plval;
+ int i;
+
+ for (i = 0; i < 4; i++) {
+ switch (len) {
+ case 1:
+ pcval = (char *)&dummy_var[i];
+ *pcval = 0xff;
+ break;
+ case 2:
+ psval = (short *)&dummy_var[i];
+ *psval = 0xffff;
+ break;
+ case 4:
+ pival = (int *)&dummy_var[i];
+ *pival = 0xffffffff;
+ break;
+ case 8:
+ plval = (long long *)&dummy_var[i];
+ *plval = 0xffffffffffffffffLL;
+ break;
+ }
+ check_trapped();
+ }
+}
+
+static void read_var(int len)
+{
+ char cval; short sval; int ival; long long lval;
+ int i;
+
+ for (i = 0; i < 4; i++) {
+ switch (len) {
+ case 1:
+ cval = *(char *)&dummy_var[i];
+ break;
+ case 2:
+ sval = *(short *)&dummy_var[i];
+ break;
+ case 4:
+ ival = *(int *)&dummy_var[i];
+ break;
+ case 8:
+ lval = *(long long *)&dummy_var[i];
+ break;
+ }
+ check_trapped();
+ }
+}
+
+/*
+ * Do the r/w/x accesses to trigger the breakpoints. And run
+ * the usual traps.
+ */
+static void trigger_tests(void)
+{
+ int len, local, global, i;
+ char val;
+ int ret;
+
+ ret = ptrace(PTRACE_TRACEME, 0, NULL, 0);
+ if (ret) {
+ ksft_print_msg("Can't be traced? %s\n", strerror(errno));
+ return;
+ }
+
+ /* Wake up father so that it sets up the first test */
+ kill(getpid(), SIGUSR1);
+
+ /* Test instruction breakpoints */
+ for (local = 0; local < 2; local++) {
+ for (global = 0; global < 2; global++) {
+ if (!local && !global)
+ continue;
+
+ for (i = 0; i < 4; i++) {
+ dummy_funcs[i]();
+ check_trapped();
+ }
+ }
+ }
+
+ /* Test write watchpoints */
+ for (len = 1; len <= sizeof(long); len <<= 1) {
+ for (local = 0; local < 2; local++) {
+ for (global = 0; global < 2; global++) {
+ if (!local && !global)
+ continue;
+ write_var(len);
+ }
+ }
+ }
+
+ /* Test read/write watchpoints (on read accesses) */
+ for (len = 1; len <= sizeof(long); len <<= 1) {
+ for (local = 0; local < 2; local++) {
+ for (global = 0; global < 2; global++) {
+ if (!local && !global)
+ continue;
+ read_var(len);
+ }
+ }
+ }
+
+ /* Icebp trap */
+ asm(".byte 0xf1\n");
+ check_trapped();
+
+ /* Int 3 trap */
+ asm("int $3\n");
+ check_trapped();
+
+ kill(getpid(), SIGUSR1);
+}
+
+static void check_success(const char *msg)
+{
+ int child_nr_tests;
+ int status;
+ int ret;
+
+ /* Wait for the child to SIGTRAP */
+ wait(&status);
+
+ ret = 0;
+
+ if (WSTOPSIG(status) == SIGTRAP) {
+ child_nr_tests = ptrace(PTRACE_PEEKDATA, child_pid,
+ &nr_tests, 0);
+ if (child_nr_tests == nr_tests)
+ ret = 1;
+ if (ptrace(PTRACE_POKEDATA, child_pid, &trapped, 1))
+ ksft_exit_fail_msg("Can't poke: %s\n", strerror(errno));
+ }
+
+ nr_tests++;
+
+ if (ret)
+ ksft_test_result_pass(msg);
+ else
+ ksft_test_result_fail(msg);
+}
+
+static void launch_instruction_breakpoints(char *buf, int local, int global)
+{
+ int i;
+
+ for (i = 0; i < 4; i++) {
+ set_breakpoint_addr(dummy_funcs[i], i);
+ toggle_breakpoint(i, BP_X, 1, local, global, 1);
+ ptrace(PTRACE_CONT, child_pid, NULL, 0);
+ sprintf(buf, "Test breakpoint %d with local: %d global: %d\n",
+ i, local, global);
+ check_success(buf);
+ toggle_breakpoint(i, BP_X, 1, local, global, 0);
+ }
+}
+
+static void launch_watchpoints(char *buf, int mode, int len,
+ int local, int global)
+{
+ const char *mode_str;
+ int i;
+
+ if (mode == BP_W)
+ mode_str = "write";
+ else
+ mode_str = "read";
+
+ for (i = 0; i < 4; i++) {
+ set_breakpoint_addr(&dummy_var[i], i);
+ toggle_breakpoint(i, mode, len, local, global, 1);
+ ptrace(PTRACE_CONT, child_pid, NULL, 0);
+ sprintf(buf,
+ "Test %s watchpoint %d with len: %d local: %d global: %d\n",
+ mode_str, i, len, local, global);
+ check_success(buf);
+ toggle_breakpoint(i, mode, len, local, global, 0);
+ }
+}
+
+/* Set the breakpoints and check the child successfully trigger them */
+static void launch_tests(void)
+{
+ char buf[1024];
+ int len, local, global, i;
+
+ /* Instruction breakpoints */
+ for (local = 0; local < 2; local++) {
+ for (global = 0; global < 2; global++) {
+ if (!local && !global)
+ continue;
+ launch_instruction_breakpoints(buf, local, global);
+ }
+ }
+
+ /* Write watchpoint */
+ for (len = 1; len <= sizeof(long); len <<= 1) {
+ for (local = 0; local < 2; local++) {
+ for (global = 0; global < 2; global++) {
+ if (!local && !global)
+ continue;
+ launch_watchpoints(buf, BP_W, len,
+ local, global);
+ }
+ }
+ }
+
+ /* Read-Write watchpoint */
+ for (len = 1; len <= sizeof(long); len <<= 1) {
+ for (local = 0; local < 2; local++) {
+ for (global = 0; global < 2; global++) {
+ if (!local && !global)
+ continue;
+ launch_watchpoints(buf, BP_RW, len,
+ local, global);
+ }
+ }
+ }
+
+ /* Icebp traps */
+ ptrace(PTRACE_CONT, child_pid, NULL, 0);
+ check_success("Test icebp\n");
+
+ /* Int 3 traps */
+ ptrace(PTRACE_CONT, child_pid, NULL, 0);
+ check_success("Test int 3 trap\n");
+
+ ptrace(PTRACE_CONT, child_pid, NULL, 0);
+}
+
+int main(int argc, char **argv)
+{
+ pid_t pid;
+ int ret;
+
+ ksft_print_header();
+
+ pid = fork();
+ if (!pid) {
+ trigger_tests();
+ exit(0);
+ }
+
+ child_pid = pid;
+
+ wait(NULL);
+
+ launch_tests();
+
+ wait(NULL);
+
+ ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/breakpoints/breakpoint_test_arm64.c b/tools/testing/selftests/breakpoints/breakpoint_test_arm64.c
new file mode 100644
index 000000000..2d95e5add
--- /dev/null
+++ b/tools/testing/selftests/breakpoints/breakpoint_test_arm64.c
@@ -0,0 +1,258 @@
+/*
+ * Copyright (C) 2016 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Original Code by Pavel Labath <labath@google.com>
+ *
+ * Code modified by Pratyush Anand <panand@redhat.com>
+ * for testing different byte select for each access size.
+ *
+ */
+
+#define _GNU_SOURCE
+
+#include <asm/ptrace.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/ptrace.h>
+#include <sys/param.h>
+#include <sys/uio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <string.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <elf.h>
+#include <errno.h>
+#include <signal.h>
+
+#include "../kselftest.h"
+
+static volatile uint8_t var[96] __attribute__((__aligned__(32)));
+
+static void child(int size, int wr)
+{
+ volatile uint8_t *addr = &var[32 + wr];
+
+ if (ptrace(PTRACE_TRACEME, 0, NULL, NULL) != 0) {
+ ksft_print_msg(
+ "ptrace(PTRACE_TRACEME) failed: %s\n",
+ strerror(errno));
+ _exit(1);
+ }
+
+ if (raise(SIGSTOP) != 0) {
+ ksft_print_msg(
+ "raise(SIGSTOP) failed: %s\n", strerror(errno));
+ _exit(1);
+ }
+
+ if ((uintptr_t) addr % size) {
+ ksft_print_msg(
+ "Wrong address write for the given size: %s\n",
+ strerror(errno));
+ _exit(1);
+ }
+
+ switch (size) {
+ case 1:
+ *addr = 47;
+ break;
+ case 2:
+ *(uint16_t *)addr = 47;
+ break;
+ case 4:
+ *(uint32_t *)addr = 47;
+ break;
+ case 8:
+ *(uint64_t *)addr = 47;
+ break;
+ case 16:
+ __asm__ volatile ("stp x29, x30, %0" : "=m" (addr[0]));
+ break;
+ case 32:
+ __asm__ volatile ("stp q29, q30, %0" : "=m" (addr[0]));
+ break;
+ }
+
+ _exit(0);
+}
+
+static bool set_watchpoint(pid_t pid, int size, int wp)
+{
+ const volatile uint8_t *addr = &var[32 + wp];
+ const int offset = (uintptr_t)addr % 8;
+ const unsigned int byte_mask = ((1 << size) - 1) << offset;
+ const unsigned int type = 2; /* Write */
+ const unsigned int enable = 1;
+ const unsigned int control = byte_mask << 5 | type << 3 | enable;
+ struct user_hwdebug_state dreg_state;
+ struct iovec iov;
+
+ memset(&dreg_state, 0, sizeof(dreg_state));
+ dreg_state.dbg_regs[0].addr = (uintptr_t)(addr - offset);
+ dreg_state.dbg_regs[0].ctrl = control;
+ iov.iov_base = &dreg_state;
+ iov.iov_len = offsetof(struct user_hwdebug_state, dbg_regs) +
+ sizeof(dreg_state.dbg_regs[0]);
+ if (ptrace(PTRACE_SETREGSET, pid, NT_ARM_HW_WATCH, &iov) == 0)
+ return true;
+
+ if (errno == EIO)
+ ksft_print_msg(
+ "ptrace(PTRACE_SETREGSET, NT_ARM_HW_WATCH) not supported on this hardware: %s\n",
+ strerror(errno));
+
+ ksft_print_msg(
+ "ptrace(PTRACE_SETREGSET, NT_ARM_HW_WATCH) failed: %s\n",
+ strerror(errno));
+ return false;
+}
+
+static bool run_test(int wr_size, int wp_size, int wr, int wp)
+{
+ int status;
+ siginfo_t siginfo;
+ pid_t pid = fork();
+ pid_t wpid;
+
+ if (pid < 0) {
+ ksft_test_result_fail(
+ "fork() failed: %s\n", strerror(errno));
+ return false;
+ }
+ if (pid == 0)
+ child(wr_size, wr);
+
+ wpid = waitpid(pid, &status, __WALL);
+ if (wpid != pid) {
+ ksft_print_msg(
+ "waitpid() failed: %s\n", strerror(errno));
+ return false;
+ }
+ if (!WIFSTOPPED(status)) {
+ ksft_print_msg(
+ "child did not stop: %s\n", strerror(errno));
+ return false;
+ }
+ if (WSTOPSIG(status) != SIGSTOP) {
+ ksft_print_msg("child did not stop with SIGSTOP\n");
+ return false;
+ }
+
+ if (!set_watchpoint(pid, wp_size, wp))
+ return false;
+
+ if (ptrace(PTRACE_CONT, pid, NULL, NULL) < 0) {
+ ksft_print_msg(
+ "ptrace(PTRACE_SINGLESTEP) failed: %s\n",
+ strerror(errno));
+ return false;
+ }
+
+ alarm(3);
+ wpid = waitpid(pid, &status, __WALL);
+ if (wpid != pid) {
+ ksft_print_msg(
+ "waitpid() failed: %s\n", strerror(errno));
+ return false;
+ }
+ alarm(0);
+ if (WIFEXITED(status)) {
+ ksft_print_msg("child did not single-step\n");
+ return false;
+ }
+ if (!WIFSTOPPED(status)) {
+ ksft_print_msg("child did not stop\n");
+ return false;
+ }
+ if (WSTOPSIG(status) != SIGTRAP) {
+ ksft_print_msg("child did not stop with SIGTRAP\n");
+ return false;
+ }
+ if (ptrace(PTRACE_GETSIGINFO, pid, NULL, &siginfo) != 0) {
+ ksft_print_msg(
+ "ptrace(PTRACE_GETSIGINFO): %s\n",
+ strerror(errno));
+ return false;
+ }
+ if (siginfo.si_code != TRAP_HWBKPT) {
+ ksft_print_msg(
+ "Unexpected si_code %d\n", siginfo.si_code);
+ return false;
+ }
+
+ kill(pid, SIGKILL);
+ wpid = waitpid(pid, &status, 0);
+ if (wpid != pid) {
+ ksft_print_msg(
+ "waitpid() failed: %s\n", strerror(errno));
+ return false;
+ }
+ return true;
+}
+
+static void sigalrm(int sig)
+{
+}
+
+int main(int argc, char **argv)
+{
+ int opt;
+ bool succeeded = true;
+ struct sigaction act;
+ int wr, wp, size;
+ bool result;
+
+ ksft_print_header();
+
+ act.sa_handler = sigalrm;
+ sigemptyset(&act.sa_mask);
+ act.sa_flags = 0;
+ sigaction(SIGALRM, &act, NULL);
+ for (size = 1; size <= 32; size = size*2) {
+ for (wr = 0; wr <= 32; wr = wr + size) {
+ for (wp = wr - size; wp <= wr + size; wp = wp + size) {
+ result = run_test(size, MIN(size, 8), wr, wp);
+ if ((result && wr == wp) ||
+ (!result && wr != wp))
+ ksft_test_result_pass(
+ "Test size = %d write offset = %d watchpoint offset = %d\n",
+ size, wr, wp);
+ else {
+ ksft_test_result_fail(
+ "Test size = %d write offset = %d watchpoint offset = %d\n",
+ size, wr, wp);
+ succeeded = false;
+ }
+ }
+ }
+ }
+
+ for (size = 1; size <= 32; size = size*2) {
+ if (run_test(size, 8, -size, -8))
+ ksft_test_result_pass(
+ "Test size = %d write offset = %d watchpoint offset = -8\n",
+ size, -size);
+ else {
+ ksft_test_result_fail(
+ "Test size = %d write offset = %d watchpoint offset = -8\n",
+ size, -size);
+ succeeded = false;
+ }
+ }
+
+ if (succeeded)
+ ksft_exit_pass();
+ else
+ ksft_exit_fail();
+}
diff --git a/tools/testing/selftests/breakpoints/step_after_suspend_test.c b/tools/testing/selftests/breakpoints/step_after_suspend_test.c
new file mode 100644
index 000000000..f82dcc1f8
--- /dev/null
+++ b/tools/testing/selftests/breakpoints/step_after_suspend_test.c
@@ -0,0 +1,220 @@
+/*
+ * Copyright (C) 2016 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/ptrace.h>
+#include <sys/stat.h>
+#include <sys/timerfd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#include "../kselftest.h"
+
+void child(int cpu)
+{
+ cpu_set_t set;
+
+ CPU_ZERO(&set);
+ CPU_SET(cpu, &set);
+ if (sched_setaffinity(0, sizeof(set), &set) != 0) {
+ ksft_print_msg("sched_setaffinity() failed: %s\n",
+ strerror(errno));
+ _exit(1);
+ }
+
+ if (ptrace(PTRACE_TRACEME, 0, NULL, NULL) != 0) {
+ ksft_print_msg("ptrace(PTRACE_TRACEME) failed: %s\n",
+ strerror(errno));
+ _exit(1);
+ }
+
+ if (raise(SIGSTOP) != 0) {
+ ksft_print_msg("raise(SIGSTOP) failed: %s\n", strerror(errno));
+ _exit(1);
+ }
+
+ _exit(0);
+}
+
+bool run_test(int cpu)
+{
+ int status;
+ pid_t pid = fork();
+ pid_t wpid;
+
+ if (pid < 0) {
+ ksft_print_msg("fork() failed: %s\n", strerror(errno));
+ return false;
+ }
+ if (pid == 0)
+ child(cpu);
+
+ wpid = waitpid(pid, &status, __WALL);
+ if (wpid != pid) {
+ ksft_print_msg("waitpid() failed: %s\n", strerror(errno));
+ return false;
+ }
+ if (!WIFSTOPPED(status)) {
+ ksft_print_msg("child did not stop: %s\n", strerror(errno));
+ return false;
+ }
+ if (WSTOPSIG(status) != SIGSTOP) {
+ ksft_print_msg("child did not stop with SIGSTOP: %s\n",
+ strerror(errno));
+ return false;
+ }
+
+ if (ptrace(PTRACE_SINGLESTEP, pid, NULL, NULL) < 0) {
+ if (errno == EIO) {
+ ksft_exit_skip(
+ "ptrace(PTRACE_SINGLESTEP) not supported on this architecture: %s\n",
+ strerror(errno));
+ }
+ ksft_print_msg("ptrace(PTRACE_SINGLESTEP) failed: %s\n",
+ strerror(errno));
+ return false;
+ }
+
+ wpid = waitpid(pid, &status, __WALL);
+ if (wpid != pid) {
+ ksft_print_msg("waitpid() failed: $s\n", strerror(errno));
+ return false;
+ }
+ if (WIFEXITED(status)) {
+ ksft_print_msg("child did not single-step: %s\n",
+ strerror(errno));
+ return false;
+ }
+ if (!WIFSTOPPED(status)) {
+ ksft_print_msg("child did not stop: %s\n", strerror(errno));
+ return false;
+ }
+ if (WSTOPSIG(status) != SIGTRAP) {
+ ksft_print_msg("child did not stop with SIGTRAP: %s\n",
+ strerror(errno));
+ return false;
+ }
+
+ if (ptrace(PTRACE_CONT, pid, NULL, NULL) < 0) {
+ ksft_print_msg("ptrace(PTRACE_CONT) failed: %s\n",
+ strerror(errno));
+ return false;
+ }
+
+ wpid = waitpid(pid, &status, __WALL);
+ if (wpid != pid) {
+ ksft_print_msg("waitpid() failed: %s\n", strerror(errno));
+ return false;
+ }
+ if (!WIFEXITED(status)) {
+ ksft_print_msg("child did not exit after PTRACE_CONT: %s\n",
+ strerror(errno));
+ return false;
+ }
+
+ return true;
+}
+
+void suspend(void)
+{
+ int power_state_fd;
+ struct sigevent event = {};
+ int timerfd;
+ int err;
+ struct itimerspec spec = {};
+
+ if (getuid() != 0)
+ ksft_exit_skip("Please run the test as root - Exiting.\n");
+
+ power_state_fd = open("/sys/power/state", O_RDWR);
+ if (power_state_fd < 0)
+ ksft_exit_fail_msg(
+ "open(\"/sys/power/state\") failed %s)\n",
+ strerror(errno));
+
+ timerfd = timerfd_create(CLOCK_BOOTTIME_ALARM, 0);
+ if (timerfd < 0)
+ ksft_exit_fail_msg("timerfd_create() failed\n");
+
+ spec.it_value.tv_sec = 5;
+ err = timerfd_settime(timerfd, 0, &spec, NULL);
+ if (err < 0)
+ ksft_exit_fail_msg("timerfd_settime() failed\n");
+
+ if (write(power_state_fd, "mem", strlen("mem")) != strlen("mem"))
+ ksft_exit_fail_msg("Failed to enter Suspend state\n");
+
+ close(timerfd);
+ close(power_state_fd);
+}
+
+int main(int argc, char **argv)
+{
+ int opt;
+ bool do_suspend = true;
+ bool succeeded = true;
+ cpu_set_t available_cpus;
+ int err;
+ int cpu;
+
+ ksft_print_header();
+
+ while ((opt = getopt(argc, argv, "n")) != -1) {
+ switch (opt) {
+ case 'n':
+ do_suspend = false;
+ break;
+ default:
+ printf("Usage: %s [-n]\n", argv[0]);
+ printf(" -n: do not trigger a suspend/resume cycle before the test\n");
+ return -1;
+ }
+ }
+
+ if (do_suspend)
+ suspend();
+
+ err = sched_getaffinity(0, sizeof(available_cpus), &available_cpus);
+ if (err < 0)
+ ksft_exit_fail_msg("sched_getaffinity() failed\n");
+
+ for (cpu = 0; cpu < CPU_SETSIZE; cpu++) {
+ bool test_success;
+
+ if (!CPU_ISSET(cpu, &available_cpus))
+ continue;
+
+ test_success = run_test(cpu);
+ if (test_success) {
+ ksft_test_result_pass("CPU %d\n", cpu);
+ } else {
+ ksft_test_result_fail("CPU %d\n", cpu);
+ succeeded = false;
+ }
+ }
+
+ if (succeeded)
+ ksft_exit_pass();
+ else
+ ksft_exit_fail();
+}
diff --git a/tools/testing/selftests/capabilities/.gitignore b/tools/testing/selftests/capabilities/.gitignore
new file mode 100644
index 000000000..b732dd0d4
--- /dev/null
+++ b/tools/testing/selftests/capabilities/.gitignore
@@ -0,0 +1,2 @@
+test_execve
+validate_cap
diff --git a/tools/testing/selftests/capabilities/Makefile b/tools/testing/selftests/capabilities/Makefile
new file mode 100644
index 000000000..6e9d98d45
--- /dev/null
+++ b/tools/testing/selftests/capabilities/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+TEST_GEN_FILES := validate_cap
+TEST_GEN_PROGS := test_execve
+
+CFLAGS += -O2 -g -std=gnu99 -Wall
+LDLIBS += -lcap-ng -lrt -ldl
+
+include ../lib.mk
+
diff --git a/tools/testing/selftests/capabilities/test_execve.c b/tools/testing/selftests/capabilities/test_execve.c
new file mode 100644
index 000000000..3ab39a61b
--- /dev/null
+++ b/tools/testing/selftests/capabilities/test_execve.c
@@ -0,0 +1,460 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+
+#include <cap-ng.h>
+#include <linux/capability.h>
+#include <stdbool.h>
+#include <string.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <stdarg.h>
+#include <sched.h>
+#include <sys/mount.h>
+#include <limits.h>
+#include <libgen.h>
+#include <malloc.h>
+#include <sys/wait.h>
+#include <sys/prctl.h>
+#include <sys/stat.h>
+
+#include "../kselftest.h"
+
+#ifndef PR_CAP_AMBIENT
+#define PR_CAP_AMBIENT 47
+# define PR_CAP_AMBIENT_IS_SET 1
+# define PR_CAP_AMBIENT_RAISE 2
+# define PR_CAP_AMBIENT_LOWER 3
+# define PR_CAP_AMBIENT_CLEAR_ALL 4
+#endif
+
+static int nerrs;
+static pid_t mpid; /* main() pid is used to avoid duplicate test counts */
+
+static void vmaybe_write_file(bool enoent_ok, char *filename, char *fmt, va_list ap)
+{
+ char buf[4096];
+ int fd;
+ ssize_t written;
+ int buf_len;
+
+ buf_len = vsnprintf(buf, sizeof(buf), fmt, ap);
+ if (buf_len < 0)
+ ksft_exit_fail_msg("vsnprintf failed - %s\n", strerror(errno));
+
+ if (buf_len >= sizeof(buf))
+ ksft_exit_fail_msg("vsnprintf output truncated\n");
+
+
+ fd = open(filename, O_WRONLY);
+ if (fd < 0) {
+ if ((errno == ENOENT) && enoent_ok)
+ return;
+ ksft_exit_fail_msg("open of %s failed - %s\n",
+ filename, strerror(errno));
+ }
+ written = write(fd, buf, buf_len);
+ if (written != buf_len) {
+ if (written >= 0) {
+ ksft_exit_fail_msg("short write to %s\n", filename);
+ } else {
+ ksft_exit_fail_msg("write to %s failed - %s\n",
+ filename, strerror(errno));
+ }
+ }
+ if (close(fd) != 0) {
+ ksft_exit_fail_msg("close of %s failed - %s\n",
+ filename, strerror(errno));
+ }
+}
+
+static void maybe_write_file(char *filename, char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ vmaybe_write_file(true, filename, fmt, ap);
+ va_end(ap);
+}
+
+static void write_file(char *filename, char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ vmaybe_write_file(false, filename, fmt, ap);
+ va_end(ap);
+}
+
+static bool create_and_enter_ns(uid_t inner_uid)
+{
+ uid_t outer_uid;
+ gid_t outer_gid;
+ int i;
+ bool have_outer_privilege;
+
+ outer_uid = getuid();
+ outer_gid = getgid();
+
+ /*
+ * TODO: If we're already root, we could skip creating the userns.
+ */
+
+ if (unshare(CLONE_NEWNS) == 0) {
+ ksft_print_msg("[NOTE]\tUsing global UIDs for tests\n");
+ if (prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0) != 0)
+ ksft_exit_fail_msg("PR_SET_KEEPCAPS - %s\n",
+ strerror(errno));
+ if (setresuid(inner_uid, inner_uid, -1) != 0)
+ ksft_exit_fail_msg("setresuid - %s\n", strerror(errno));
+
+ // Re-enable effective caps
+ capng_get_caps_process();
+ for (i = 0; i < CAP_LAST_CAP; i++)
+ if (capng_have_capability(CAPNG_PERMITTED, i))
+ capng_update(CAPNG_ADD, CAPNG_EFFECTIVE, i);
+ if (capng_apply(CAPNG_SELECT_CAPS) != 0)
+ ksft_exit_fail_msg(
+ "capng_apply - %s\n", strerror(errno));
+
+ have_outer_privilege = true;
+ } else if (unshare(CLONE_NEWUSER | CLONE_NEWNS) == 0) {
+ ksft_print_msg("[NOTE]\tUsing a user namespace for tests\n");
+ maybe_write_file("/proc/self/setgroups", "deny");
+ write_file("/proc/self/uid_map", "%d %d 1", inner_uid, outer_uid);
+ write_file("/proc/self/gid_map", "0 %d 1", outer_gid);
+
+ have_outer_privilege = false;
+ } else {
+ ksft_exit_skip("must be root or be able to create a userns\n");
+ }
+
+ if (mount("none", "/", NULL, MS_REC | MS_PRIVATE, NULL) != 0)
+ ksft_exit_fail_msg("remount everything private - %s\n",
+ strerror(errno));
+
+ return have_outer_privilege;
+}
+
+static void chdir_to_tmpfs(void)
+{
+ char cwd[PATH_MAX];
+ if (getcwd(cwd, sizeof(cwd)) != cwd)
+ ksft_exit_fail_msg("getcwd - %s\n", strerror(errno));
+
+ if (mount("private_tmp", ".", "tmpfs", 0, "mode=0777") != 0)
+ ksft_exit_fail_msg("mount private tmpfs - %s\n",
+ strerror(errno));
+
+ if (chdir(cwd) != 0)
+ ksft_exit_fail_msg("chdir to private tmpfs - %s\n",
+ strerror(errno));
+}
+
+static void copy_fromat_to(int fromfd, const char *fromname, const char *toname)
+{
+ int from = openat(fromfd, fromname, O_RDONLY);
+ if (from == -1)
+ ksft_exit_fail_msg("open copy source - %s\n", strerror(errno));
+
+ int to = open(toname, O_CREAT | O_WRONLY | O_EXCL, 0700);
+
+ while (true) {
+ char buf[4096];
+ ssize_t sz = read(from, buf, sizeof(buf));
+ if (sz == 0)
+ break;
+ if (sz < 0)
+ ksft_exit_fail_msg("read - %s\n", strerror(errno));
+
+ if (write(to, buf, sz) != sz)
+ /* no short writes on tmpfs */
+ ksft_exit_fail_msg("write - %s\n", strerror(errno));
+ }
+
+ close(from);
+ close(to);
+}
+
+static bool fork_wait(void)
+{
+ pid_t child = fork();
+ if (child == 0) {
+ nerrs = 0;
+ return true;
+ } else if (child > 0) {
+ int status;
+ if (waitpid(child, &status, 0) != child ||
+ !WIFEXITED(status)) {
+ ksft_print_msg("Child died\n");
+ nerrs++;
+ } else if (WEXITSTATUS(status) != 0) {
+ ksft_print_msg("Child failed\n");
+ nerrs++;
+ } else {
+ /* don't print this message for mpid */
+ if (getpid() != mpid)
+ ksft_test_result_pass("Passed\n");
+ }
+ return false;
+ } else {
+ ksft_exit_fail_msg("fork - %s\n", strerror(errno));
+ return false;
+ }
+}
+
+static void exec_other_validate_cap(const char *name,
+ bool eff, bool perm, bool inh, bool ambient)
+{
+ execl(name, name, (eff ? "1" : "0"),
+ (perm ? "1" : "0"), (inh ? "1" : "0"), (ambient ? "1" : "0"),
+ NULL);
+ ksft_exit_fail_msg("execl - %s\n", strerror(errno));
+}
+
+static void exec_validate_cap(bool eff, bool perm, bool inh, bool ambient)
+{
+ exec_other_validate_cap("./validate_cap", eff, perm, inh, ambient);
+}
+
+static int do_tests(int uid, const char *our_path)
+{
+ bool have_outer_privilege = create_and_enter_ns(uid);
+
+ int ourpath_fd = open(our_path, O_RDONLY | O_DIRECTORY);
+ if (ourpath_fd == -1)
+ ksft_exit_fail_msg("open '%s' - %s\n",
+ our_path, strerror(errno));
+
+ chdir_to_tmpfs();
+
+ copy_fromat_to(ourpath_fd, "validate_cap", "validate_cap");
+
+ if (have_outer_privilege) {
+ uid_t gid = getegid();
+
+ copy_fromat_to(ourpath_fd, "validate_cap",
+ "validate_cap_suidroot");
+ if (chown("validate_cap_suidroot", 0, -1) != 0)
+ ksft_exit_fail_msg("chown - %s\n", strerror(errno));
+ if (chmod("validate_cap_suidroot", S_ISUID | 0700) != 0)
+ ksft_exit_fail_msg("chmod - %s\n", strerror(errno));
+
+ copy_fromat_to(ourpath_fd, "validate_cap",
+ "validate_cap_suidnonroot");
+ if (chown("validate_cap_suidnonroot", uid + 1, -1) != 0)
+ ksft_exit_fail_msg("chown - %s\n", strerror(errno));
+ if (chmod("validate_cap_suidnonroot", S_ISUID | 0700) != 0)
+ ksft_exit_fail_msg("chmod - %s\n", strerror(errno));
+
+ copy_fromat_to(ourpath_fd, "validate_cap",
+ "validate_cap_sgidroot");
+ if (chown("validate_cap_sgidroot", -1, 0) != 0)
+ ksft_exit_fail_msg("chown - %s\n", strerror(errno));
+ if (chmod("validate_cap_sgidroot", S_ISGID | 0710) != 0)
+ ksft_exit_fail_msg("chmod - %s\n", strerror(errno));
+
+ copy_fromat_to(ourpath_fd, "validate_cap",
+ "validate_cap_sgidnonroot");
+ if (chown("validate_cap_sgidnonroot", -1, gid + 1) != 0)
+ ksft_exit_fail_msg("chown - %s\n", strerror(errno));
+ if (chmod("validate_cap_sgidnonroot", S_ISGID | 0710) != 0)
+ ksft_exit_fail_msg("chmod - %s\n", strerror(errno));
+ }
+
+ capng_get_caps_process();
+
+ /* Make sure that i starts out clear */
+ capng_update(CAPNG_DROP, CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE);
+ if (capng_apply(CAPNG_SELECT_CAPS) != 0)
+ ksft_exit_fail_msg("capng_apply - %s\n", strerror(errno));
+
+ if (uid == 0) {
+ ksft_print_msg("[RUN]\tRoot => ep\n");
+ if (fork_wait())
+ exec_validate_cap(true, true, false, false);
+ } else {
+ ksft_print_msg("[RUN]\tNon-root => no caps\n");
+ if (fork_wait())
+ exec_validate_cap(false, false, false, false);
+ }
+
+ ksft_print_msg("Check cap_ambient manipulation rules\n");
+
+ /* We should not be able to add ambient caps yet. */
+ if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_NET_BIND_SERVICE, 0, 0, 0) != -1 || errno != EPERM) {
+ if (errno == EINVAL)
+ ksft_test_result_fail(
+ "PR_CAP_AMBIENT_RAISE isn't supported\n");
+ else
+ ksft_test_result_fail(
+ "PR_CAP_AMBIENT_RAISE should have failed eith EPERM on a non-inheritable cap\n");
+ return 1;
+ }
+ ksft_test_result_pass(
+ "PR_CAP_AMBIENT_RAISE failed on non-inheritable cap\n");
+
+ capng_update(CAPNG_ADD, CAPNG_INHERITABLE, CAP_NET_RAW);
+ capng_update(CAPNG_DROP, CAPNG_PERMITTED, CAP_NET_RAW);
+ capng_update(CAPNG_DROP, CAPNG_EFFECTIVE, CAP_NET_RAW);
+ if (capng_apply(CAPNG_SELECT_CAPS) != 0)
+ ksft_exit_fail_msg("capng_apply - %s\n", strerror(errno));
+ if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_NET_RAW, 0, 0, 0) != -1 || errno != EPERM) {
+ ksft_test_result_fail(
+ "PR_CAP_AMBIENT_RAISE should have failed on a non-permitted cap\n");
+ return 1;
+ }
+ ksft_test_result_pass(
+ "PR_CAP_AMBIENT_RAISE failed on non-permitted cap\n");
+
+ capng_update(CAPNG_ADD, CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE);
+ if (capng_apply(CAPNG_SELECT_CAPS) != 0)
+ ksft_exit_fail_msg("capng_apply - %s\n", strerror(errno));
+ if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_NET_BIND_SERVICE, 0, 0, 0) != 0) {
+ ksft_test_result_fail(
+ "PR_CAP_AMBIENT_RAISE should have succeeded\n");
+ return 1;
+ }
+ ksft_test_result_pass("PR_CAP_AMBIENT_RAISE worked\n");
+
+ if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_NET_BIND_SERVICE, 0, 0, 0) != 1) {
+ ksft_test_result_fail("PR_CAP_AMBIENT_IS_SET is broken\n");
+ return 1;
+ }
+
+ if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_CLEAR_ALL, 0, 0, 0, 0) != 0)
+ ksft_exit_fail_msg("PR_CAP_AMBIENT_CLEAR_ALL - %s\n",
+ strerror(errno));
+
+ if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_NET_BIND_SERVICE, 0, 0, 0) != 0) {
+ ksft_test_result_fail(
+ "PR_CAP_AMBIENT_CLEAR_ALL didn't work\n");
+ return 1;
+ }
+
+ if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_NET_BIND_SERVICE, 0, 0, 0) != 0)
+ ksft_exit_fail_msg("PR_CAP_AMBIENT_RAISE - %s\n",
+ strerror(errno));
+
+ capng_update(CAPNG_DROP, CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE);
+ if (capng_apply(CAPNG_SELECT_CAPS) != 0)
+ ksft_exit_fail_msg("capng_apply - %s\n", strerror(errno));
+
+ if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_NET_BIND_SERVICE, 0, 0, 0) != 0) {
+ ksft_test_result_fail("Dropping I should have dropped A\n");
+ return 1;
+ }
+
+ ksft_test_result_pass("Basic manipulation appears to work\n");
+
+ capng_update(CAPNG_ADD, CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE);
+ if (capng_apply(CAPNG_SELECT_CAPS) != 0)
+ ksft_exit_fail_msg("capng_apply - %s\n", strerror(errno));
+ if (uid == 0) {
+ ksft_print_msg("[RUN]\tRoot +i => eip\n");
+ if (fork_wait())
+ exec_validate_cap(true, true, true, false);
+ } else {
+ ksft_print_msg("[RUN]\tNon-root +i => i\n");
+ if (fork_wait())
+ exec_validate_cap(false, false, true, false);
+ }
+
+ if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_NET_BIND_SERVICE, 0, 0, 0) != 0)
+ ksft_exit_fail_msg("PR_CAP_AMBIENT_RAISE - %s\n",
+ strerror(errno));
+
+ ksft_print_msg("[RUN]\tUID %d +ia => eipa\n", uid);
+ if (fork_wait())
+ exec_validate_cap(true, true, true, true);
+
+ /* The remaining tests need real privilege */
+
+ if (!have_outer_privilege) {
+ ksft_test_result_skip("SUID/SGID tests (needs privilege)\n");
+ goto done;
+ }
+
+ if (uid == 0) {
+ ksft_print_msg("[RUN]\tRoot +ia, suidroot => eipa\n");
+ if (fork_wait())
+ exec_other_validate_cap("./validate_cap_suidroot",
+ true, true, true, true);
+
+ ksft_print_msg("[RUN]\tRoot +ia, suidnonroot => ip\n");
+ if (fork_wait())
+ exec_other_validate_cap("./validate_cap_suidnonroot",
+ false, true, true, false);
+
+ ksft_print_msg("[RUN]\tRoot +ia, sgidroot => eipa\n");
+ if (fork_wait())
+ exec_other_validate_cap("./validate_cap_sgidroot",
+ true, true, true, true);
+
+ if (fork_wait()) {
+ ksft_print_msg(
+ "[RUN]\tRoot, gid != 0, +ia, sgidroot => eip\n");
+ if (setresgid(1, 1, 1) != 0)
+ ksft_exit_fail_msg("setresgid - %s\n",
+ strerror(errno));
+ exec_other_validate_cap("./validate_cap_sgidroot",
+ true, true, true, false);
+ }
+
+ ksft_print_msg("[RUN]\tRoot +ia, sgidnonroot => eip\n");
+ if (fork_wait())
+ exec_other_validate_cap("./validate_cap_sgidnonroot",
+ true, true, true, false);
+ } else {
+ ksft_print_msg("[RUN]\tNon-root +ia, sgidnonroot => i\n");
+ if (fork_wait())
+ exec_other_validate_cap("./validate_cap_sgidnonroot",
+ false, false, true, false);
+
+ if (fork_wait()) {
+ ksft_print_msg("[RUN]\tNon-root +ia, sgidroot => i\n");
+ if (setresgid(1, 1, 1) != 0)
+ ksft_exit_fail_msg("setresgid - %s\n",
+ strerror(errno));
+ exec_other_validate_cap("./validate_cap_sgidroot",
+ false, false, true, false);
+ }
+ }
+
+done:
+ ksft_print_cnts();
+ return nerrs ? 1 : 0;
+}
+
+int main(int argc, char **argv)
+{
+ char *tmp1, *tmp2, *our_path;
+
+ ksft_print_header();
+
+ /* Find our path */
+ tmp1 = strdup(argv[0]);
+ if (!tmp1)
+ ksft_exit_fail_msg("strdup - %s\n", strerror(errno));
+ tmp2 = dirname(tmp1);
+ our_path = strdup(tmp2);
+ if (!our_path)
+ ksft_exit_fail_msg("strdup - %s\n", strerror(errno));
+ free(tmp1);
+
+ mpid = getpid();
+
+ if (fork_wait()) {
+ ksft_print_msg("[RUN]\t+++ Tests with uid == 0 +++\n");
+ return do_tests(0, our_path);
+ }
+
+ ksft_print_msg("==================================================\n");
+
+ if (fork_wait()) {
+ ksft_print_msg("[RUN]\t+++ Tests with uid != 0 +++\n");
+ return do_tests(1, our_path);
+ }
+
+ return nerrs ? 1 : 0;
+}
diff --git a/tools/testing/selftests/capabilities/validate_cap.c b/tools/testing/selftests/capabilities/validate_cap.c
new file mode 100644
index 000000000..cdfc94268
--- /dev/null
+++ b/tools/testing/selftests/capabilities/validate_cap.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <cap-ng.h>
+#include <linux/capability.h>
+#include <stdbool.h>
+#include <string.h>
+#include <stdio.h>
+#include <sys/prctl.h>
+#include <sys/auxv.h>
+
+#include "../kselftest.h"
+
+#ifndef PR_CAP_AMBIENT
+#define PR_CAP_AMBIENT 47
+# define PR_CAP_AMBIENT_IS_SET 1
+# define PR_CAP_AMBIENT_RAISE 2
+# define PR_CAP_AMBIENT_LOWER 3
+# define PR_CAP_AMBIENT_CLEAR_ALL 4
+#endif
+
+#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 19)
+# define HAVE_GETAUXVAL
+#endif
+
+static bool bool_arg(char **argv, int i)
+{
+ if (!strcmp(argv[i], "0"))
+ return false;
+ else if (!strcmp(argv[i], "1"))
+ return true;
+ else {
+ ksft_exit_fail_msg("wrong argv[%d]\n", i);
+ return false;
+ }
+}
+
+int main(int argc, char **argv)
+{
+ const char *atsec = "";
+
+ /*
+ * Be careful just in case a setgid or setcapped copy of this
+ * helper gets out.
+ */
+
+ if (argc != 5)
+ ksft_exit_fail_msg("wrong argc\n");
+
+#ifdef HAVE_GETAUXVAL
+ if (getauxval(AT_SECURE))
+ atsec = " (AT_SECURE is set)";
+ else
+ atsec = " (AT_SECURE is not set)";
+#endif
+
+ capng_get_caps_process();
+
+ if (capng_have_capability(CAPNG_EFFECTIVE, CAP_NET_BIND_SERVICE) != bool_arg(argv, 1)) {
+ ksft_print_msg("Wrong effective state%s\n", atsec);
+ return 1;
+ }
+
+ if (capng_have_capability(CAPNG_PERMITTED, CAP_NET_BIND_SERVICE) != bool_arg(argv, 2)) {
+ ksft_print_msg("Wrong permitted state%s\n", atsec);
+ return 1;
+ }
+
+ if (capng_have_capability(CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE) != bool_arg(argv, 3)) {
+ ksft_print_msg("Wrong inheritable state%s\n", atsec);
+ return 1;
+ }
+
+ if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_NET_BIND_SERVICE, 0, 0, 0) != bool_arg(argv, 4)) {
+ ksft_print_msg("Wrong ambient state%s\n", atsec);
+ return 1;
+ }
+
+ ksft_print_msg("%s: Capabilities after execve were correct\n",
+ "validate_cap:");
+ return 0;
+}
diff --git a/tools/testing/selftests/cgroup/.gitignore b/tools/testing/selftests/cgroup/.gitignore
new file mode 100644
index 000000000..adacda50a
--- /dev/null
+++ b/tools/testing/selftests/cgroup/.gitignore
@@ -0,0 +1,2 @@
+test_memcontrol
+test_core
diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile
new file mode 100644
index 000000000..23fbaa4a9
--- /dev/null
+++ b/tools/testing/selftests/cgroup/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS += -Wall
+
+all:
+
+TEST_GEN_PROGS = test_memcontrol
+TEST_GEN_PROGS += test_core
+
+include ../lib.mk
+
+$(OUTPUT)/test_memcontrol: cgroup_util.c
+$(OUTPUT)/test_core: cgroup_util.c
diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c
new file mode 100644
index 000000000..a516d01f0
--- /dev/null
+++ b/tools/testing/selftests/cgroup/cgroup_util.c
@@ -0,0 +1,370 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/limits.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "cgroup_util.h"
+
+static ssize_t read_text(const char *path, char *buf, size_t max_len)
+{
+ ssize_t len;
+ int fd;
+
+ fd = open(path, O_RDONLY);
+ if (fd < 0)
+ return fd;
+
+ len = read(fd, buf, max_len - 1);
+ if (len < 0)
+ goto out;
+
+ buf[len] = 0;
+out:
+ close(fd);
+ return len;
+}
+
+static ssize_t write_text(const char *path, char *buf, ssize_t len)
+{
+ int fd;
+
+ fd = open(path, O_WRONLY | O_APPEND);
+ if (fd < 0)
+ return fd;
+
+ len = write(fd, buf, len);
+ if (len < 0) {
+ close(fd);
+ return len;
+ }
+
+ close(fd);
+
+ return len;
+}
+
+char *cg_name(const char *root, const char *name)
+{
+ size_t len = strlen(root) + strlen(name) + 2;
+ char *ret = malloc(len);
+
+ snprintf(ret, len, "%s/%s", root, name);
+
+ return ret;
+}
+
+char *cg_name_indexed(const char *root, const char *name, int index)
+{
+ size_t len = strlen(root) + strlen(name) + 10;
+ char *ret = malloc(len);
+
+ snprintf(ret, len, "%s/%s_%d", root, name, index);
+
+ return ret;
+}
+
+int cg_read(const char *cgroup, const char *control, char *buf, size_t len)
+{
+ char path[PATH_MAX];
+
+ snprintf(path, sizeof(path), "%s/%s", cgroup, control);
+
+ if (read_text(path, buf, len) >= 0)
+ return 0;
+
+ return -1;
+}
+
+int cg_read_strcmp(const char *cgroup, const char *control,
+ const char *expected)
+{
+ size_t size;
+ char *buf;
+ int ret;
+
+ /* Handle the case of comparing against empty string */
+ if (!expected)
+ return -1;
+ else
+ size = strlen(expected) + 1;
+
+ buf = malloc(size);
+ if (!buf)
+ return -1;
+
+ if (cg_read(cgroup, control, buf, size)) {
+ free(buf);
+ return -1;
+ }
+
+ ret = strcmp(expected, buf);
+ free(buf);
+ return ret;
+}
+
+int cg_read_strstr(const char *cgroup, const char *control, const char *needle)
+{
+ char buf[PAGE_SIZE];
+
+ if (cg_read(cgroup, control, buf, sizeof(buf)))
+ return -1;
+
+ return strstr(buf, needle) ? 0 : -1;
+}
+
+long cg_read_long(const char *cgroup, const char *control)
+{
+ char buf[128];
+
+ if (cg_read(cgroup, control, buf, sizeof(buf)))
+ return -1;
+
+ return atol(buf);
+}
+
+long cg_read_key_long(const char *cgroup, const char *control, const char *key)
+{
+ char buf[PAGE_SIZE];
+ char *ptr;
+
+ if (cg_read(cgroup, control, buf, sizeof(buf)))
+ return -1;
+
+ ptr = strstr(buf, key);
+ if (!ptr)
+ return -1;
+
+ return atol(ptr + strlen(key));
+}
+
+int cg_write(const char *cgroup, const char *control, char *buf)
+{
+ char path[PATH_MAX];
+ ssize_t len = strlen(buf);
+
+ snprintf(path, sizeof(path), "%s/%s", cgroup, control);
+
+ if (write_text(path, buf, len) == len)
+ return 0;
+
+ return -1;
+}
+
+int cg_find_unified_root(char *root, size_t len)
+{
+ char buf[10 * PAGE_SIZE];
+ char *fs, *mount, *type;
+ const char delim[] = "\n\t ";
+
+ if (read_text("/proc/self/mounts", buf, sizeof(buf)) <= 0)
+ return -1;
+
+ /*
+ * Example:
+ * cgroup /sys/fs/cgroup cgroup2 rw,seclabel,noexec,relatime 0 0
+ */
+ for (fs = strtok(buf, delim); fs; fs = strtok(NULL, delim)) {
+ mount = strtok(NULL, delim);
+ type = strtok(NULL, delim);
+ strtok(NULL, delim);
+ strtok(NULL, delim);
+ strtok(NULL, delim);
+
+ if (strcmp(type, "cgroup2") == 0) {
+ strncpy(root, mount, len);
+ return 0;
+ }
+ }
+
+ return -1;
+}
+
+int cg_create(const char *cgroup)
+{
+ return mkdir(cgroup, 0755);
+}
+
+static int cg_killall(const char *cgroup)
+{
+ char buf[PAGE_SIZE];
+ char *ptr = buf;
+
+ if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf)))
+ return -1;
+
+ while (ptr < buf + sizeof(buf)) {
+ int pid = strtol(ptr, &ptr, 10);
+
+ if (pid == 0)
+ break;
+ if (*ptr)
+ ptr++;
+ else
+ break;
+ if (kill(pid, SIGKILL))
+ return -1;
+ }
+
+ return 0;
+}
+
+int cg_destroy(const char *cgroup)
+{
+ int ret;
+
+retry:
+ ret = rmdir(cgroup);
+ if (ret && errno == EBUSY) {
+ ret = cg_killall(cgroup);
+ if (ret)
+ return ret;
+ usleep(100);
+ goto retry;
+ }
+
+ if (ret && errno == ENOENT)
+ ret = 0;
+
+ return ret;
+}
+
+int cg_enter_current(const char *cgroup)
+{
+ char pidbuf[64];
+
+ snprintf(pidbuf, sizeof(pidbuf), "%d", getpid());
+ return cg_write(cgroup, "cgroup.procs", pidbuf);
+}
+
+int cg_run(const char *cgroup,
+ int (*fn)(const char *cgroup, void *arg),
+ void *arg)
+{
+ int pid, retcode;
+
+ pid = fork();
+ if (pid < 0) {
+ return pid;
+ } else if (pid == 0) {
+ char buf[64];
+
+ snprintf(buf, sizeof(buf), "%d", getpid());
+ if (cg_write(cgroup, "cgroup.procs", buf))
+ exit(EXIT_FAILURE);
+ exit(fn(cgroup, arg));
+ } else {
+ waitpid(pid, &retcode, 0);
+ if (WIFEXITED(retcode))
+ return WEXITSTATUS(retcode);
+ else
+ return -1;
+ }
+}
+
+int cg_run_nowait(const char *cgroup,
+ int (*fn)(const char *cgroup, void *arg),
+ void *arg)
+{
+ int pid;
+
+ pid = fork();
+ if (pid == 0) {
+ char buf[64];
+
+ snprintf(buf, sizeof(buf), "%d", getpid());
+ if (cg_write(cgroup, "cgroup.procs", buf))
+ exit(EXIT_FAILURE);
+ exit(fn(cgroup, arg));
+ }
+
+ return pid;
+}
+
+int get_temp_fd(void)
+{
+ return open(".", O_TMPFILE | O_RDWR | O_EXCL);
+}
+
+int alloc_pagecache(int fd, size_t size)
+{
+ char buf[PAGE_SIZE];
+ struct stat st;
+ int i;
+
+ if (fstat(fd, &st))
+ goto cleanup;
+
+ size += st.st_size;
+
+ if (ftruncate(fd, size))
+ goto cleanup;
+
+ for (i = 0; i < size; i += sizeof(buf))
+ read(fd, buf, sizeof(buf));
+
+ return 0;
+
+cleanup:
+ return -1;
+}
+
+int alloc_anon(const char *cgroup, void *arg)
+{
+ size_t size = (unsigned long)arg;
+ char *buf, *ptr;
+
+ buf = malloc(size);
+ for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
+ *ptr = 0;
+
+ free(buf);
+ return 0;
+}
+
+int is_swap_enabled(void)
+{
+ char buf[PAGE_SIZE];
+ const char delim[] = "\n";
+ int cnt = 0;
+ char *line;
+
+ if (read_text("/proc/swaps", buf, sizeof(buf)) <= 0)
+ return -1;
+
+ for (line = strtok(buf, delim); line; line = strtok(NULL, delim))
+ cnt++;
+
+ return cnt > 1;
+}
+
+int set_oom_adj_score(int pid, int score)
+{
+ char path[PATH_MAX];
+ int fd, len;
+
+ sprintf(path, "/proc/%d/oom_score_adj", pid);
+
+ fd = open(path, O_WRONLY | O_APPEND);
+ if (fd < 0)
+ return fd;
+
+ len = dprintf(fd, "%d", score);
+ if (len < 0) {
+ close(fd);
+ return len;
+ }
+
+ close(fd);
+ return 0;
+}
diff --git a/tools/testing/selftests/cgroup/cgroup_util.h b/tools/testing/selftests/cgroup/cgroup_util.h
new file mode 100644
index 000000000..9ac8b7958
--- /dev/null
+++ b/tools/testing/selftests/cgroup/cgroup_util.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <stdlib.h>
+
+#define PAGE_SIZE 4096
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+#define MB(x) (x << 20)
+
+/*
+ * Checks if two given values differ by less than err% of their sum.
+ */
+static inline int values_close(long a, long b, int err)
+{
+ return abs(a - b) <= (a + b) / 100 * err;
+}
+
+extern int cg_find_unified_root(char *root, size_t len);
+extern char *cg_name(const char *root, const char *name);
+extern char *cg_name_indexed(const char *root, const char *name, int index);
+extern int cg_create(const char *cgroup);
+extern int cg_destroy(const char *cgroup);
+extern int cg_read(const char *cgroup, const char *control,
+ char *buf, size_t len);
+extern int cg_read_strcmp(const char *cgroup, const char *control,
+ const char *expected);
+extern int cg_read_strstr(const char *cgroup, const char *control,
+ const char *needle);
+extern long cg_read_long(const char *cgroup, const char *control);
+long cg_read_key_long(const char *cgroup, const char *control, const char *key);
+extern int cg_write(const char *cgroup, const char *control, char *buf);
+extern int cg_run(const char *cgroup,
+ int (*fn)(const char *cgroup, void *arg),
+ void *arg);
+extern int cg_enter_current(const char *cgroup);
+extern int cg_run_nowait(const char *cgroup,
+ int (*fn)(const char *cgroup, void *arg),
+ void *arg);
+extern int get_temp_fd(void);
+extern int alloc_pagecache(int fd, size_t size);
+extern int alloc_anon(const char *cgroup, void *arg);
+extern int is_swap_enabled(void);
+extern int set_oom_adj_score(int pid, int score);
diff --git a/tools/testing/selftests/cgroup/test_core.c b/tools/testing/selftests/cgroup/test_core.c
new file mode 100644
index 000000000..599234c5e
--- /dev/null
+++ b/tools/testing/selftests/cgroup/test_core.c
@@ -0,0 +1,567 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#define _GNU_SOURCE
+#include <linux/limits.h>
+#include <linux/sched.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sched.h>
+#include <stdio.h>
+#include <errno.h>
+
+#include "../kselftest.h"
+#include "cgroup_util.h"
+
+/*
+ * A(0) - B(0) - C(1)
+ * \ D(0)
+ *
+ * A, B and C's "populated" fields would be 1 while D's 0.
+ * test that after the one process in C is moved to root,
+ * A,B and C's "populated" fields would flip to "0" and file
+ * modified events will be generated on the
+ * "cgroup.events" files of both cgroups.
+ */
+static int test_cgcore_populated(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *cg_test_a = NULL, *cg_test_b = NULL;
+ char *cg_test_c = NULL, *cg_test_d = NULL;
+
+ cg_test_a = cg_name(root, "cg_test_a");
+ cg_test_b = cg_name(root, "cg_test_a/cg_test_b");
+ cg_test_c = cg_name(root, "cg_test_a/cg_test_b/cg_test_c");
+ cg_test_d = cg_name(root, "cg_test_a/cg_test_b/cg_test_d");
+
+ if (!cg_test_a || !cg_test_b || !cg_test_c || !cg_test_d)
+ goto cleanup;
+
+ if (cg_create(cg_test_a))
+ goto cleanup;
+
+ if (cg_create(cg_test_b))
+ goto cleanup;
+
+ if (cg_create(cg_test_c))
+ goto cleanup;
+
+ if (cg_create(cg_test_d))
+ goto cleanup;
+
+ if (cg_enter_current(cg_test_c))
+ goto cleanup;
+
+ if (cg_read_strcmp(cg_test_a, "cgroup.events", "populated 1\n"))
+ goto cleanup;
+
+ if (cg_read_strcmp(cg_test_b, "cgroup.events", "populated 1\n"))
+ goto cleanup;
+
+ if (cg_read_strcmp(cg_test_c, "cgroup.events", "populated 1\n"))
+ goto cleanup;
+
+ if (cg_read_strcmp(cg_test_d, "cgroup.events", "populated 0\n"))
+ goto cleanup;
+
+ if (cg_enter_current(root))
+ goto cleanup;
+
+ if (cg_read_strcmp(cg_test_a, "cgroup.events", "populated 0\n"))
+ goto cleanup;
+
+ if (cg_read_strcmp(cg_test_b, "cgroup.events", "populated 0\n"))
+ goto cleanup;
+
+ if (cg_read_strcmp(cg_test_c, "cgroup.events", "populated 0\n"))
+ goto cleanup;
+
+ if (cg_read_strcmp(cg_test_d, "cgroup.events", "populated 0\n"))
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ if (cg_test_d)
+ cg_destroy(cg_test_d);
+ if (cg_test_c)
+ cg_destroy(cg_test_c);
+ if (cg_test_b)
+ cg_destroy(cg_test_b);
+ if (cg_test_a)
+ cg_destroy(cg_test_a);
+ free(cg_test_d);
+ free(cg_test_c);
+ free(cg_test_b);
+ free(cg_test_a);
+ return ret;
+}
+
+/*
+ * A (domain threaded) - B (threaded) - C (domain)
+ *
+ * test that C can't be used until it is turned into a
+ * threaded cgroup. "cgroup.type" file will report "domain (invalid)" in
+ * these cases. Operations which fail due to invalid topology use
+ * EOPNOTSUPP as the errno.
+ */
+static int test_cgcore_invalid_domain(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *grandparent = NULL, *parent = NULL, *child = NULL;
+
+ grandparent = cg_name(root, "cg_test_grandparent");
+ parent = cg_name(root, "cg_test_grandparent/cg_test_parent");
+ child = cg_name(root, "cg_test_grandparent/cg_test_parent/cg_test_child");
+ if (!parent || !child || !grandparent)
+ goto cleanup;
+
+ if (cg_create(grandparent))
+ goto cleanup;
+
+ if (cg_create(parent))
+ goto cleanup;
+
+ if (cg_create(child))
+ goto cleanup;
+
+ if (cg_write(parent, "cgroup.type", "threaded"))
+ goto cleanup;
+
+ if (cg_read_strcmp(child, "cgroup.type", "domain invalid\n"))
+ goto cleanup;
+
+ if (!cg_enter_current(child))
+ goto cleanup;
+
+ if (errno != EOPNOTSUPP)
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ cg_enter_current(root);
+ if (child)
+ cg_destroy(child);
+ if (parent)
+ cg_destroy(parent);
+ if (grandparent)
+ cg_destroy(grandparent);
+ free(child);
+ free(parent);
+ free(grandparent);
+ return ret;
+}
+
+/*
+ * Test that when a child becomes threaded
+ * the parent type becomes domain threaded.
+ */
+static int test_cgcore_parent_becomes_threaded(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *parent = NULL, *child = NULL;
+
+ parent = cg_name(root, "cg_test_parent");
+ child = cg_name(root, "cg_test_parent/cg_test_child");
+ if (!parent || !child)
+ goto cleanup;
+
+ if (cg_create(parent))
+ goto cleanup;
+
+ if (cg_create(child))
+ goto cleanup;
+
+ if (cg_write(child, "cgroup.type", "threaded"))
+ goto cleanup;
+
+ if (cg_read_strcmp(parent, "cgroup.type", "domain threaded\n"))
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ if (child)
+ cg_destroy(child);
+ if (parent)
+ cg_destroy(parent);
+ free(child);
+ free(parent);
+ return ret;
+
+}
+
+/*
+ * Test that there's no internal process constrain on threaded cgroups.
+ * You can add threads/processes on a parent with a controller enabled.
+ */
+static int test_cgcore_no_internal_process_constraint_on_threads(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *parent = NULL, *child = NULL;
+
+ if (cg_read_strstr(root, "cgroup.controllers", "cpu") ||
+ cg_write(root, "cgroup.subtree_control", "+cpu")) {
+ ret = KSFT_SKIP;
+ goto cleanup;
+ }
+
+ parent = cg_name(root, "cg_test_parent");
+ child = cg_name(root, "cg_test_parent/cg_test_child");
+ if (!parent || !child)
+ goto cleanup;
+
+ if (cg_create(parent))
+ goto cleanup;
+
+ if (cg_create(child))
+ goto cleanup;
+
+ if (cg_write(parent, "cgroup.type", "threaded"))
+ goto cleanup;
+
+ if (cg_write(child, "cgroup.type", "threaded"))
+ goto cleanup;
+
+ if (cg_write(parent, "cgroup.subtree_control", "+cpu"))
+ goto cleanup;
+
+ if (cg_enter_current(parent))
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ cg_enter_current(root);
+ cg_enter_current(root);
+ if (child)
+ cg_destroy(child);
+ if (parent)
+ cg_destroy(parent);
+ free(child);
+ free(parent);
+ return ret;
+}
+
+/*
+ * Test that you can't enable a controller on a child if it's not enabled
+ * on the parent.
+ */
+static int test_cgcore_top_down_constraint_enable(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *parent = NULL, *child = NULL;
+
+ parent = cg_name(root, "cg_test_parent");
+ child = cg_name(root, "cg_test_parent/cg_test_child");
+ if (!parent || !child)
+ goto cleanup;
+
+ if (cg_create(parent))
+ goto cleanup;
+
+ if (cg_create(child))
+ goto cleanup;
+
+ if (!cg_write(child, "cgroup.subtree_control", "+memory"))
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ if (child)
+ cg_destroy(child);
+ if (parent)
+ cg_destroy(parent);
+ free(child);
+ free(parent);
+ return ret;
+}
+
+/*
+ * Test that you can't disable a controller on a parent
+ * if it's enabled in a child.
+ */
+static int test_cgcore_top_down_constraint_disable(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *parent = NULL, *child = NULL;
+
+ parent = cg_name(root, "cg_test_parent");
+ child = cg_name(root, "cg_test_parent/cg_test_child");
+ if (!parent || !child)
+ goto cleanup;
+
+ if (cg_create(parent))
+ goto cleanup;
+
+ if (cg_create(child))
+ goto cleanup;
+
+ if (cg_write(parent, "cgroup.subtree_control", "+memory"))
+ goto cleanup;
+
+ if (cg_write(child, "cgroup.subtree_control", "+memory"))
+ goto cleanup;
+
+ if (!cg_write(parent, "cgroup.subtree_control", "-memory"))
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ if (child)
+ cg_destroy(child);
+ if (parent)
+ cg_destroy(parent);
+ free(child);
+ free(parent);
+ return ret;
+}
+
+/*
+ * Test internal process constraint.
+ * You can't add a pid to a domain parent if a controller is enabled.
+ */
+static int test_cgcore_internal_process_constraint(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *parent = NULL, *child = NULL;
+
+ parent = cg_name(root, "cg_test_parent");
+ child = cg_name(root, "cg_test_parent/cg_test_child");
+ if (!parent || !child)
+ goto cleanup;
+
+ if (cg_create(parent))
+ goto cleanup;
+
+ if (cg_create(child))
+ goto cleanup;
+
+ if (cg_write(parent, "cgroup.subtree_control", "+memory"))
+ goto cleanup;
+
+ if (!cg_enter_current(parent))
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ if (child)
+ cg_destroy(child);
+ if (parent)
+ cg_destroy(parent);
+ free(child);
+ free(parent);
+ return ret;
+}
+
+/*
+ * cgroup migration permission check should be performed based on the
+ * credentials at the time of open instead of write.
+ */
+static int test_cgcore_lesser_euid_open(const char *root)
+{
+ const uid_t test_euid = 65534; /* usually nobody, any !root is fine */
+ int ret = KSFT_FAIL;
+ char *cg_test_a = NULL, *cg_test_b = NULL;
+ char *cg_test_a_procs = NULL, *cg_test_b_procs = NULL;
+ int cg_test_b_procs_fd = -1;
+ uid_t saved_uid;
+
+ cg_test_a = cg_name(root, "cg_test_a");
+ cg_test_b = cg_name(root, "cg_test_b");
+
+ if (!cg_test_a || !cg_test_b)
+ goto cleanup;
+
+ cg_test_a_procs = cg_name(cg_test_a, "cgroup.procs");
+ cg_test_b_procs = cg_name(cg_test_b, "cgroup.procs");
+
+ if (!cg_test_a_procs || !cg_test_b_procs)
+ goto cleanup;
+
+ if (cg_create(cg_test_a) || cg_create(cg_test_b))
+ goto cleanup;
+
+ if (cg_enter_current(cg_test_a))
+ goto cleanup;
+
+ if (chown(cg_test_a_procs, test_euid, -1) ||
+ chown(cg_test_b_procs, test_euid, -1))
+ goto cleanup;
+
+ saved_uid = geteuid();
+ if (seteuid(test_euid))
+ goto cleanup;
+
+ cg_test_b_procs_fd = open(cg_test_b_procs, O_RDWR);
+
+ if (seteuid(saved_uid))
+ goto cleanup;
+
+ if (cg_test_b_procs_fd < 0)
+ goto cleanup;
+
+ if (write(cg_test_b_procs_fd, "0", 1) >= 0 || errno != EACCES)
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ cg_enter_current(root);
+ if (cg_test_b_procs_fd >= 0)
+ close(cg_test_b_procs_fd);
+ if (cg_test_b)
+ cg_destroy(cg_test_b);
+ if (cg_test_a)
+ cg_destroy(cg_test_a);
+ free(cg_test_b_procs);
+ free(cg_test_a_procs);
+ free(cg_test_b);
+ free(cg_test_a);
+ return ret;
+}
+
+struct lesser_ns_open_thread_arg {
+ const char *path;
+ int fd;
+ int err;
+};
+
+static int lesser_ns_open_thread_fn(void *arg)
+{
+ struct lesser_ns_open_thread_arg *targ = arg;
+
+ targ->fd = open(targ->path, O_RDWR);
+ targ->err = errno;
+ return 0;
+}
+
+/*
+ * cgroup migration permission check should be performed based on the cgroup
+ * namespace at the time of open instead of write.
+ */
+static int test_cgcore_lesser_ns_open(const char *root)
+{
+ static char stack[65536];
+ const uid_t test_euid = 65534; /* usually nobody, any !root is fine */
+ int ret = KSFT_FAIL;
+ char *cg_test_a = NULL, *cg_test_b = NULL;
+ char *cg_test_a_procs = NULL, *cg_test_b_procs = NULL;
+ int cg_test_b_procs_fd = -1;
+ struct lesser_ns_open_thread_arg targ = { .fd = -1 };
+ pid_t pid;
+ int status;
+
+ cg_test_a = cg_name(root, "cg_test_a");
+ cg_test_b = cg_name(root, "cg_test_b");
+
+ if (!cg_test_a || !cg_test_b)
+ goto cleanup;
+
+ cg_test_a_procs = cg_name(cg_test_a, "cgroup.procs");
+ cg_test_b_procs = cg_name(cg_test_b, "cgroup.procs");
+
+ if (!cg_test_a_procs || !cg_test_b_procs)
+ goto cleanup;
+
+ if (cg_create(cg_test_a) || cg_create(cg_test_b))
+ goto cleanup;
+
+ if (cg_enter_current(cg_test_b))
+ goto cleanup;
+
+ if (chown(cg_test_a_procs, test_euid, -1) ||
+ chown(cg_test_b_procs, test_euid, -1))
+ goto cleanup;
+
+ targ.path = cg_test_b_procs;
+ pid = clone(lesser_ns_open_thread_fn, stack + sizeof(stack),
+ CLONE_NEWCGROUP | CLONE_FILES | CLONE_VM | SIGCHLD,
+ &targ);
+ if (pid < 0)
+ goto cleanup;
+
+ if (waitpid(pid, &status, 0) < 0)
+ goto cleanup;
+
+ if (!WIFEXITED(status))
+ goto cleanup;
+
+ cg_test_b_procs_fd = targ.fd;
+ if (cg_test_b_procs_fd < 0)
+ goto cleanup;
+
+ if (cg_enter_current(cg_test_a))
+ goto cleanup;
+
+ if ((status = write(cg_test_b_procs_fd, "0", 1)) >= 0 || errno != ENOENT)
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ cg_enter_current(root);
+ if (cg_test_b_procs_fd >= 0)
+ close(cg_test_b_procs_fd);
+ if (cg_test_b)
+ cg_destroy(cg_test_b);
+ if (cg_test_a)
+ cg_destroy(cg_test_a);
+ free(cg_test_b_procs);
+ free(cg_test_a_procs);
+ free(cg_test_b);
+ free(cg_test_a);
+ return ret;
+}
+
+#define T(x) { x, #x }
+struct corecg_test {
+ int (*fn)(const char *root);
+ const char *name;
+} tests[] = {
+ T(test_cgcore_internal_process_constraint),
+ T(test_cgcore_top_down_constraint_enable),
+ T(test_cgcore_top_down_constraint_disable),
+ T(test_cgcore_no_internal_process_constraint_on_threads),
+ T(test_cgcore_parent_becomes_threaded),
+ T(test_cgcore_invalid_domain),
+ T(test_cgcore_populated),
+ T(test_cgcore_lesser_euid_open),
+ T(test_cgcore_lesser_ns_open),
+};
+#undef T
+
+int main(int argc, char *argv[])
+{
+ char root[PATH_MAX];
+ int i, ret = EXIT_SUCCESS;
+
+ if (cg_find_unified_root(root, sizeof(root)))
+ ksft_exit_skip("cgroup v2 isn't mounted\n");
+
+ if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
+ if (cg_write(root, "cgroup.subtree_control", "+memory"))
+ ksft_exit_skip("Failed to set memory controller\n");
+
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ switch (tests[i].fn(root)) {
+ case KSFT_PASS:
+ ksft_test_result_pass("%s\n", tests[i].name);
+ break;
+ case KSFT_SKIP:
+ ksft_test_result_skip("%s\n", tests[i].name);
+ break;
+ default:
+ ret = EXIT_FAILURE;
+ ksft_test_result_fail("%s\n", tests[i].name);
+ break;
+ }
+ }
+
+ return ret;
+}
diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c
new file mode 100644
index 000000000..c19a97dd0
--- /dev/null
+++ b/tools/testing/selftests/cgroup/test_memcontrol.c
@@ -0,0 +1,1228 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#define _GNU_SOURCE
+
+#include <linux/limits.h>
+#include <linux/oom.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <sys/wait.h>
+#include <arpa/inet.h>
+#include <netinet/in.h>
+#include <netdb.h>
+#include <errno.h>
+
+#include "../kselftest.h"
+#include "cgroup_util.h"
+
+/*
+ * This test creates two nested cgroups with and without enabling
+ * the memory controller.
+ */
+static int test_memcg_subtree_control(const char *root)
+{
+ char *parent, *child, *parent2 = NULL, *child2 = NULL;
+ int ret = KSFT_FAIL;
+ char buf[PAGE_SIZE];
+
+ /* Create two nested cgroups with the memory controller enabled */
+ parent = cg_name(root, "memcg_test_0");
+ child = cg_name(root, "memcg_test_0/memcg_test_1");
+ if (!parent || !child)
+ goto cleanup_free;
+
+ if (cg_create(parent))
+ goto cleanup_free;
+
+ if (cg_write(parent, "cgroup.subtree_control", "+memory"))
+ goto cleanup_parent;
+
+ if (cg_create(child))
+ goto cleanup_parent;
+
+ if (cg_read_strstr(child, "cgroup.controllers", "memory"))
+ goto cleanup_child;
+
+ /* Create two nested cgroups without enabling memory controller */
+ parent2 = cg_name(root, "memcg_test_1");
+ child2 = cg_name(root, "memcg_test_1/memcg_test_1");
+ if (!parent2 || !child2)
+ goto cleanup_free2;
+
+ if (cg_create(parent2))
+ goto cleanup_free2;
+
+ if (cg_create(child2))
+ goto cleanup_parent2;
+
+ if (cg_read(child2, "cgroup.controllers", buf, sizeof(buf)))
+ goto cleanup_all;
+
+ if (!cg_read_strstr(child2, "cgroup.controllers", "memory"))
+ goto cleanup_all;
+
+ ret = KSFT_PASS;
+
+cleanup_all:
+ cg_destroy(child2);
+cleanup_parent2:
+ cg_destroy(parent2);
+cleanup_free2:
+ free(parent2);
+ free(child2);
+cleanup_child:
+ cg_destroy(child);
+cleanup_parent:
+ cg_destroy(parent);
+cleanup_free:
+ free(parent);
+ free(child);
+
+ return ret;
+}
+
+static int alloc_anon_50M_check(const char *cgroup, void *arg)
+{
+ size_t size = MB(50);
+ char *buf, *ptr;
+ long anon, current;
+ int ret = -1;
+
+ buf = malloc(size);
+ for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
+ *ptr = 0;
+
+ current = cg_read_long(cgroup, "memory.current");
+ if (current < size)
+ goto cleanup;
+
+ if (!values_close(size, current, 3))
+ goto cleanup;
+
+ anon = cg_read_key_long(cgroup, "memory.stat", "anon ");
+ if (anon < 0)
+ goto cleanup;
+
+ if (!values_close(anon, current, 3))
+ goto cleanup;
+
+ ret = 0;
+cleanup:
+ free(buf);
+ return ret;
+}
+
+static int alloc_pagecache_50M_check(const char *cgroup, void *arg)
+{
+ size_t size = MB(50);
+ int ret = -1;
+ long current, file;
+ int fd;
+
+ fd = get_temp_fd();
+ if (fd < 0)
+ return -1;
+
+ if (alloc_pagecache(fd, size))
+ goto cleanup;
+
+ current = cg_read_long(cgroup, "memory.current");
+ if (current < size)
+ goto cleanup;
+
+ file = cg_read_key_long(cgroup, "memory.stat", "file ");
+ if (file < 0)
+ goto cleanup;
+
+ if (!values_close(file, current, 10))
+ goto cleanup;
+
+ ret = 0;
+
+cleanup:
+ close(fd);
+ return ret;
+}
+
+/*
+ * This test create a memory cgroup, allocates
+ * some anonymous memory and some pagecache
+ * and check memory.current and some memory.stat values.
+ */
+static int test_memcg_current(const char *root)
+{
+ int ret = KSFT_FAIL;
+ long current;
+ char *memcg;
+
+ memcg = cg_name(root, "memcg_test");
+ if (!memcg)
+ goto cleanup;
+
+ if (cg_create(memcg))
+ goto cleanup;
+
+ current = cg_read_long(memcg, "memory.current");
+ if (current != 0)
+ goto cleanup;
+
+ if (cg_run(memcg, alloc_anon_50M_check, NULL))
+ goto cleanup;
+
+ if (cg_run(memcg, alloc_pagecache_50M_check, NULL))
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ cg_destroy(memcg);
+ free(memcg);
+
+ return ret;
+}
+
+static int alloc_pagecache_50M(const char *cgroup, void *arg)
+{
+ int fd = (long)arg;
+
+ return alloc_pagecache(fd, MB(50));
+}
+
+static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg)
+{
+ int fd = (long)arg;
+ int ppid = getppid();
+
+ if (alloc_pagecache(fd, MB(50)))
+ return -1;
+
+ while (getppid() == ppid)
+ sleep(1);
+
+ return 0;
+}
+
+static int alloc_anon_noexit(const char *cgroup, void *arg)
+{
+ int ppid = getppid();
+
+ if (alloc_anon(cgroup, arg))
+ return -1;
+
+ while (getppid() == ppid)
+ sleep(1);
+
+ return 0;
+}
+
+/*
+ * Wait until processes are killed asynchronously by the OOM killer
+ * If we exceed a timeout, fail.
+ */
+static int cg_test_proc_killed(const char *cgroup)
+{
+ int limit;
+
+ for (limit = 10; limit > 0; limit--) {
+ if (cg_read_strcmp(cgroup, "cgroup.procs", "") == 0)
+ return 0;
+
+ usleep(100000);
+ }
+ return -1;
+}
+
+/*
+ * First, this test creates the following hierarchy:
+ * A memory.min = 50M, memory.max = 200M
+ * A/B memory.min = 50M, memory.current = 50M
+ * A/B/C memory.min = 75M, memory.current = 50M
+ * A/B/D memory.min = 25M, memory.current = 50M
+ * A/B/E memory.min = 500M, memory.current = 0
+ * A/B/F memory.min = 0, memory.current = 50M
+ *
+ * Usages are pagecache, but the test keeps a running
+ * process in every leaf cgroup.
+ * Then it creates A/G and creates a significant
+ * memory pressure in it.
+ *
+ * A/B memory.current ~= 50M
+ * A/B/C memory.current ~= 33M
+ * A/B/D memory.current ~= 17M
+ * A/B/E memory.current ~= 0
+ *
+ * After that it tries to allocate more than there is
+ * unprotected memory in A available, and checks
+ * checks that memory.min protects pagecache even
+ * in this case.
+ */
+static int test_memcg_min(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *parent[3] = {NULL};
+ char *children[4] = {NULL};
+ long c[4];
+ int i, attempts;
+ int fd;
+
+ fd = get_temp_fd();
+ if (fd < 0)
+ goto cleanup;
+
+ parent[0] = cg_name(root, "memcg_test_0");
+ if (!parent[0])
+ goto cleanup;
+
+ parent[1] = cg_name(parent[0], "memcg_test_1");
+ if (!parent[1])
+ goto cleanup;
+
+ parent[2] = cg_name(parent[0], "memcg_test_2");
+ if (!parent[2])
+ goto cleanup;
+
+ if (cg_create(parent[0]))
+ goto cleanup;
+
+ if (cg_read_long(parent[0], "memory.min")) {
+ ret = KSFT_SKIP;
+ goto cleanup;
+ }
+
+ if (cg_write(parent[0], "cgroup.subtree_control", "+memory"))
+ goto cleanup;
+
+ if (cg_write(parent[0], "memory.max", "200M"))
+ goto cleanup;
+
+ if (cg_write(parent[0], "memory.swap.max", "0"))
+ goto cleanup;
+
+ if (cg_create(parent[1]))
+ goto cleanup;
+
+ if (cg_write(parent[1], "cgroup.subtree_control", "+memory"))
+ goto cleanup;
+
+ if (cg_create(parent[2]))
+ goto cleanup;
+
+ for (i = 0; i < ARRAY_SIZE(children); i++) {
+ children[i] = cg_name_indexed(parent[1], "child_memcg", i);
+ if (!children[i])
+ goto cleanup;
+
+ if (cg_create(children[i]))
+ goto cleanup;
+
+ if (i == 2)
+ continue;
+
+ cg_run_nowait(children[i], alloc_pagecache_50M_noexit,
+ (void *)(long)fd);
+ }
+
+ if (cg_write(parent[0], "memory.min", "50M"))
+ goto cleanup;
+ if (cg_write(parent[1], "memory.min", "50M"))
+ goto cleanup;
+ if (cg_write(children[0], "memory.min", "75M"))
+ goto cleanup;
+ if (cg_write(children[1], "memory.min", "25M"))
+ goto cleanup;
+ if (cg_write(children[2], "memory.min", "500M"))
+ goto cleanup;
+ if (cg_write(children[3], "memory.min", "0"))
+ goto cleanup;
+
+ attempts = 0;
+ while (!values_close(cg_read_long(parent[1], "memory.current"),
+ MB(150), 3)) {
+ if (attempts++ > 5)
+ break;
+ sleep(1);
+ }
+
+ if (cg_run(parent[2], alloc_anon, (void *)MB(148)))
+ goto cleanup;
+
+ if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
+ goto cleanup;
+
+ for (i = 0; i < ARRAY_SIZE(children); i++)
+ c[i] = cg_read_long(children[i], "memory.current");
+
+ if (!values_close(c[0], MB(33), 10))
+ goto cleanup;
+
+ if (!values_close(c[1], MB(17), 10))
+ goto cleanup;
+
+ if (!values_close(c[2], 0, 1))
+ goto cleanup;
+
+ if (!cg_run(parent[2], alloc_anon, (void *)MB(170)))
+ goto cleanup;
+
+ if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) {
+ if (!children[i])
+ continue;
+
+ cg_destroy(children[i]);
+ free(children[i]);
+ }
+
+ for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) {
+ if (!parent[i])
+ continue;
+
+ cg_destroy(parent[i]);
+ free(parent[i]);
+ }
+ close(fd);
+ return ret;
+}
+
+/*
+ * First, this test creates the following hierarchy:
+ * A memory.low = 50M, memory.max = 200M
+ * A/B memory.low = 50M, memory.current = 50M
+ * A/B/C memory.low = 75M, memory.current = 50M
+ * A/B/D memory.low = 25M, memory.current = 50M
+ * A/B/E memory.low = 500M, memory.current = 0
+ * A/B/F memory.low = 0, memory.current = 50M
+ *
+ * Usages are pagecache.
+ * Then it creates A/G an creates a significant
+ * memory pressure in it.
+ *
+ * Then it checks actual memory usages and expects that:
+ * A/B memory.current ~= 50M
+ * A/B/ memory.current ~= 33M
+ * A/B/D memory.current ~= 17M
+ * A/B/E memory.current ~= 0
+ *
+ * After that it tries to allocate more than there is
+ * unprotected memory in A available,
+ * and checks low and oom events in memory.events.
+ */
+static int test_memcg_low(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *parent[3] = {NULL};
+ char *children[4] = {NULL};
+ long low, oom;
+ long c[4];
+ int i;
+ int fd;
+
+ fd = get_temp_fd();
+ if (fd < 0)
+ goto cleanup;
+
+ parent[0] = cg_name(root, "memcg_test_0");
+ if (!parent[0])
+ goto cleanup;
+
+ parent[1] = cg_name(parent[0], "memcg_test_1");
+ if (!parent[1])
+ goto cleanup;
+
+ parent[2] = cg_name(parent[0], "memcg_test_2");
+ if (!parent[2])
+ goto cleanup;
+
+ if (cg_create(parent[0]))
+ goto cleanup;
+
+ if (cg_read_long(parent[0], "memory.low"))
+ goto cleanup;
+
+ if (cg_write(parent[0], "cgroup.subtree_control", "+memory"))
+ goto cleanup;
+
+ if (cg_write(parent[0], "memory.max", "200M"))
+ goto cleanup;
+
+ if (cg_write(parent[0], "memory.swap.max", "0"))
+ goto cleanup;
+
+ if (cg_create(parent[1]))
+ goto cleanup;
+
+ if (cg_write(parent[1], "cgroup.subtree_control", "+memory"))
+ goto cleanup;
+
+ if (cg_create(parent[2]))
+ goto cleanup;
+
+ for (i = 0; i < ARRAY_SIZE(children); i++) {
+ children[i] = cg_name_indexed(parent[1], "child_memcg", i);
+ if (!children[i])
+ goto cleanup;
+
+ if (cg_create(children[i]))
+ goto cleanup;
+
+ if (i == 2)
+ continue;
+
+ if (cg_run(children[i], alloc_pagecache_50M, (void *)(long)fd))
+ goto cleanup;
+ }
+
+ if (cg_write(parent[0], "memory.low", "50M"))
+ goto cleanup;
+ if (cg_write(parent[1], "memory.low", "50M"))
+ goto cleanup;
+ if (cg_write(children[0], "memory.low", "75M"))
+ goto cleanup;
+ if (cg_write(children[1], "memory.low", "25M"))
+ goto cleanup;
+ if (cg_write(children[2], "memory.low", "500M"))
+ goto cleanup;
+ if (cg_write(children[3], "memory.low", "0"))
+ goto cleanup;
+
+ if (cg_run(parent[2], alloc_anon, (void *)MB(148)))
+ goto cleanup;
+
+ if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
+ goto cleanup;
+
+ for (i = 0; i < ARRAY_SIZE(children); i++)
+ c[i] = cg_read_long(children[i], "memory.current");
+
+ if (!values_close(c[0], MB(33), 10))
+ goto cleanup;
+
+ if (!values_close(c[1], MB(17), 10))
+ goto cleanup;
+
+ if (!values_close(c[2], 0, 1))
+ goto cleanup;
+
+ if (cg_run(parent[2], alloc_anon, (void *)MB(166))) {
+ fprintf(stderr,
+ "memory.low prevents from allocating anon memory\n");
+ goto cleanup;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(children); i++) {
+ oom = cg_read_key_long(children[i], "memory.events", "oom ");
+ low = cg_read_key_long(children[i], "memory.events", "low ");
+
+ if (oom)
+ goto cleanup;
+ if (i < 2 && low <= 0)
+ goto cleanup;
+ if (i >= 2 && low)
+ goto cleanup;
+ }
+
+ ret = KSFT_PASS;
+
+cleanup:
+ for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) {
+ if (!children[i])
+ continue;
+
+ cg_destroy(children[i]);
+ free(children[i]);
+ }
+
+ for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) {
+ if (!parent[i])
+ continue;
+
+ cg_destroy(parent[i]);
+ free(parent[i]);
+ }
+ close(fd);
+ return ret;
+}
+
+static int alloc_pagecache_max_30M(const char *cgroup, void *arg)
+{
+ size_t size = MB(50);
+ int ret = -1;
+ long current;
+ int fd;
+
+ fd = get_temp_fd();
+ if (fd < 0)
+ return -1;
+
+ if (alloc_pagecache(fd, size))
+ goto cleanup;
+
+ current = cg_read_long(cgroup, "memory.current");
+ if (current <= MB(29) || current > MB(30))
+ goto cleanup;
+
+ ret = 0;
+
+cleanup:
+ close(fd);
+ return ret;
+
+}
+
+/*
+ * This test checks that memory.high limits the amount of
+ * memory which can be consumed by either anonymous memory
+ * or pagecache.
+ */
+static int test_memcg_high(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *memcg;
+ long high;
+
+ memcg = cg_name(root, "memcg_test");
+ if (!memcg)
+ goto cleanup;
+
+ if (cg_create(memcg))
+ goto cleanup;
+
+ if (cg_read_strcmp(memcg, "memory.high", "max\n"))
+ goto cleanup;
+
+ if (cg_write(memcg, "memory.swap.max", "0"))
+ goto cleanup;
+
+ if (cg_write(memcg, "memory.high", "30M"))
+ goto cleanup;
+
+ if (cg_run(memcg, alloc_anon, (void *)MB(100)))
+ goto cleanup;
+
+ if (!cg_run(memcg, alloc_pagecache_50M_check, NULL))
+ goto cleanup;
+
+ if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
+ goto cleanup;
+
+ high = cg_read_key_long(memcg, "memory.events", "high ");
+ if (high <= 0)
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ cg_destroy(memcg);
+ free(memcg);
+
+ return ret;
+}
+
+/*
+ * This test checks that memory.max limits the amount of
+ * memory which can be consumed by either anonymous memory
+ * or pagecache.
+ */
+static int test_memcg_max(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *memcg;
+ long current, max;
+
+ memcg = cg_name(root, "memcg_test");
+ if (!memcg)
+ goto cleanup;
+
+ if (cg_create(memcg))
+ goto cleanup;
+
+ if (cg_read_strcmp(memcg, "memory.max", "max\n"))
+ goto cleanup;
+
+ if (cg_write(memcg, "memory.swap.max", "0"))
+ goto cleanup;
+
+ if (cg_write(memcg, "memory.max", "30M"))
+ goto cleanup;
+
+ /* Should be killed by OOM killer */
+ if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
+ goto cleanup;
+
+ if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
+ goto cleanup;
+
+ current = cg_read_long(memcg, "memory.current");
+ if (current > MB(30) || !current)
+ goto cleanup;
+
+ max = cg_read_key_long(memcg, "memory.events", "max ");
+ if (max <= 0)
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ cg_destroy(memcg);
+ free(memcg);
+
+ return ret;
+}
+
+static int alloc_anon_50M_check_swap(const char *cgroup, void *arg)
+{
+ long mem_max = (long)arg;
+ size_t size = MB(50);
+ char *buf, *ptr;
+ long mem_current, swap_current;
+ int ret = -1;
+
+ buf = malloc(size);
+ for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
+ *ptr = 0;
+
+ mem_current = cg_read_long(cgroup, "memory.current");
+ if (!mem_current || !values_close(mem_current, mem_max, 3))
+ goto cleanup;
+
+ swap_current = cg_read_long(cgroup, "memory.swap.current");
+ if (!swap_current ||
+ !values_close(mem_current + swap_current, size, 3))
+ goto cleanup;
+
+ ret = 0;
+cleanup:
+ free(buf);
+ return ret;
+}
+
+/*
+ * This test checks that memory.swap.max limits the amount of
+ * anonymous memory which can be swapped out.
+ */
+static int test_memcg_swap_max(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *memcg;
+ long max;
+
+ if (!is_swap_enabled())
+ return KSFT_SKIP;
+
+ memcg = cg_name(root, "memcg_test");
+ if (!memcg)
+ goto cleanup;
+
+ if (cg_create(memcg))
+ goto cleanup;
+
+ if (cg_read_long(memcg, "memory.swap.current")) {
+ ret = KSFT_SKIP;
+ goto cleanup;
+ }
+
+ if (cg_read_strcmp(memcg, "memory.max", "max\n"))
+ goto cleanup;
+
+ if (cg_read_strcmp(memcg, "memory.swap.max", "max\n"))
+ goto cleanup;
+
+ if (cg_write(memcg, "memory.swap.max", "30M"))
+ goto cleanup;
+
+ if (cg_write(memcg, "memory.max", "30M"))
+ goto cleanup;
+
+ /* Should be killed by OOM killer */
+ if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
+ goto cleanup;
+
+ if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
+ goto cleanup;
+
+ if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
+ goto cleanup;
+
+ if (cg_run(memcg, alloc_anon_50M_check_swap, (void *)MB(30)))
+ goto cleanup;
+
+ max = cg_read_key_long(memcg, "memory.events", "max ");
+ if (max <= 0)
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ cg_destroy(memcg);
+ free(memcg);
+
+ return ret;
+}
+
+/*
+ * This test disables swapping and tries to allocate anonymous memory
+ * up to OOM. Then it checks for oom and oom_kill events in
+ * memory.events.
+ */
+static int test_memcg_oom_events(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *memcg;
+
+ memcg = cg_name(root, "memcg_test");
+ if (!memcg)
+ goto cleanup;
+
+ if (cg_create(memcg))
+ goto cleanup;
+
+ if (cg_write(memcg, "memory.max", "30M"))
+ goto cleanup;
+
+ if (cg_write(memcg, "memory.swap.max", "0"))
+ goto cleanup;
+
+ if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
+ goto cleanup;
+
+ if (cg_read_strcmp(memcg, "cgroup.procs", ""))
+ goto cleanup;
+
+ if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
+ goto cleanup;
+
+ if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ cg_destroy(memcg);
+ free(memcg);
+
+ return ret;
+}
+
+struct tcp_server_args {
+ unsigned short port;
+ int ctl[2];
+};
+
+static int tcp_server(const char *cgroup, void *arg)
+{
+ struct tcp_server_args *srv_args = arg;
+ struct sockaddr_in6 saddr = { 0 };
+ socklen_t slen = sizeof(saddr);
+ int sk, client_sk, ctl_fd, yes = 1, ret = -1;
+
+ close(srv_args->ctl[0]);
+ ctl_fd = srv_args->ctl[1];
+
+ saddr.sin6_family = AF_INET6;
+ saddr.sin6_addr = in6addr_any;
+ saddr.sin6_port = htons(srv_args->port);
+
+ sk = socket(AF_INET6, SOCK_STREAM, 0);
+ if (sk < 0)
+ return ret;
+
+ if (setsockopt(sk, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0)
+ goto cleanup;
+
+ if (bind(sk, (struct sockaddr *)&saddr, slen)) {
+ write(ctl_fd, &errno, sizeof(errno));
+ goto cleanup;
+ }
+
+ if (listen(sk, 1))
+ goto cleanup;
+
+ ret = 0;
+ if (write(ctl_fd, &ret, sizeof(ret)) != sizeof(ret)) {
+ ret = -1;
+ goto cleanup;
+ }
+
+ client_sk = accept(sk, NULL, NULL);
+ if (client_sk < 0)
+ goto cleanup;
+
+ ret = -1;
+ for (;;) {
+ uint8_t buf[0x100000];
+
+ if (write(client_sk, buf, sizeof(buf)) <= 0) {
+ if (errno == ECONNRESET)
+ ret = 0;
+ break;
+ }
+ }
+
+ close(client_sk);
+
+cleanup:
+ close(sk);
+ return ret;
+}
+
+static int tcp_client(const char *cgroup, unsigned short port)
+{
+ const char server[] = "localhost";
+ struct addrinfo *ai;
+ char servport[6];
+ int retries = 0x10; /* nice round number */
+ int sk, ret;
+
+ snprintf(servport, sizeof(servport), "%hd", port);
+ ret = getaddrinfo(server, servport, NULL, &ai);
+ if (ret)
+ return ret;
+
+ sk = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
+ if (sk < 0)
+ goto free_ainfo;
+
+ ret = connect(sk, ai->ai_addr, ai->ai_addrlen);
+ if (ret < 0)
+ goto close_sk;
+
+ ret = KSFT_FAIL;
+ while (retries--) {
+ uint8_t buf[0x100000];
+ long current, sock;
+
+ if (read(sk, buf, sizeof(buf)) <= 0)
+ goto close_sk;
+
+ current = cg_read_long(cgroup, "memory.current");
+ sock = cg_read_key_long(cgroup, "memory.stat", "sock ");
+
+ if (current < 0 || sock < 0)
+ goto close_sk;
+
+ if (current < sock)
+ goto close_sk;
+
+ if (values_close(current, sock, 10)) {
+ ret = KSFT_PASS;
+ break;
+ }
+ }
+
+close_sk:
+ close(sk);
+free_ainfo:
+ freeaddrinfo(ai);
+ return ret;
+}
+
+/*
+ * This test checks socket memory accounting.
+ * The test forks a TCP server listens on a random port between 1000
+ * and 61000. Once it gets a client connection, it starts writing to
+ * its socket.
+ * The TCP client interleaves reads from the socket with check whether
+ * memory.current and memory.stat.sock are similar.
+ */
+static int test_memcg_sock(const char *root)
+{
+ int bind_retries = 5, ret = KSFT_FAIL, pid, err;
+ unsigned short port;
+ char *memcg;
+
+ memcg = cg_name(root, "memcg_test");
+ if (!memcg)
+ goto cleanup;
+
+ if (cg_create(memcg))
+ goto cleanup;
+
+ while (bind_retries--) {
+ struct tcp_server_args args;
+
+ if (pipe(args.ctl))
+ goto cleanup;
+
+ port = args.port = 1000 + rand() % 60000;
+
+ pid = cg_run_nowait(memcg, tcp_server, &args);
+ if (pid < 0)
+ goto cleanup;
+
+ close(args.ctl[1]);
+ if (read(args.ctl[0], &err, sizeof(err)) != sizeof(err))
+ goto cleanup;
+ close(args.ctl[0]);
+
+ if (!err)
+ break;
+ if (err != EADDRINUSE)
+ goto cleanup;
+
+ waitpid(pid, NULL, 0);
+ }
+
+ if (err == EADDRINUSE) {
+ ret = KSFT_SKIP;
+ goto cleanup;
+ }
+
+ if (tcp_client(memcg, port) != KSFT_PASS)
+ goto cleanup;
+
+ waitpid(pid, &err, 0);
+ if (WEXITSTATUS(err))
+ goto cleanup;
+
+ if (cg_read_long(memcg, "memory.current") < 0)
+ goto cleanup;
+
+ if (cg_read_key_long(memcg, "memory.stat", "sock "))
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ cg_destroy(memcg);
+ free(memcg);
+
+ return ret;
+}
+
+/*
+ * This test disables swapping and tries to allocate anonymous memory
+ * up to OOM with memory.group.oom set. Then it checks that all
+ * processes in the leaf (but not the parent) were killed.
+ */
+static int test_memcg_oom_group_leaf_events(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *parent, *child;
+
+ parent = cg_name(root, "memcg_test_0");
+ child = cg_name(root, "memcg_test_0/memcg_test_1");
+
+ if (!parent || !child)
+ goto cleanup;
+
+ if (cg_create(parent))
+ goto cleanup;
+
+ if (cg_create(child))
+ goto cleanup;
+
+ if (cg_write(parent, "cgroup.subtree_control", "+memory"))
+ goto cleanup;
+
+ if (cg_write(child, "memory.max", "50M"))
+ goto cleanup;
+
+ if (cg_write(child, "memory.swap.max", "0"))
+ goto cleanup;
+
+ if (cg_write(child, "memory.oom.group", "1"))
+ goto cleanup;
+
+ cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60));
+ cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
+ cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
+ if (!cg_run(child, alloc_anon, (void *)MB(100)))
+ goto cleanup;
+
+ if (cg_test_proc_killed(child))
+ goto cleanup;
+
+ if (cg_read_key_long(child, "memory.events", "oom_kill ") <= 0)
+ goto cleanup;
+
+ if (cg_read_key_long(parent, "memory.events", "oom_kill ") != 0)
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ if (child)
+ cg_destroy(child);
+ if (parent)
+ cg_destroy(parent);
+ free(child);
+ free(parent);
+
+ return ret;
+}
+
+/*
+ * This test disables swapping and tries to allocate anonymous memory
+ * up to OOM with memory.group.oom set. Then it checks that all
+ * processes in the parent and leaf were killed.
+ */
+static int test_memcg_oom_group_parent_events(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *parent, *child;
+
+ parent = cg_name(root, "memcg_test_0");
+ child = cg_name(root, "memcg_test_0/memcg_test_1");
+
+ if (!parent || !child)
+ goto cleanup;
+
+ if (cg_create(parent))
+ goto cleanup;
+
+ if (cg_create(child))
+ goto cleanup;
+
+ if (cg_write(parent, "memory.max", "80M"))
+ goto cleanup;
+
+ if (cg_write(parent, "memory.swap.max", "0"))
+ goto cleanup;
+
+ if (cg_write(parent, "memory.oom.group", "1"))
+ goto cleanup;
+
+ cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60));
+ cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
+ cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
+
+ if (!cg_run(child, alloc_anon, (void *)MB(100)))
+ goto cleanup;
+
+ if (cg_test_proc_killed(child))
+ goto cleanup;
+ if (cg_test_proc_killed(parent))
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ if (child)
+ cg_destroy(child);
+ if (parent)
+ cg_destroy(parent);
+ free(child);
+ free(parent);
+
+ return ret;
+}
+
+/*
+ * This test disables swapping and tries to allocate anonymous memory
+ * up to OOM with memory.group.oom set. Then it checks that all
+ * processes were killed except those set with OOM_SCORE_ADJ_MIN
+ */
+static int test_memcg_oom_group_score_events(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *memcg;
+ int safe_pid;
+
+ memcg = cg_name(root, "memcg_test_0");
+
+ if (!memcg)
+ goto cleanup;
+
+ if (cg_create(memcg))
+ goto cleanup;
+
+ if (cg_write(memcg, "memory.max", "50M"))
+ goto cleanup;
+
+ if (cg_write(memcg, "memory.swap.max", "0"))
+ goto cleanup;
+
+ if (cg_write(memcg, "memory.oom.group", "1"))
+ goto cleanup;
+
+ safe_pid = cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1));
+ if (set_oom_adj_score(safe_pid, OOM_SCORE_ADJ_MIN))
+ goto cleanup;
+
+ cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1));
+ if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
+ goto cleanup;
+
+ if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 3)
+ goto cleanup;
+
+ if (kill(safe_pid, SIGKILL))
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ if (memcg)
+ cg_destroy(memcg);
+ free(memcg);
+
+ return ret;
+}
+
+
+#define T(x) { x, #x }
+struct memcg_test {
+ int (*fn)(const char *root);
+ const char *name;
+} tests[] = {
+ T(test_memcg_subtree_control),
+ T(test_memcg_current),
+ T(test_memcg_min),
+ T(test_memcg_low),
+ T(test_memcg_high),
+ T(test_memcg_max),
+ T(test_memcg_oom_events),
+ T(test_memcg_swap_max),
+ T(test_memcg_sock),
+ T(test_memcg_oom_group_leaf_events),
+ T(test_memcg_oom_group_parent_events),
+ T(test_memcg_oom_group_score_events),
+};
+#undef T
+
+int main(int argc, char **argv)
+{
+ char root[PATH_MAX];
+ int i, ret = EXIT_SUCCESS;
+
+ if (cg_find_unified_root(root, sizeof(root)))
+ ksft_exit_skip("cgroup v2 isn't mounted\n");
+
+ /*
+ * Check that memory controller is available:
+ * memory is listed in cgroup.controllers
+ */
+ if (cg_read_strstr(root, "cgroup.controllers", "memory"))
+ ksft_exit_skip("memory controller isn't available\n");
+
+ if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
+ if (cg_write(root, "cgroup.subtree_control", "+memory"))
+ ksft_exit_skip("Failed to set memory controller\n");
+
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ switch (tests[i].fn(root)) {
+ case KSFT_PASS:
+ ksft_test_result_pass("%s\n", tests[i].name);
+ break;
+ case KSFT_SKIP:
+ ksft_test_result_skip("%s\n", tests[i].name);
+ break;
+ default:
+ ret = EXIT_FAILURE;
+ ksft_test_result_fail("%s\n", tests[i].name);
+ break;
+ }
+ }
+
+ return ret;
+}
diff --git a/tools/testing/selftests/cpu-hotplug/Makefile b/tools/testing/selftests/cpu-hotplug/Makefile
new file mode 100644
index 000000000..d8be047ee
--- /dev/null
+++ b/tools/testing/selftests/cpu-hotplug/Makefile
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
+all:
+
+TEST_PROGS := cpu-on-off-test.sh
+
+include ../lib.mk
+
+run_full_test:
+ @/bin/bash ./cpu-on-off-test.sh -a || echo "cpu-hotplug selftests: [FAIL]"
+
+clean:
diff --git a/tools/testing/selftests/cpu-hotplug/config b/tools/testing/selftests/cpu-hotplug/config
new file mode 100644
index 000000000..d4aca2ad5
--- /dev/null
+++ b/tools/testing/selftests/cpu-hotplug/config
@@ -0,0 +1 @@
+CONFIG_NOTIFIER_ERROR_INJECTION=y
diff --git a/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh
new file mode 100755
index 000000000..0d26b5e3f
--- /dev/null
+++ b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh
@@ -0,0 +1,293 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+SYSFS=
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+prerequisite()
+{
+ msg="skip all tests:"
+
+ if [ $UID != 0 ]; then
+ echo $msg must be run as root >&2
+ exit $ksft_skip
+ fi
+
+ taskset -p 01 $$
+
+ SYSFS=`mount -t sysfs | head -1 | awk '{ print $3 }'`
+
+ if [ ! -d "$SYSFS" ]; then
+ echo $msg sysfs is not mounted >&2
+ exit $ksft_skip
+ fi
+
+ if ! ls $SYSFS/devices/system/cpu/cpu* > /dev/null 2>&1; then
+ echo $msg cpu hotplug is not supported >&2
+ exit $ksft_skip
+ fi
+
+ echo "CPU online/offline summary:"
+ online_cpus=`cat $SYSFS/devices/system/cpu/online`
+ online_max=${online_cpus##*-}
+
+ if [[ "$online_cpus" = "$online_max" ]]; then
+ echo "$msg: since there is only one cpu: $online_cpus"
+ exit $ksft_skip
+ fi
+
+ present_cpus=`cat $SYSFS/devices/system/cpu/present`
+ present_max=${present_cpus##*-}
+ echo "present_cpus = $present_cpus present_max = $present_max"
+
+ echo -e "\t Cpus in online state: $online_cpus"
+
+ offline_cpus=`cat $SYSFS/devices/system/cpu/offline`
+ if [[ "a$offline_cpus" = "a" ]]; then
+ offline_cpus=0
+ else
+ offline_max=${offline_cpus##*-}
+ fi
+ echo -e "\t Cpus in offline state: $offline_cpus"
+}
+
+#
+# list all hot-pluggable CPUs
+#
+hotpluggable_cpus()
+{
+ local state=${1:-.\*}
+
+ for cpu in $SYSFS/devices/system/cpu/cpu*; do
+ if [ -f $cpu/online ] && grep -q $state $cpu/online; then
+ echo ${cpu##/*/cpu}
+ fi
+ done
+}
+
+hotplaggable_offline_cpus()
+{
+ hotpluggable_cpus 0
+}
+
+hotpluggable_online_cpus()
+{
+ hotpluggable_cpus 1
+}
+
+cpu_is_online()
+{
+ grep -q 1 $SYSFS/devices/system/cpu/cpu$1/online
+}
+
+cpu_is_offline()
+{
+ grep -q 0 $SYSFS/devices/system/cpu/cpu$1/online
+}
+
+online_cpu()
+{
+ echo 1 > $SYSFS/devices/system/cpu/cpu$1/online
+}
+
+offline_cpu()
+{
+ echo 0 > $SYSFS/devices/system/cpu/cpu$1/online
+}
+
+online_cpu_expect_success()
+{
+ local cpu=$1
+
+ if ! online_cpu $cpu; then
+ echo $FUNCNAME $cpu: unexpected fail >&2
+ exit 1
+ elif ! cpu_is_online $cpu; then
+ echo $FUNCNAME $cpu: unexpected offline >&2
+ exit 1
+ fi
+}
+
+online_cpu_expect_fail()
+{
+ local cpu=$1
+
+ if online_cpu $cpu 2> /dev/null; then
+ echo $FUNCNAME $cpu: unexpected success >&2
+ exit 1
+ elif ! cpu_is_offline $cpu; then
+ echo $FUNCNAME $cpu: unexpected online >&2
+ exit 1
+ fi
+}
+
+offline_cpu_expect_success()
+{
+ local cpu=$1
+
+ if ! offline_cpu $cpu; then
+ echo $FUNCNAME $cpu: unexpected fail >&2
+ exit 1
+ elif ! cpu_is_offline $cpu; then
+ echo $FUNCNAME $cpu: unexpected offline >&2
+ exit 1
+ fi
+}
+
+offline_cpu_expect_fail()
+{
+ local cpu=$1
+
+ if offline_cpu $cpu 2> /dev/null; then
+ echo $FUNCNAME $cpu: unexpected success >&2
+ exit 1
+ elif ! cpu_is_online $cpu; then
+ echo $FUNCNAME $cpu: unexpected offline >&2
+ exit 1
+ fi
+}
+
+error=-12
+allcpus=0
+priority=0
+online_cpus=0
+online_max=0
+offline_cpus=0
+offline_max=0
+present_cpus=0
+present_max=0
+
+while getopts e:ahp: opt; do
+ case $opt in
+ e)
+ error=$OPTARG
+ ;;
+ a)
+ allcpus=1
+ ;;
+ h)
+ echo "Usage $0 [ -a ] [ -e errno ] [ -p notifier-priority ]"
+ echo -e "\t default offline one cpu"
+ echo -e "\t run with -a option to offline all cpus"
+ exit
+ ;;
+ p)
+ priority=$OPTARG
+ ;;
+ esac
+done
+
+if ! [ "$error" -ge -4095 -a "$error" -lt 0 ]; then
+ echo "error code must be -4095 <= errno < 0" >&2
+ exit 1
+fi
+
+prerequisite
+
+#
+# Safe test (default) - offline and online one cpu
+#
+if [ $allcpus -eq 0 ]; then
+ echo "Limited scope test: one hotplug cpu"
+ echo -e "\t (leaves cpu in the original state):"
+ echo -e "\t online to offline to online: cpu $online_max"
+ offline_cpu_expect_success $online_max
+ online_cpu_expect_success $online_max
+
+ if [[ $offline_cpus -gt 0 ]]; then
+ echo -e "\t offline to online to offline: cpu $present_max"
+ online_cpu_expect_success $present_max
+ offline_cpu_expect_success $present_max
+ online_cpu $present_max
+ fi
+ exit 0
+else
+ echo "Full scope test: all hotplug cpus"
+ echo -e "\t online all offline cpus"
+ echo -e "\t offline all online cpus"
+ echo -e "\t online all offline cpus"
+fi
+
+#
+# Online all hot-pluggable CPUs
+#
+for cpu in `hotplaggable_offline_cpus`; do
+ online_cpu_expect_success $cpu
+done
+
+#
+# Offline all hot-pluggable CPUs
+#
+for cpu in `hotpluggable_online_cpus`; do
+ offline_cpu_expect_success $cpu
+done
+
+#
+# Online all hot-pluggable CPUs again
+#
+for cpu in `hotplaggable_offline_cpus`; do
+ online_cpu_expect_success $cpu
+done
+
+#
+# Test with cpu notifier error injection
+#
+
+DEBUGFS=`mount -t debugfs | head -1 | awk '{ print $3 }'`
+NOTIFIER_ERR_INJECT_DIR=$DEBUGFS/notifier-error-inject/cpu
+
+prerequisite_extra()
+{
+ msg="skip extra tests:"
+
+ /sbin/modprobe -q -r cpu-notifier-error-inject
+ /sbin/modprobe -q cpu-notifier-error-inject priority=$priority
+
+ if [ ! -d "$DEBUGFS" ]; then
+ echo $msg debugfs is not mounted >&2
+ exit $ksft_skip
+ fi
+
+ if [ ! -d $NOTIFIER_ERR_INJECT_DIR ]; then
+ echo $msg cpu-notifier-error-inject module is not available >&2
+ exit $ksft_skip
+ fi
+}
+
+prerequisite_extra
+
+#
+# Offline all hot-pluggable CPUs
+#
+echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_DOWN_PREPARE/error
+for cpu in `hotpluggable_online_cpus`; do
+ offline_cpu_expect_success $cpu
+done
+
+#
+# Test CPU hot-add error handling (offline => online)
+#
+echo $error > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_UP_PREPARE/error
+for cpu in `hotplaggable_offline_cpus`; do
+ online_cpu_expect_fail $cpu
+done
+
+#
+# Online all hot-pluggable CPUs
+#
+echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_UP_PREPARE/error
+for cpu in `hotplaggable_offline_cpus`; do
+ online_cpu_expect_success $cpu
+done
+
+#
+# Test CPU hot-remove error handling (online => offline)
+#
+echo $error > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_DOWN_PREPARE/error
+for cpu in `hotpluggable_online_cpus`; do
+ offline_cpu_expect_fail $cpu
+done
+
+echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_DOWN_PREPARE/error
+/sbin/modprobe -q -r cpu-notifier-error-inject
diff --git a/tools/testing/selftests/cpufreq/Makefile b/tools/testing/selftests/cpufreq/Makefile
new file mode 100644
index 000000000..c86ca8342
--- /dev/null
+++ b/tools/testing/selftests/cpufreq/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+all:
+
+TEST_PROGS := main.sh
+TEST_FILES := cpu.sh cpufreq.sh governor.sh module.sh special-tests.sh
+
+include ../lib.mk
+
+clean:
diff --git a/tools/testing/selftests/cpufreq/config b/tools/testing/selftests/cpufreq/config
new file mode 100644
index 000000000..27ff72ebd
--- /dev/null
+++ b/tools/testing/selftests/cpufreq/config
@@ -0,0 +1,15 @@
+CONFIG_CPU_FREQ=y
+CONFIG_CPU_FREQ_STAT=y
+CONFIG_CPU_FREQ_GOV_POWERSAVE=y
+CONFIG_CPU_FREQ_GOV_USERSPACE=y
+CONFIG_CPU_FREQ_GOV_ONDEMAND=y
+CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y
+CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y
+CONFIG_DEBUG_RT_MUTEXES=y
+CONFIG_DEBUG_PI_LIST=y
+CONFIG_DEBUG_SPINLOCK=y
+CONFIG_DEBUG_MUTEXES=y
+CONFIG_DEBUG_LOCK_ALLOC=y
+CONFIG_PROVE_LOCKING=y
+CONFIG_LOCKDEP=y
+CONFIG_DEBUG_ATOMIC_SLEEP=y
diff --git a/tools/testing/selftests/cpufreq/cpu.sh b/tools/testing/selftests/cpufreq/cpu.sh
new file mode 100755
index 000000000..39fdcdfb8
--- /dev/null
+++ b/tools/testing/selftests/cpufreq/cpu.sh
@@ -0,0 +1,85 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# CPU helpers
+
+# protect against multiple inclusion
+if [ $FILE_CPU ]; then
+ return 0
+else
+ FILE_CPU=DONE
+fi
+
+source cpufreq.sh
+
+for_each_cpu()
+{
+ cpus=$(ls $CPUROOT | grep "cpu[0-9].*")
+ for cpu in $cpus; do
+ $@ $cpu
+ done
+}
+
+for_each_non_boot_cpu()
+{
+ cpus=$(ls $CPUROOT | grep "cpu[1-9].*")
+ for cpu in $cpus; do
+ $@ $cpu
+ done
+}
+
+#$1: cpu
+offline_cpu()
+{
+ printf "Offline $1\n"
+ echo 0 > $CPUROOT/$1/online
+}
+
+#$1: cpu
+online_cpu()
+{
+ printf "Online $1\n"
+ echo 1 > $CPUROOT/$1/online
+}
+
+#$1: cpu
+reboot_cpu()
+{
+ offline_cpu $1
+ online_cpu $1
+}
+
+# Reboot CPUs
+# param: number of times we want to run the loop
+reboot_cpus()
+{
+ printf "** Test: Running ${FUNCNAME[0]} for $1 loops **\n\n"
+
+ for i in `seq 1 $1`; do
+ for_each_non_boot_cpu offline_cpu
+ for_each_non_boot_cpu online_cpu
+ printf "\n"
+ done
+
+ printf "\n%s\n\n" "------------------------------------------------"
+}
+
+# Prints warning for all CPUs with missing cpufreq directory
+print_unmanaged_cpus()
+{
+ for_each_cpu cpu_should_have_cpufreq_directory
+}
+
+# Counts CPUs with cpufreq directories
+count_cpufreq_managed_cpus()
+{
+ count=0;
+
+ for cpu in `ls $CPUROOT | grep "cpu[0-9].*"`; do
+ if [ -d $CPUROOT/$cpu/cpufreq ]; then
+ let count=count+1;
+ fi
+ done
+
+ echo $count;
+}
diff --git a/tools/testing/selftests/cpufreq/cpufreq.sh b/tools/testing/selftests/cpufreq/cpufreq.sh
new file mode 100755
index 000000000..b583a2fb4
--- /dev/null
+++ b/tools/testing/selftests/cpufreq/cpufreq.sh
@@ -0,0 +1,242 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# protect against multiple inclusion
+if [ $FILE_CPUFREQ ]; then
+ return 0
+else
+ FILE_CPUFREQ=DONE
+fi
+
+source cpu.sh
+
+
+# $1: cpu
+cpu_should_have_cpufreq_directory()
+{
+ if [ ! -d $CPUROOT/$1/cpufreq ]; then
+ printf "Warning: No cpufreq directory present for $1\n"
+ fi
+}
+
+cpu_should_not_have_cpufreq_directory()
+{
+ if [ -d $CPUROOT/$1/cpufreq ]; then
+ printf "Warning: cpufreq directory present for $1\n"
+ fi
+}
+
+for_each_policy()
+{
+ policies=$(ls $CPUFREQROOT| grep "policy[0-9].*")
+ for policy in $policies; do
+ $@ $policy
+ done
+}
+
+for_each_policy_concurrent()
+{
+ policies=$(ls $CPUFREQROOT| grep "policy[0-9].*")
+ for policy in $policies; do
+ $@ $policy &
+ done
+}
+
+# $1: Path
+read_cpufreq_files_in_dir()
+{
+ local files=`ls $1`
+
+ printf "Printing directory: $1\n\n"
+
+ for file in $files; do
+ if [ -f $1/$file ]; then
+ printf "$file:"
+ cat $1/$file
+ else
+ printf "\n"
+ read_cpufreq_files_in_dir "$1/$file"
+ fi
+ done
+ printf "\n"
+}
+
+
+read_all_cpufreq_files()
+{
+ printf "** Test: Running ${FUNCNAME[0]} **\n\n"
+
+ read_cpufreq_files_in_dir $CPUFREQROOT
+
+ printf "%s\n\n" "------------------------------------------------"
+}
+
+
+# UPDATE CPUFREQ FILES
+
+# $1: directory path
+update_cpufreq_files_in_dir()
+{
+ local files=`ls $1`
+
+ printf "Updating directory: $1\n\n"
+
+ for file in $files; do
+ if [ -f $1/$file ]; then
+ # file is writable ?
+ local wfile=$(ls -l $1/$file | awk '$1 ~ /^.*w.*/ { print $NF; }')
+
+ if [ ! -z $wfile ]; then
+ # scaling_setspeed is a special file and we
+ # should skip updating it
+ if [ $file != "scaling_setspeed" ]; then
+ local val=$(cat $1/$file)
+ printf "Writing $val to: $file\n"
+ echo $val > $1/$file
+ fi
+ fi
+ else
+ printf "\n"
+ update_cpufreq_files_in_dir "$1/$file"
+ fi
+ done
+
+ printf "\n"
+}
+
+# Update all writable files with their existing values
+update_all_cpufreq_files()
+{
+ printf "** Test: Running ${FUNCNAME[0]} **\n\n"
+
+ update_cpufreq_files_in_dir $CPUFREQROOT
+
+ printf "%s\n\n" "------------------------------------------------"
+}
+
+
+# CHANGE CPU FREQUENCIES
+
+# $1: policy
+find_current_freq()
+{
+ cat $CPUFREQROOT/$1/scaling_cur_freq
+}
+
+# $1: policy
+# $2: frequency
+set_cpu_frequency()
+{
+ printf "Change frequency for $1 to $2\n"
+ echo $2 > $CPUFREQROOT/$1/scaling_setspeed
+}
+
+# $1: policy
+test_all_frequencies()
+{
+ local filepath="$CPUFREQROOT/$1"
+
+ backup_governor $1
+
+ local found=$(switch_governor $1 "userspace")
+ if [ $found = 1 ]; then
+ printf "${FUNCNAME[0]}: userspace governor not available for: $1\n"
+ return;
+ fi
+
+ printf "Switched governor for $1 to userspace\n\n"
+
+ local freqs=$(cat $filepath/scaling_available_frequencies)
+ printf "Available frequencies for $1: $freqs\n\n"
+
+ # Set all frequencies one-by-one
+ for freq in $freqs; do
+ set_cpu_frequency $1 $freq
+ done
+
+ printf "\n"
+
+ restore_governor $1
+}
+
+# $1: loop count
+shuffle_frequency_for_all_cpus()
+{
+ printf "** Test: Running ${FUNCNAME[0]} for $1 loops **\n\n"
+
+ for i in `seq 1 $1`; do
+ for_each_policy test_all_frequencies
+ done
+ printf "\n%s\n\n" "------------------------------------------------"
+}
+
+# Basic cpufreq tests
+cpufreq_basic_tests()
+{
+ printf "*** RUNNING CPUFREQ SANITY TESTS ***\n"
+ printf "====================================\n\n"
+
+ count=$(count_cpufreq_managed_cpus)
+ if [ $count = 0 ]; then
+ printf "No cpu is managed by cpufreq core, exiting\n"
+ exit;
+ else
+ printf "CPUFreq manages: $count CPUs\n\n"
+ fi
+
+ # Detect & print which CPUs are not managed by cpufreq
+ print_unmanaged_cpus
+
+ # read/update all cpufreq files
+ read_all_cpufreq_files
+ update_all_cpufreq_files
+
+ # hotplug cpus
+ reboot_cpus 5
+
+ # Test all frequencies
+ shuffle_frequency_for_all_cpus 2
+
+ # Test all governors
+ shuffle_governors_for_all_cpus 1
+}
+
+# Suspend/resume
+# $1: "suspend" or "hibernate", $2: loop count
+do_suspend()
+{
+ printf "** Test: Running ${FUNCNAME[0]}: Trying $1 for $2 loops **\n\n"
+
+ # Is the directory available
+ if [ ! -d $SYSFS/power/ -o ! -f $SYSFS/power/state ]; then
+ printf "$SYSFS/power/state not available\n"
+ return 1
+ fi
+
+ if [ $1 = "suspend" ]; then
+ filename="mem"
+ elif [ $1 = "hibernate" ]; then
+ filename="disk"
+ else
+ printf "$1 is not a valid option\n"
+ return 1
+ fi
+
+ if [ -n $filename ]; then
+ present=$(cat $SYSFS/power/state | grep $filename)
+
+ if [ -z "$present" ]; then
+ printf "Tried to $1 but $filename isn't present in $SYSFS/power/state\n"
+ return 1;
+ fi
+
+ for i in `seq 1 $2`; do
+ printf "Starting $1\n"
+ echo $filename > $SYSFS/power/state
+ printf "Came out of $1\n"
+
+ printf "Do basic tests after finishing $1 to verify cpufreq state\n\n"
+ cpufreq_basic_tests
+ done
+ fi
+}
diff --git a/tools/testing/selftests/cpufreq/governor.sh b/tools/testing/selftests/cpufreq/governor.sh
new file mode 100755
index 000000000..fe37df79c
--- /dev/null
+++ b/tools/testing/selftests/cpufreq/governor.sh
@@ -0,0 +1,154 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test governors
+
+# protect against multiple inclusion
+if [ $FILE_GOVERNOR ]; then
+ return 0
+else
+ FILE_GOVERNOR=DONE
+fi
+
+source cpu.sh
+source cpufreq.sh
+
+CUR_GOV=
+CUR_FREQ=
+
+# Find governor's directory path
+# $1: policy, $2: governor
+find_gov_directory()
+{
+ if [ -d $CPUFREQROOT/$2 ]; then
+ printf "$CPUFREQROOT/$2\n"
+ elif [ -d $CPUFREQROOT/$1/$2 ]; then
+ printf "$CPUFREQROOT/$1/$2\n"
+ else
+ printf "INVALID\n"
+ fi
+}
+
+# $1: policy
+find_current_governor()
+{
+ cat $CPUFREQROOT/$1/scaling_governor
+}
+
+# $1: policy
+backup_governor()
+{
+ CUR_GOV=$(find_current_governor $1)
+
+ printf "Governor backup done for $1: $CUR_GOV\n"
+
+ if [ $CUR_GOV == "userspace" ]; then
+ CUR_FREQ=$(find_current_freq $1)
+ printf "Governor frequency backup done for $1: $CUR_FREQ\n"
+ fi
+
+ printf "\n"
+}
+
+# $1: policy
+restore_governor()
+{
+ __switch_governor $1 $CUR_GOV
+
+ printf "Governor restored for $1 to $CUR_GOV\n"
+
+ if [ $CUR_GOV == "userspace" ]; then
+ set_cpu_frequency $1 $CUR_FREQ
+ printf "Governor frequency restored for $1: $CUR_FREQ\n"
+ fi
+
+ printf "\n"
+}
+
+# param:
+# $1: policy, $2: governor
+__switch_governor()
+{
+ echo $2 > $CPUFREQROOT/$1/scaling_governor
+}
+
+# param:
+# $1: cpu, $2: governor
+__switch_governor_for_cpu()
+{
+ echo $2 > $CPUROOT/$1/cpufreq/scaling_governor
+}
+
+# SWITCH GOVERNORS
+
+# $1: cpu, $2: governor
+switch_governor()
+{
+ local filepath=$CPUFREQROOT/$1/scaling_available_governors
+
+ # check if governor is available
+ local found=$(cat $filepath | grep $2 | wc -l)
+ if [ $found = 0 ]; then
+ echo 1;
+ return
+ fi
+
+ __switch_governor $1 $2
+ echo 0;
+}
+
+# $1: policy, $2: governor
+switch_show_governor()
+{
+ cur_gov=find_current_governor
+ if [ $cur_gov == "userspace" ]; then
+ cur_freq=find_current_freq
+ fi
+
+ # switch governor
+ __switch_governor $1 $2
+
+ printf "\nSwitched governor for $1 to $2\n\n"
+
+ if [ $2 == "userspace" -o $2 == "powersave" -o $2 == "performance" ]; then
+ printf "No files to read for $2 governor\n\n"
+ return
+ fi
+
+ # show governor files
+ local govpath=$(find_gov_directory $1 $2)
+ read_cpufreq_files_in_dir $govpath
+}
+
+# $1: function to be called, $2: policy
+call_for_each_governor()
+{
+ local filepath=$CPUFREQROOT/$2/scaling_available_governors
+
+ # Exit if cpu isn't managed by cpufreq core
+ if [ ! -f $filepath ]; then
+ return;
+ fi
+
+ backup_governor $2
+
+ local governors=$(cat $filepath)
+ printf "Available governors for $2: $governors\n"
+
+ for governor in $governors; do
+ $1 $2 $governor
+ done
+
+ restore_governor $2
+}
+
+# $1: loop count
+shuffle_governors_for_all_cpus()
+{
+ printf "** Test: Running ${FUNCNAME[0]} for $1 loops **\n\n"
+
+ for i in `seq 1 $1`; do
+ for_each_policy call_for_each_governor switch_show_governor
+ done
+ printf "%s\n\n" "------------------------------------------------"
+}
diff --git a/tools/testing/selftests/cpufreq/main.sh b/tools/testing/selftests/cpufreq/main.sh
new file mode 100755
index 000000000..31f8c9a76
--- /dev/null
+++ b/tools/testing/selftests/cpufreq/main.sh
@@ -0,0 +1,198 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source cpu.sh
+source cpufreq.sh
+source governor.sh
+source module.sh
+source special-tests.sh
+
+FUNC=basic # do basic tests by default
+OUTFILE=cpufreq_selftest
+SYSFS=
+CPUROOT=
+CPUFREQROOT=
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+helpme()
+{
+ printf "Usage: $0 [-h] [-todg args]
+ [-h <help>]
+ [-o <output-file-for-dump>]
+ [-t <basic: Basic cpufreq testing
+ suspend: suspend/resume,
+ hibernate: hibernate/resume,
+ modtest: test driver or governor modules. Only to be used with -d or -g options,
+ sptest1: Simple governor switch to produce lockdep.
+ sptest2: Concurrent governor switch to produce lockdep.
+ sptest3: Governor races, shuffle between governors quickly.
+ sptest4: CPU hotplugs with updates to cpufreq files.>]
+ [-d <driver's module name: only with \"-t modtest>\"]
+ [-g <governor's module name: only with \"-t modtest>\"]
+ \n"
+ exit 2
+}
+
+prerequisite()
+{
+ msg="skip all tests:"
+
+ if [ $UID != 0 ]; then
+ echo $msg must be run as root >&2
+ exit $ksft_skip
+ fi
+
+ taskset -p 01 $$
+
+ SYSFS=`mount -t sysfs | head -1 | awk '{ print $3 }'`
+
+ if [ ! -d "$SYSFS" ]; then
+ echo $msg sysfs is not mounted >&2
+ exit 2
+ fi
+
+ CPUROOT=$SYSFS/devices/system/cpu
+ CPUFREQROOT="$CPUROOT/cpufreq"
+
+ if ! ls $CPUROOT/cpu* > /dev/null 2>&1; then
+ echo $msg cpus not available in sysfs >&2
+ exit 2
+ fi
+
+ if ! ls $CPUROOT/cpufreq > /dev/null 2>&1; then
+ echo $msg cpufreq directory not available in sysfs >&2
+ exit 2
+ fi
+}
+
+parse_arguments()
+{
+ while getopts ht:o:d:g: arg
+ do
+ case $arg in
+ h) # --help
+ helpme
+ ;;
+
+ t) # --func_type (Function to perform: basic, suspend, hibernate, modtest, sptest1/2/3/4 (default: basic))
+ FUNC=$OPTARG
+ ;;
+
+ o) # --output-file (Output file to store dumps)
+ OUTFILE=$OPTARG
+ ;;
+
+ d) # --driver-mod-name (Name of the driver module)
+ DRIVER_MOD=$OPTARG
+ ;;
+
+ g) # --governor-mod-name (Name of the governor module)
+ GOVERNOR_MOD=$OPTARG
+ ;;
+
+ \?)
+ helpme
+ ;;
+ esac
+ done
+}
+
+do_test()
+{
+ # Check if CPUs are managed by cpufreq or not
+ count=$(count_cpufreq_managed_cpus)
+
+ if [ $count = 0 -a $FUNC != "modtest" ]; then
+ echo "No cpu is managed by cpufreq core, exiting"
+ exit 2;
+ fi
+
+ case "$FUNC" in
+ "basic")
+ cpufreq_basic_tests
+ ;;
+
+ "suspend")
+ do_suspend "suspend" 1
+ ;;
+
+ "hibernate")
+ do_suspend "hibernate" 1
+ ;;
+
+ "modtest")
+ # Do we have modules in place?
+ if [ -z $DRIVER_MOD ] && [ -z $GOVERNOR_MOD ]; then
+ echo "No driver or governor module passed with -d or -g"
+ exit 2;
+ fi
+
+ if [ $DRIVER_MOD ]; then
+ if [ $GOVERNOR_MOD ]; then
+ module_test $DRIVER_MOD $GOVERNOR_MOD
+ else
+ module_driver_test $DRIVER_MOD
+ fi
+ else
+ if [ $count = 0 ]; then
+ echo "No cpu is managed by cpufreq core, exiting"
+ exit 2;
+ fi
+
+ module_governor_test $GOVERNOR_MOD
+ fi
+ ;;
+
+ "sptest1")
+ simple_lockdep
+ ;;
+
+ "sptest2")
+ concurrent_lockdep
+ ;;
+
+ "sptest3")
+ governor_race
+ ;;
+
+ "sptest4")
+ hotplug_with_updates
+ ;;
+
+ *)
+ echo "Invalid [-f] function type"
+ helpme
+ ;;
+ esac
+}
+
+# clear dumps
+# $1: file name
+clear_dumps()
+{
+ echo "" > $1.txt
+ echo "" > $1.dmesg_cpufreq.txt
+ echo "" > $1.dmesg_full.txt
+}
+
+# $1: output file name
+dmesg_dumps()
+{
+ dmesg | grep cpufreq >> $1.dmesg_cpufreq.txt
+
+ # We may need the full logs as well
+ dmesg >> $1.dmesg_full.txt
+}
+
+# Parse arguments
+parse_arguments $@
+
+# Make sure all requirements are met
+prerequisite
+
+# Run requested functions
+clear_dumps $OUTFILE
+do_test >> $OUTFILE.txt
+dmesg_dumps $OUTFILE
diff --git a/tools/testing/selftests/cpufreq/module.sh b/tools/testing/selftests/cpufreq/module.sh
new file mode 100755
index 000000000..22563cd12
--- /dev/null
+++ b/tools/testing/selftests/cpufreq/module.sh
@@ -0,0 +1,244 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Modules specific tests cases
+
+# protect against multiple inclusion
+if [ $FILE_MODULE ]; then
+ return 0
+else
+ FILE_MODULE=DONE
+fi
+
+source cpu.sh
+source cpufreq.sh
+source governor.sh
+
+# Check basic insmod/rmmod
+# $1: module
+test_basic_insmod_rmmod()
+{
+ printf "** Test: Running ${FUNCNAME[0]} **\n\n"
+
+ printf "Inserting $1 module\n"
+ # insert module
+ insmod $1
+ if [ $? != 0 ]; then
+ printf "Insmod $1 failed\n"
+ exit;
+ fi
+
+ printf "Removing $1 module\n"
+ # remove module
+ rmmod $1
+ if [ $? != 0 ]; then
+ printf "rmmod $1 failed\n"
+ exit;
+ fi
+
+ printf "\n"
+}
+
+# Insert cpufreq driver module and perform basic tests
+# $1: cpufreq-driver module to insert
+# $2: If we want to play with CPUs (1) or not (0)
+module_driver_test_single()
+{
+ printf "** Test: Running ${FUNCNAME[0]} for driver $1 and cpus_hotplug=$2 **\n\n"
+
+ if [ $2 -eq 1 ]; then
+ # offline all non-boot CPUs
+ for_each_non_boot_cpu offline_cpu
+ printf "\n"
+ fi
+
+ # insert module
+ printf "Inserting $1 module\n\n"
+ insmod $1
+ if [ $? != 0 ]; then
+ printf "Insmod $1 failed\n"
+ return;
+ fi
+
+ if [ $2 -eq 1 ]; then
+ # online all non-boot CPUs
+ for_each_non_boot_cpu online_cpu
+ printf "\n"
+ fi
+
+ # run basic tests
+ cpufreq_basic_tests
+
+ # remove module
+ printf "Removing $1 module\n\n"
+ rmmod $1
+ if [ $? != 0 ]; then
+ printf "rmmod $1 failed\n"
+ return;
+ fi
+
+ # There shouldn't be any cpufreq directories now.
+ for_each_cpu cpu_should_not_have_cpufreq_directory
+ printf "\n"
+}
+
+# $1: cpufreq-driver module to insert
+module_driver_test()
+{
+ printf "** Test: Running ${FUNCNAME[0]} **\n\n"
+
+ # check if module is present or not
+ ls $1 > /dev/null
+ if [ $? != 0 ]; then
+ printf "$1: not present in `pwd` folder\n"
+ return;
+ fi
+
+ # test basic module tests
+ test_basic_insmod_rmmod $1
+
+ # Do simple module test
+ module_driver_test_single $1 0
+
+ # Remove CPUs before inserting module and then bring them back
+ module_driver_test_single $1 1
+ printf "\n"
+}
+
+# find governor name based on governor module name
+# $1: governor module name
+find_gov_name()
+{
+ if [ $1 = "cpufreq_ondemand.ko" ]; then
+ printf "ondemand"
+ elif [ $1 = "cpufreq_conservative.ko" ]; then
+ printf "conservative"
+ elif [ $1 = "cpufreq_userspace.ko" ]; then
+ printf "userspace"
+ elif [ $1 = "cpufreq_performance.ko" ]; then
+ printf "performance"
+ elif [ $1 = "cpufreq_powersave.ko" ]; then
+ printf "powersave"
+ elif [ $1 = "cpufreq_schedutil.ko" ]; then
+ printf "schedutil"
+ fi
+}
+
+# $1: governor string, $2: governor module, $3: policy
+# example: module_governor_test_single "ondemand" "cpufreq_ondemand.ko" 2
+module_governor_test_single()
+{
+ printf "** Test: Running ${FUNCNAME[0]} for $3 **\n\n"
+
+ backup_governor $3
+
+ # switch to new governor
+ printf "Switch from $CUR_GOV to $1\n"
+ switch_show_governor $3 $1
+
+ # try removing module, it should fail as governor is used
+ printf "Removing $2 module\n\n"
+ rmmod $2
+ if [ $? = 0 ]; then
+ printf "WARN: rmmod $2 succeeded even if governor is used\n"
+ insmod $2
+ else
+ printf "Pass: unable to remove $2 while it is being used\n\n"
+ fi
+
+ # switch back to old governor
+ printf "Switchback to $CUR_GOV from $1\n"
+ restore_governor $3
+ printf "\n"
+}
+
+# Insert cpufreq governor module and perform basic tests
+# $1: cpufreq-governor module to insert
+module_governor_test()
+{
+ printf "** Test: Running ${FUNCNAME[0]} **\n\n"
+
+ # check if module is present or not
+ ls $1 > /dev/null
+ if [ $? != 0 ]; then
+ printf "$1: not present in `pwd` folder\n"
+ return;
+ fi
+
+ # test basic module tests
+ test_basic_insmod_rmmod $1
+
+ # insert module
+ printf "Inserting $1 module\n\n"
+ insmod $1
+ if [ $? != 0 ]; then
+ printf "Insmod $1 failed\n"
+ return;
+ fi
+
+ # switch to new governor for each cpu
+ for_each_policy module_governor_test_single $(find_gov_name $1) $1
+
+ # remove module
+ printf "Removing $1 module\n\n"
+ rmmod $1
+ if [ $? != 0 ]; then
+ printf "rmmod $1 failed\n"
+ return;
+ fi
+ printf "\n"
+}
+
+# test modules: driver and governor
+# $1: driver module, $2: governor module
+module_test()
+{
+ printf "** Test: Running ${FUNCNAME[0]} **\n\n"
+
+ # check if modules are present or not
+ ls $1 $2 > /dev/null
+ if [ $? != 0 ]; then
+ printf "$1 or $2: is not present in `pwd` folder\n"
+ return;
+ fi
+
+ # TEST1: Insert gov after driver
+ # insert driver module
+ printf "Inserting $1 module\n\n"
+ insmod $1
+ if [ $? != 0 ]; then
+ printf "Insmod $1 failed\n"
+ return;
+ fi
+
+ # run governor tests
+ module_governor_test $2
+
+ # remove driver module
+ printf "Removing $1 module\n\n"
+ rmmod $1
+ if [ $? != 0 ]; then
+ printf "rmmod $1 failed\n"
+ return;
+ fi
+
+ # TEST2: Insert driver after governor
+ # insert governor module
+ printf "Inserting $2 module\n\n"
+ insmod $2
+ if [ $? != 0 ]; then
+ printf "Insmod $2 failed\n"
+ return;
+ fi
+
+ # run governor tests
+ module_driver_test $1
+
+ # remove driver module
+ printf "Removing $2 module\n\n"
+ rmmod $2
+ if [ $? != 0 ]; then
+ printf "rmmod $2 failed\n"
+ return;
+ fi
+}
diff --git a/tools/testing/selftests/cpufreq/special-tests.sh b/tools/testing/selftests/cpufreq/special-tests.sh
new file mode 100755
index 000000000..8d40505dc
--- /dev/null
+++ b/tools/testing/selftests/cpufreq/special-tests.sh
@@ -0,0 +1,116 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Special test cases reported by people
+
+# Testcase 1: Reported here: http://marc.info/?l=linux-pm&m=140618592709858&w=2
+
+# protect against multiple inclusion
+if [ $FILE_SPECIAL ]; then
+ return 0
+else
+ FILE_SPECIAL=DONE
+fi
+
+source cpu.sh
+source cpufreq.sh
+source governor.sh
+
+# Test 1
+# $1: policy
+__simple_lockdep()
+{
+ # switch to ondemand
+ __switch_governor $1 "ondemand"
+
+ # cat ondemand files
+ local ondir=$(find_gov_directory $1 "ondemand")
+ if [ -z $ondir ]; then
+ printf "${FUNCNAME[0]}Ondemand directory not created, quit"
+ return
+ fi
+
+ cat $ondir/*
+
+ # switch to conservative
+ __switch_governor $1 "conservative"
+}
+
+simple_lockdep()
+{
+ printf "** Test: Running ${FUNCNAME[0]} **\n"
+
+ for_each_policy __simple_lockdep
+}
+
+# Test 2
+# $1: policy
+__concurrent_lockdep()
+{
+ for i in `seq 0 100`; do
+ __simple_lockdep $1
+ done
+}
+
+concurrent_lockdep()
+{
+ printf "** Test: Running ${FUNCNAME[0]} **\n"
+
+ for_each_policy_concurrent __concurrent_lockdep
+}
+
+# Test 3
+quick_shuffle()
+{
+ # this is called concurrently from governor_race
+ for I in `seq 1000`
+ do
+ echo ondemand | sudo tee $CPUFREQROOT/policy*/scaling_governor &
+ echo userspace | sudo tee $CPUFREQROOT/policy*/scaling_governor &
+ done
+}
+
+governor_race()
+{
+ printf "** Test: Running ${FUNCNAME[0]} **\n"
+
+ # run 8 concurrent instances
+ for I in `seq 8`
+ do
+ quick_shuffle &
+ done
+}
+
+# Test 4
+# $1: cpu
+hotplug_with_updates_cpu()
+{
+ local filepath="$CPUROOT/$1/cpufreq"
+
+ # switch to ondemand
+ __switch_governor_for_cpu $1 "ondemand"
+
+ for i in `seq 1 5000`
+ do
+ reboot_cpu $1
+ done &
+
+ local freqs=$(cat $filepath/scaling_available_frequencies)
+ local oldfreq=$(cat $filepath/scaling_min_freq)
+
+ for j in `seq 1 5000`
+ do
+ # Set all frequencies one-by-one
+ for freq in $freqs; do
+ echo $freq > $filepath/scaling_min_freq
+ done
+ done
+
+ # restore old freq
+ echo $oldfreq > $filepath/scaling_min_freq
+}
+
+hotplug_with_updates()
+{
+ for_each_non_boot_cpu hotplug_with_updates_cpu
+}
diff --git a/tools/testing/selftests/drivers/gpu/drm_mm.sh b/tools/testing/selftests/drivers/gpu/drm_mm.sh
new file mode 100755
index 000000000..b789dc825
--- /dev/null
+++ b/tools/testing/selftests/drivers/gpu/drm_mm.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Runs API tests for struct drm_mm (DRM range manager)
+
+if ! /sbin/modprobe -n -q test-drm_mm; then
+ echo "drivers/gpu/drm_mm: [skip]"
+ exit 77
+fi
+
+if /sbin/modprobe -q test-drm_mm; then
+ /sbin/modprobe -q -r test-drm_mm
+ echo "drivers/gpu/drm_mm: ok"
+else
+ echo "drivers/gpu/drm_mm: [FAIL]"
+ exit 1
+fi
diff --git a/tools/testing/selftests/drivers/gpu/i915.sh b/tools/testing/selftests/drivers/gpu/i915.sh
new file mode 100755
index 000000000..d3895bc71
--- /dev/null
+++ b/tools/testing/selftests/drivers/gpu/i915.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Runs hardware independent tests for i915 (drivers/gpu/drm/i915)
+
+if ! /sbin/modprobe -q -r i915; then
+ echo "drivers/gpu/i915: [SKIP]"
+ exit 77
+fi
+
+if /sbin/modprobe -q i915 mock_selftests=-1; then
+ /sbin/modprobe -q -r i915
+ echo "drivers/gpu/i915: ok"
+else
+ echo "drivers/gpu/i915: [FAIL]"
+ exit 1
+fi
diff --git a/tools/testing/selftests/drivers/net/mlxsw/mirror_gre.sh b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre.sh
new file mode 100755
index 000000000..76f1ab489
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre.sh
@@ -0,0 +1,217 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing gretap. See
+# ../../../net/forwarding/mirror_gre_topo_lib.sh for more details.
+#
+# Test offloading various features of offloading gretap mirrors specific to
+# mlxsw.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+NUM_NETIFS=6
+source $lib_dir/lib.sh
+source $lib_dir/mirror_lib.sh
+source $lib_dir/mirror_gre_lib.sh
+source $lib_dir/mirror_gre_topo_lib.sh
+
+setup_keyful()
+{
+ tunnel_create gt6-key ip6gretap 2001:db8:3::1 2001:db8:3::2 \
+ ttl 100 tos inherit allow-localremote \
+ key 1234
+
+ tunnel_create h3-gt6-key ip6gretap 2001:db8:3::2 2001:db8:3::1 \
+ key 1234
+ ip link set h3-gt6-key vrf v$h3
+ matchall_sink_create h3-gt6-key
+
+ ip address add dev $swp3 2001:db8:3::1/64
+ ip address add dev $h3 2001:db8:3::2/64
+}
+
+cleanup_keyful()
+{
+ ip address del dev $h3 2001:db8:3::2/64
+ ip address del dev $swp3 2001:db8:3::1/64
+
+ tunnel_destroy h3-gt6-key
+ tunnel_destroy gt6-key
+}
+
+setup_soft()
+{
+ # Set up a topology for testing underlay routes that point at an
+ # unsupported soft device.
+
+ tunnel_create gt6-soft ip6gretap 2001:db8:4::1 2001:db8:4::2 \
+ ttl 100 tos inherit allow-localremote
+
+ tunnel_create h3-gt6-soft ip6gretap 2001:db8:4::2 2001:db8:4::1
+ ip link set h3-gt6-soft vrf v$h3
+ matchall_sink_create h3-gt6-soft
+
+ ip link add name v1 type veth peer name v2
+ ip link set dev v1 up
+ ip address add dev v1 2001:db8:4::1/64
+
+ ip link set dev v2 vrf v$h3
+ ip link set dev v2 up
+ ip address add dev v2 2001:db8:4::2/64
+}
+
+cleanup_soft()
+{
+ ip link del dev v1
+
+ tunnel_destroy h3-gt6-soft
+ tunnel_destroy gt6-soft
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ vrf_prepare
+ mirror_gre_topo_create
+
+ ip address add dev $swp3 2001:db8:2::1/64
+ ip address add dev $h3 2001:db8:2::2/64
+
+ ip address add dev $swp3 192.0.2.129/28
+ ip address add dev $h3 192.0.2.130/28
+
+ setup_keyful
+ setup_soft
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ cleanup_soft
+ cleanup_keyful
+
+ ip address del dev $h3 2001:db8:2::2/64
+ ip address del dev $swp3 2001:db8:2::1/64
+
+ ip address del dev $h3 192.0.2.130/28
+ ip address del dev $swp3 192.0.2.129/28
+
+ mirror_gre_topo_destroy
+ vrf_cleanup
+}
+
+test_span_gre_ttl_inherit()
+{
+ local tundev=$1; shift
+ local type=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ ip link set dev $tundev type $type ttl inherit
+ mirror_install $swp1 ingress $tundev "matchall $tcflags"
+ fail_test_span_gre_dir $tundev ingress
+
+ ip link set dev $tundev type $type ttl 100
+
+ quick_test_span_gre_dir $tundev ingress
+ mirror_uninstall $swp1 ingress
+
+ log_test "$what: no offload on TTL of inherit ($tcflags)"
+}
+
+test_span_gre_tos_fixed()
+{
+ local tundev=$1; shift
+ local type=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ ip link set dev $tundev type $type tos 0x10
+ mirror_install $swp1 ingress $tundev "matchall $tcflags"
+ fail_test_span_gre_dir $tundev ingress
+
+ ip link set dev $tundev type $type tos inherit
+ quick_test_span_gre_dir $tundev ingress
+ mirror_uninstall $swp1 ingress
+
+ log_test "$what: no offload on a fixed TOS ($tcflags)"
+}
+
+test_span_failable()
+{
+ local should_fail=$1; shift
+ local tundev=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ mirror_install $swp1 ingress $tundev "matchall $tcflags"
+ if ((should_fail)); then
+ fail_test_span_gre_dir $tundev ingress
+ else
+ quick_test_span_gre_dir $tundev ingress
+ fi
+ mirror_uninstall $swp1 ingress
+
+ log_test "$what: should_fail=$should_fail ($tcflags)"
+}
+
+test_failable()
+{
+ local should_fail=$1; shift
+
+ test_span_failable $should_fail gt6-key "mirror to keyful gretap"
+ test_span_failable $should_fail gt6-soft "mirror to gretap w/ soft underlay"
+}
+
+test_sw()
+{
+ slow_path_trap_install $swp1 ingress
+ slow_path_trap_install $swp1 egress
+
+ test_failable 0
+
+ slow_path_trap_uninstall $swp1 egress
+ slow_path_trap_uninstall $swp1 ingress
+}
+
+test_hw()
+{
+ test_failable 1
+
+ test_span_gre_tos_fixed gt4 gretap "mirror to gretap"
+ test_span_gre_tos_fixed gt6 ip6gretap "mirror to ip6gretap"
+
+ test_span_gre_ttl_inherit gt4 gretap "mirror to gretap"
+ test_span_gre_ttl_inherit gt6 ip6gretap "mirror to ip6gretap"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+if ! tc_offload_check; then
+ check_err 1 "Could not test offloaded functionality"
+ log_test "mlxsw-specific tests for mirror to gretap"
+ exit
+fi
+
+tcflags="skip_hw"
+test_sw
+
+tcflags="skip_sw"
+test_hw
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh
new file mode 100644
index 000000000..6f3a70df6
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh
@@ -0,0 +1,197 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# Test offloading a number of mirrors-to-gretap. The test creates a number of
+# tunnels. Then it adds one flower mirror for each of the tunnels, matching a
+# given host IP. Then it generates traffic at each of the host IPs and checks
+# that the traffic has been mirrored at the appropriate tunnel.
+#
+# +--------------------------+ +--------------------------+
+# | H1 | | H2 |
+# | + $h1 | | $h2 + |
+# | | 2001:db8:1:X::1/64 | | 2001:db8:1:X::2/64 | |
+# +-----|--------------------+ +--------------------|-----+
+# | |
+# +-----|-------------------------------------------------------------|-----+
+# | SW o--> mirrors | |
+# | +---|-------------------------------------------------------------|---+ |
+# | | + $swp1 BR $swp2 + | |
+# | +---------------------------------------------------------------------+ |
+# | |
+# | + $swp3 + gt6-<X> (ip6gretap) |
+# | | 2001:db8:2:X::1/64 : loc=2001:db8:2:X::1 |
+# | | : rem=2001:db8:2:X::2 |
+# | | : ttl=100 |
+# | | : tos=inherit |
+# | | : |
+# +-----|--------------------------------:----------------------------------+
+# | :
+# +-----|--------------------------------:----------------------------------+
+# | H3 + $h3 + h3-gt6-<X> (ip6gretap) |
+# | 2001:db8:2:X::2/64 loc=2001:db8:2:X::2 |
+# | rem=2001:db8:2:X::1 |
+# | ttl=100 |
+# | tos=inherit |
+# | |
+# +-------------------------------------------------------------------------+
+
+source ../../../../net/forwarding/mirror_lib.sh
+
+MIRROR_NUM_NETIFS=6
+
+mirror_gre_ipv6_addr()
+{
+ local net=$1; shift
+ local num=$1; shift
+
+ printf "2001:db8:%x:%x" $net $num
+}
+
+mirror_gre_tunnels_create()
+{
+ local count=$1; shift
+ local should_fail=$1; shift
+
+ MIRROR_GRE_BATCH_FILE="$(mktemp)"
+ for ((i=0; i < count; ++i)); do
+ local match_dip=$(mirror_gre_ipv6_addr 1 $i)::2
+ local htun=h3-gt6-$i
+ local tun=gt6-$i
+
+ ((mirror_gre_tunnels++))
+
+ ip address add dev $h1 $(mirror_gre_ipv6_addr 1 $i)::1/64
+ ip address add dev $h2 $(mirror_gre_ipv6_addr 1 $i)::2/64
+
+ ip address add dev $swp3 $(mirror_gre_ipv6_addr 2 $i)::1/64
+ ip address add dev $h3 $(mirror_gre_ipv6_addr 2 $i)::2/64
+
+ tunnel_create $tun ip6gretap \
+ $(mirror_gre_ipv6_addr 2 $i)::1 \
+ $(mirror_gre_ipv6_addr 2 $i)::2 \
+ ttl 100 tos inherit allow-localremote
+
+ tunnel_create $htun ip6gretap \
+ $(mirror_gre_ipv6_addr 2 $i)::2 \
+ $(mirror_gre_ipv6_addr 2 $i)::1
+ ip link set $htun vrf v$h3
+ matchall_sink_create $htun
+
+ cat >> $MIRROR_GRE_BATCH_FILE <<-EOF
+ filter add dev $swp1 ingress pref 1000 \
+ protocol ipv6 \
+ flower $tcflags dst_ip $match_dip \
+ action mirred egress mirror dev $tun
+ EOF
+ done
+
+ tc -b $MIRROR_GRE_BATCH_FILE
+ check_err_fail $should_fail $? "Mirror rule insertion"
+}
+
+mirror_gre_tunnels_destroy()
+{
+ local count=$1; shift
+
+ for ((i=0; i < count; ++i)); do
+ local htun=h3-gt6-$i
+ local tun=gt6-$i
+
+ ip address del dev $h3 $(mirror_gre_ipv6_addr 2 $i)::2/64
+ ip address del dev $swp3 $(mirror_gre_ipv6_addr 2 $i)::1/64
+
+ ip address del dev $h2 $(mirror_gre_ipv6_addr 1 $i)::2/64
+ ip address del dev $h1 $(mirror_gre_ipv6_addr 1 $i)::1/64
+
+ tunnel_destroy $htun
+ tunnel_destroy $tun
+ done
+}
+
+__mirror_gre_test()
+{
+ local count=$1; shift
+ local should_fail=$1; shift
+
+ mirror_gre_tunnels_create $count $should_fail
+ if ((should_fail)); then
+ return
+ fi
+
+ sleep 5
+
+ for ((i = 0; i < count; ++i)); do
+ local dip=$(mirror_gre_ipv6_addr 1 $i)::2
+ local htun=h3-gt6-$i
+ local message
+
+ icmp6_capture_install $htun
+ mirror_test v$h1 "" $dip $htun 100 10
+ icmp6_capture_uninstall $htun
+ done
+}
+
+mirror_gre_test()
+{
+ local count=$1; shift
+ local should_fail=$1; shift
+
+ if ! tc_offload_check $TC_FLOWER_NUM_NETIFS; then
+ check_err 1 "Could not test offloaded functionality"
+ return
+ fi
+
+ tcflags="skip_sw"
+ __mirror_gre_test $count $should_fail
+}
+
+mirror_gre_setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ mirror_gre_tunnels=0
+
+ vrf_prepare
+
+ simple_if_init $h1
+ simple_if_init $h2
+ simple_if_init $h3
+
+ ip link add name br1 type bridge vlan_filtering 1
+ ip link set dev br1 up
+
+ ip link set dev $swp1 master br1
+ ip link set dev $swp1 up
+ tc qdisc add dev $swp1 clsact
+
+ ip link set dev $swp2 master br1
+ ip link set dev $swp2 up
+
+ ip link set dev $swp3 up
+}
+
+mirror_gre_cleanup()
+{
+ mirror_gre_tunnels_destroy $mirror_gre_tunnels
+
+ ip link set dev $swp3 down
+
+ ip link set dev $swp2 down
+
+ tc qdisc del dev $swp1 clsact
+ ip link set dev $swp1 down
+
+ ip link del dev br1
+
+ simple_if_fini $h3
+ simple_if_fini $h2
+ simple_if_fini $h1
+
+ vrf_cleanup
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh
new file mode 100755
index 000000000..1ca631d5a
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh
@@ -0,0 +1,189 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test for DSCP prioritization and rewrite. Packets ingress $swp1 with a DSCP
+# tag and are prioritized according to the map at $swp1. They egress $swp2 and
+# the DSCP value is updated to match the map at that interface. The updated DSCP
+# tag is verified at $h2.
+#
+# ICMP responses are produced with the same DSCP tag that arrived at $h2. They
+# go through prioritization at $swp2 and DSCP retagging at $swp1. The tag is
+# verified at $h1--it should match the original tag.
+#
+# +----------------------+ +----------------------+
+# | H1 | | H2 |
+# | + $h1 | | $h2 + |
+# | | 192.0.2.1/28 | | 192.0.2.2/28 | |
+# +----|-----------------+ +----------------|-----+
+# | |
+# +----|----------------------------------------------------------------|-----+
+# | SW | | |
+# | +-|----------------------------------------------------------------|-+ |
+# | | + $swp1 BR $swp2 + | |
+# | | APP=0,5,10 .. 7,5,17 APP=0,5,20 .. 7,5,27 | |
+# | +--------------------------------------------------------------------+ |
+# +---------------------------------------------------------------------------+
+
+ALL_TESTS="
+ ping_ipv4
+ test_dscp
+"
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+NUM_NETIFS=4
+source $lib_dir/lib.sh
+
+h1_create()
+{
+ local dscp;
+
+ simple_if_init $h1 192.0.2.1/28
+ tc qdisc add dev $h1 clsact
+ dscp_capture_install $h1 10
+}
+
+h1_destroy()
+{
+ dscp_capture_uninstall $h1 10
+ tc qdisc del dev $h1 clsact
+ simple_if_fini $h1 192.0.2.1/28
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/28
+ tc qdisc add dev $h2 clsact
+ dscp_capture_install $h2 20
+}
+
+h2_destroy()
+{
+ dscp_capture_uninstall $h2 20
+ tc qdisc del dev $h2 clsact
+ simple_if_fini $h2 192.0.2.2/28
+}
+
+dscp_map()
+{
+ local base=$1; shift
+
+ for prio in {0..7}; do
+ echo app=$prio,5,$((base + prio))
+ done
+}
+
+switch_create()
+{
+ ip link add name br1 type bridge vlan_filtering 1
+ ip link set dev br1 up
+ ip link set dev $swp1 master br1
+ ip link set dev $swp1 up
+ ip link set dev $swp2 master br1
+ ip link set dev $swp2 up
+
+ lldptool -T -i $swp1 -V APP $(dscp_map 10) >/dev/null
+ lldptool -T -i $swp2 -V APP $(dscp_map 20) >/dev/null
+ lldpad_app_wait_set $swp1
+ lldpad_app_wait_set $swp2
+}
+
+switch_destroy()
+{
+ lldptool -T -i $swp2 -V APP -d $(dscp_map 20) >/dev/null
+ lldptool -T -i $swp1 -V APP -d $(dscp_map 10) >/dev/null
+ lldpad_app_wait_del
+
+ ip link set dev $swp2 nomaster
+ ip link set dev $swp1 nomaster
+ ip link del dev br1
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 192.0.2.2
+}
+
+dscp_ping_test()
+{
+ local vrf_name=$1; shift
+ local sip=$1; shift
+ local dip=$1; shift
+ local prio=$1; shift
+ local dev_10=$1; shift
+ local dev_20=$1; shift
+
+ local dscp_10=$(((prio + 10) << 2))
+ local dscp_20=$(((prio + 20) << 2))
+
+ RET=0
+
+ local -A t0s
+ eval "t0s=($(dscp_fetch_stats $dev_10 10)
+ $(dscp_fetch_stats $dev_20 20))"
+
+ ip vrf exec $vrf_name \
+ ${PING} -Q $dscp_10 ${sip:+-I $sip} $dip \
+ -c 10 -i 0.1 -w 2 &> /dev/null
+
+ local -A t1s
+ eval "t1s=($(dscp_fetch_stats $dev_10 10)
+ $(dscp_fetch_stats $dev_20 20))"
+
+ for key in ${!t0s[@]}; do
+ local expect
+ if ((key == prio+10 || key == prio+20)); then
+ expect=10
+ else
+ expect=0
+ fi
+
+ local delta=$((t1s[$key] - t0s[$key]))
+ ((expect == delta))
+ check_err $? "DSCP $key: Expected to capture $expect packets, got $delta."
+ done
+
+ log_test "DSCP rewrite: $dscp_10-(prio $prio)-$dscp_20"
+}
+
+test_dscp()
+{
+ for prio in {0..7}; do
+ dscp_ping_test v$h1 192.0.2.1 192.0.2.2 $prio $h1 $h2
+ done
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh
new file mode 100755
index 000000000..281d90766
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh
@@ -0,0 +1,233 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test for DSCP prioritization in the router.
+#
+# With ip_forward_update_priority disabled, the packets are expected to keep
+# their DSCP (which in this test uses only values 0..7) intact as they are
+# forwarded by the switch. That is verified at $h2. ICMP responses are formed
+# with the same DSCP as the requests, and likewise pass through the switch
+# intact, which is verified at $h1.
+#
+# With ip_forward_update_priority enabled, router reprioritizes the packets
+# according to the table in reprioritize(). Thus, say, DSCP 7 maps to priority
+# 4, which on egress maps back to DSCP 4. The response packet then gets
+# reprioritized to 6, getting DSCP 6 on egress.
+#
+# +----------------------+ +----------------------+
+# | H1 | | H2 |
+# | + $h1 | | $h2 + |
+# | | 192.0.2.1/28 | | 192.0.2.18/28 | |
+# +----|-----------------+ +----------------|-----+
+# | |
+# +----|----------------------------------------------------------------|-----+
+# | SW | | |
+# | + $swp1 $swp2 + |
+# | 192.0.2.2/28 192.0.2.17/28 |
+# | APP=0,5,0 .. 7,5,7 APP=0,5,0 .. 7,5,7 |
+# +---------------------------------------------------------------------------+
+
+ALL_TESTS="
+ ping_ipv4
+ test_update
+ test_no_update
+"
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+NUM_NETIFS=4
+source $lib_dir/lib.sh
+
+reprioritize()
+{
+ local in=$1; shift
+
+ # This is based on rt_tos2priority in include/net/route.h. Assuming 1:1
+ # mapping between priorities and TOS, it yields a new priority for a
+ # packet with ingress priority of $in.
+ local -a reprio=(0 0 2 2 6 6 4 4)
+
+ echo ${reprio[$in]}
+}
+
+h1_create()
+{
+ local dscp;
+
+ simple_if_init $h1 192.0.2.1/28
+ tc qdisc add dev $h1 clsact
+ dscp_capture_install $h1 0
+ ip route add vrf v$h1 192.0.2.16/28 via 192.0.2.2
+}
+
+h1_destroy()
+{
+ ip route del vrf v$h1 192.0.2.16/28 via 192.0.2.2
+ dscp_capture_uninstall $h1 0
+ tc qdisc del dev $h1 clsact
+ simple_if_fini $h1 192.0.2.1/28
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.18/28
+ tc qdisc add dev $h2 clsact
+ dscp_capture_install $h2 0
+ ip route add vrf v$h2 192.0.2.0/28 via 192.0.2.17
+}
+
+h2_destroy()
+{
+ ip route del vrf v$h2 192.0.2.0/28 via 192.0.2.17
+ dscp_capture_uninstall $h2 0
+ tc qdisc del dev $h2 clsact
+ simple_if_fini $h2 192.0.2.18/28
+}
+
+dscp_map()
+{
+ local base=$1; shift
+
+ for prio in {0..7}; do
+ echo app=$prio,5,$((base + prio))
+ done
+}
+
+switch_create()
+{
+ simple_if_init $swp1 192.0.2.2/28
+ __simple_if_init $swp2 v$swp1 192.0.2.17/28
+
+ lldptool -T -i $swp1 -V APP $(dscp_map 0) >/dev/null
+ lldptool -T -i $swp2 -V APP $(dscp_map 0) >/dev/null
+ lldpad_app_wait_set $swp1
+ lldpad_app_wait_set $swp2
+}
+
+switch_destroy()
+{
+ lldptool -T -i $swp2 -V APP -d $(dscp_map 0) >/dev/null
+ lldptool -T -i $swp1 -V APP -d $(dscp_map 0) >/dev/null
+ lldpad_app_wait_del
+
+ __simple_if_fini $swp2 192.0.2.17/28
+ simple_if_fini $swp1 192.0.2.2/28
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ vrf_prepare
+
+ sysctl_set net.ipv4.ip_forward_update_priority 1
+ h1_create
+ h2_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+ sysctl_restore net.ipv4.ip_forward_update_priority
+
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 192.0.2.18
+}
+
+dscp_ping_test()
+{
+ local vrf_name=$1; shift
+ local sip=$1; shift
+ local dip=$1; shift
+ local prio=$1; shift
+ local reprio=$1; shift
+ local dev1=$1; shift
+ local dev2=$1; shift
+
+ local prio2=$($reprio $prio) # ICMP Request egress prio
+ local prio3=$($reprio $prio2) # ICMP Response egress prio
+
+ local dscp=$((prio << 2)) # ICMP Request ingress DSCP
+ local dscp2=$((prio2 << 2)) # ICMP Request egress DSCP
+ local dscp3=$((prio3 << 2)) # ICMP Response egress DSCP
+
+ RET=0
+
+ eval "local -A dev1_t0s=($(dscp_fetch_stats $dev1 0))"
+ eval "local -A dev2_t0s=($(dscp_fetch_stats $dev2 0))"
+
+ ip vrf exec $vrf_name \
+ ${PING} -Q $dscp ${sip:+-I $sip} $dip \
+ -c 10 -i 0.1 -w 2 &> /dev/null
+
+ eval "local -A dev1_t1s=($(dscp_fetch_stats $dev1 0))"
+ eval "local -A dev2_t1s=($(dscp_fetch_stats $dev2 0))"
+
+ for i in {0..7}; do
+ local dscpi=$((i << 2))
+ local expect2=0
+ local expect3=0
+
+ if ((i == prio2)); then
+ expect2=10
+ fi
+ if ((i == prio3)); then
+ expect3=10
+ fi
+
+ local delta=$((dev2_t1s[$i] - dev2_t0s[$i]))
+ ((expect2 == delta))
+ check_err $? "DSCP $dscpi@$dev2: Expected to capture $expect2 packets, got $delta."
+
+ delta=$((dev1_t1s[$i] - dev1_t0s[$i]))
+ ((expect3 == delta))
+ check_err $? "DSCP $dscpi@$dev1: Expected to capture $expect3 packets, got $delta."
+ done
+
+ log_test "DSCP rewrite: $dscp-(prio $prio2)-$dscp2-(prio $prio3)-$dscp3"
+}
+
+__test_update()
+{
+ local update=$1; shift
+ local reprio=$1; shift
+
+ sysctl_restore net.ipv4.ip_forward_update_priority
+ sysctl_set net.ipv4.ip_forward_update_priority $update
+
+ for prio in {0..7}; do
+ dscp_ping_test v$h1 192.0.2.1 192.0.2.18 $prio $reprio $h1 $h2
+ done
+}
+
+test_update()
+{
+ __test_update 1 reprioritize
+}
+
+test_no_update()
+{
+ __test_update 0 echo
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh
new file mode 100644
index 000000000..d231649b4
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh
@@ -0,0 +1,167 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ROUTER_NUM_NETIFS=4
+
+router_h1_create()
+{
+ simple_if_init $h1 192.0.1.1/24
+ ip route add 193.0.0.0/8 via 192.0.1.2 dev $h1
+}
+
+router_h1_destroy()
+{
+ ip route del 193.0.0.0/8 via 192.0.1.2 dev $h1
+ simple_if_fini $h1 192.0.1.1/24
+}
+
+router_h2_create()
+{
+ simple_if_init $h2 192.0.2.1/24
+ tc qdisc add dev $h2 handle ffff: ingress
+}
+
+router_h2_destroy()
+{
+ tc qdisc del dev $h2 handle ffff: ingress
+ simple_if_fini $h2 192.0.2.1/24
+}
+
+router_create()
+{
+ ip link set dev $rp1 up
+ ip link set dev $rp2 up
+
+ ip address add 192.0.1.2/24 dev $rp1
+ ip address add 192.0.2.2/24 dev $rp2
+}
+
+router_destroy()
+{
+ ip address del 192.0.2.2/24 dev $rp2
+ ip address del 192.0.1.2/24 dev $rp1
+
+ ip link set dev $rp2 down
+ ip link set dev $rp1 down
+}
+
+router_setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ rp1=${NETIFS[p2]}
+
+ rp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ h1mac=$(mac_get $h1)
+ rp1mac=$(mac_get $rp1)
+
+ vrf_prepare
+
+ router_h1_create
+ router_h2_create
+
+ router_create
+}
+
+router_offload_validate()
+{
+ local route_count=$1
+ local offloaded_count
+
+ offloaded_count=$(ip route | grep -o 'offload' | wc -l)
+ [[ $offloaded_count -ge $route_count ]]
+}
+
+router_routes_create()
+{
+ local route_count=$1
+ local count=0
+
+ ROUTE_FILE="$(mktemp)"
+
+ for i in {0..255}
+ do
+ for j in {0..255}
+ do
+ for k in {0..255}
+ do
+ if [[ $count -eq $route_count ]]; then
+ break 3
+ fi
+
+ echo route add 193.${i}.${j}.${k}/32 via \
+ 192.0.2.1 dev $rp2 >> $ROUTE_FILE
+ ((count++))
+ done
+ done
+ done
+
+ ip -b $ROUTE_FILE &> /dev/null
+}
+
+router_routes_destroy()
+{
+ if [[ -v ROUTE_FILE ]]; then
+ rm -f $ROUTE_FILE
+ fi
+}
+
+router_test()
+{
+ local route_count=$1
+ local should_fail=$2
+ local count=0
+
+ RET=0
+
+ router_routes_create $route_count
+
+ router_offload_validate $route_count
+ check_err_fail $should_fail $? "Offload of $route_count routes"
+ if [[ $RET -ne 0 ]] || [[ $should_fail -eq 1 ]]; then
+ return
+ fi
+
+ tc filter add dev $h2 ingress protocol ip pref 1 flower \
+ skip_sw dst_ip 193.0.0.0/8 action drop
+
+ for i in {0..255}
+ do
+ for j in {0..255}
+ do
+ for k in {0..255}
+ do
+ if [[ $count -eq $route_count ]]; then
+ break 3
+ fi
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $rp1mac \
+ -A 192.0.1.1 -B 193.${i}.${j}.${k} \
+ -t ip -q
+ ((count++))
+ done
+ done
+ done
+
+ tc_check_packets "dev $h2 ingress" 1 $route_count
+ check_err $? "Offload mismatch"
+
+ tc filter del dev $h2 ingress protocol ip pref 1 flower \
+ skip_sw dst_ip 193.0.0.0/8 action drop
+
+ router_routes_destroy
+}
+
+router_cleanup()
+{
+ pre_cleanup
+
+ router_routes_destroy
+ router_destroy
+
+ router_h2_destroy
+ router_h1_destroy
+
+ vrf_cleanup
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh
new file mode 100755
index 000000000..3b75180f4
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh
@@ -0,0 +1,366 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test is for checking the A-TCAM and C-TCAM operation in Spectrum-2.
+# It tries to exercise as many code paths in the eRP state machine as
+# possible.
+
+lib_dir=$(dirname $0)/../../../../net/forwarding
+
+ALL_TESTS="single_mask_test identical_filters_test two_masks_test \
+ multiple_masks_test ctcam_edge_cases_test"
+NUM_NETIFS=2
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+
+tcflags="skip_hw"
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24 198.51.100.1/24
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/24 198.51.100.1/24
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/24 198.51.100.2/24
+ tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+ tc qdisc del dev $h2 clsact
+ simple_if_fini $h2 192.0.2.2/24 198.51.100.2/24
+}
+
+single_mask_test()
+{
+ # When only a single mask is required, the device uses the master
+ # mask and not the eRP table. Verify that under this mode the right
+ # filter is matched
+
+ RET=0
+
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags dst_ip 192.0.2.2 action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_err $? "Single filter - did not match"
+
+ tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags dst_ip 198.51.100.2 action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 101 2
+ check_err $? "Two filters - did not match highest priority"
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 198.51.100.1 -B 198.51.100.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_err $? "Two filters - did not match lowest priority"
+
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 198.51.100.1 -B 198.51.100.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 102 2
+ check_err $? "Single filter - did not match after delete"
+
+ tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+ log_test "single mask test ($tcflags)"
+}
+
+identical_filters_test()
+{
+ # When two filters that only differ in their priority are used,
+ # one needs to be inserted into the C-TCAM. This test verifies
+ # that filters are correctly spilled to C-TCAM and that the right
+ # filter is matched
+
+ RET=0
+
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags dst_ip 192.0.2.2 action drop
+ tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags dst_ip 192.0.2.2 action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_err $? "Did not match A-TCAM filter"
+
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_err $? "Did not match C-TCAM filter after A-TCAM delete"
+
+ tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+ $tcflags dst_ip 192.0.2.2 action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 102 2
+ check_err $? "Did not match C-TCAM filter after A-TCAM add"
+
+ tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 103 1
+ check_err $? "Did not match A-TCAM filter after C-TCAM delete"
+
+ tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+
+ log_test "identical filters test ($tcflags)"
+}
+
+two_masks_test()
+{
+ # When more than one mask is required, the eRP table is used. This
+ # test verifies that the eRP table is correctly allocated and used
+
+ RET=0
+
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags dst_ip 192.0.2.2 action drop
+ tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+ $tcflags dst_ip 192.0.0.0/16 action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_err $? "Two filters - did not match highest priority"
+
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 103 1
+ check_err $? "Single filter - did not match"
+
+ tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags dst_ip 192.0.2.0/24 action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_err $? "Two filters - did not match highest priority after add"
+
+ tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+ tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+ log_test "two masks test ($tcflags)"
+}
+
+multiple_masks_test()
+{
+ # The number of masks in a region is limited. Once the maximum
+ # number of masks has been reached filters that require new
+ # masks are spilled to the C-TCAM. This test verifies that
+ # spillage is performed correctly and that the right filter is
+ # matched
+
+ local index
+
+ RET=0
+
+ NUM_MASKS=32
+ BASE_INDEX=100
+
+ for i in $(eval echo {1..$NUM_MASKS}); do
+ index=$((BASE_INDEX - i))
+
+ tc filter add dev $h2 ingress protocol ip pref $index \
+ handle $index \
+ flower $tcflags dst_ip 192.0.2.2/${i} src_ip 192.0.2.1 \
+ action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 \
+ -B 192.0.2.2 -t ip -q
+
+ tc_check_packets "dev $h2 ingress" $index 1
+ check_err $? "$i filters - did not match highest priority (add)"
+ done
+
+ for i in $(eval echo {$NUM_MASKS..1}); do
+ index=$((BASE_INDEX - i))
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 \
+ -B 192.0.2.2 -t ip -q
+
+ tc_check_packets "dev $h2 ingress" $index 2
+ check_err $? "$i filters - did not match highest priority (del)"
+
+ tc filter del dev $h2 ingress protocol ip pref $index \
+ handle $index flower
+ done
+
+ log_test "multiple masks test ($tcflags)"
+}
+
+ctcam_two_atcam_masks_test()
+{
+ RET=0
+
+ # First case: C-TCAM is disabled when there are two A-TCAM masks.
+ # We push a filter into the C-TCAM by using two identical filters
+ # as in identical_filters_test()
+
+ # Filter goes into A-TCAM
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags dst_ip 192.0.2.2 action drop
+ # Filter goes into C-TCAM
+ tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags dst_ip 192.0.2.2 action drop
+ # Filter goes into A-TCAM
+ tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+ $tcflags dst_ip 192.0.2.0/24 action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_err $? "Did not match A-TCAM filter"
+
+ # Delete both A-TCAM and C-TCAM filters and make sure the remaining
+ # A-TCAM filter still works
+ tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 103 1
+ check_err $? "Did not match A-TCAM filter"
+
+ tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+
+ log_test "ctcam with two atcam masks test ($tcflags)"
+}
+
+ctcam_one_atcam_mask_test()
+{
+ RET=0
+
+ # Second case: C-TCAM is disabled when there is one A-TCAM mask.
+ # The test is similar to identical_filters_test()
+
+ # Filter goes into A-TCAM
+ tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags dst_ip 192.0.2.2 action drop
+ # Filter goes into C-TCAM
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags dst_ip 192.0.2.2 action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_err $? "Did not match C-TCAM filter"
+
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_err $? "Did not match A-TCAM filter"
+
+ tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+ log_test "ctcam with one atcam mask test ($tcflags)"
+}
+
+ctcam_no_atcam_masks_test()
+{
+ RET=0
+
+ # Third case: C-TCAM is disabled when there are no A-TCAM masks
+ # This test exercises the code path that transitions the eRP table
+ # to its initial state after deleting the last C-TCAM mask
+
+ # Filter goes into A-TCAM
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags dst_ip 192.0.2.2 action drop
+ # Filter goes into C-TCAM
+ tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags dst_ip 192.0.2.2 action drop
+
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+ log_test "ctcam with no atcam masks test ($tcflags)"
+}
+
+ctcam_edge_cases_test()
+{
+ # When the C-TCAM is disabled after deleting the last C-TCAM
+ # mask, we want to make sure the eRP state machine is put in
+ # the correct state
+
+ ctcam_two_atcam_masks_test
+ ctcam_one_atcam_mask_test
+ ctcam_no_atcam_masks_test
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ h2=${NETIFS[p2]}
+ h1mac=$(mac_get $h1)
+ h2mac=$(mac_get $h2)
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+if ! tc_offload_check; then
+ check_err 1 "Could not test offloaded functionality"
+ log_test "mlxsw-specific tests for tc flower"
+ exit
+else
+ tcflags="skip_sw"
+ tests_run
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_lib_spectrum.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_lib_spectrum.sh
new file mode 100644
index 000000000..73035e250
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_lib_spectrum.sh
@@ -0,0 +1,119 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source "../../../../net/forwarding/devlink_lib.sh"
+
+if [ "$DEVLINK_VIDDID" != "15b3:cb84" ]; then
+ echo "SKIP: test is tailored for Mellanox Spectrum"
+ exit 1
+fi
+
+# Needed for returning to default
+declare -A KVD_DEFAULTS
+
+KVD_CHILDREN="linear hash_single hash_double"
+KVDL_CHILDREN="singles chunks large_chunks"
+
+devlink_sp_resource_minimize()
+{
+ local size
+ local i
+
+ for i in $KVD_CHILDREN; do
+ size=$(devlink_resource_get kvd "$i" | jq '.["size_min"]')
+ devlink_resource_size_set "$size" kvd "$i"
+ done
+
+ for i in $KVDL_CHILDREN; do
+ size=$(devlink_resource_get kvd linear "$i" | \
+ jq '.["size_min"]')
+ devlink_resource_size_set "$size" kvd linear "$i"
+ done
+}
+
+devlink_sp_size_kvd_to_default()
+{
+ local need_reload=0
+ local i
+
+ for i in $KVD_CHILDREN; do
+ local size=$(echo "${KVD_DEFAULTS[kvd_$i]}" | jq '.["size"]')
+ current_size=$(devlink_resource_size_get kvd "$i")
+
+ if [ "$size" -ne "$current_size" ]; then
+ devlink_resource_size_set "$size" kvd "$i"
+ need_reload=1
+ fi
+ done
+
+ for i in $KVDL_CHILDREN; do
+ local size=$(echo "${KVD_DEFAULTS[kvd_linear_$i]}" | \
+ jq '.["size"]')
+ current_size=$(devlink_resource_size_get kvd linear "$i")
+
+ if [ "$size" -ne "$current_size" ]; then
+ devlink_resource_size_set "$size" kvd linear "$i"
+ need_reload=1
+ fi
+ done
+
+ if [ "$need_reload" -ne "0" ]; then
+ devlink_reload
+ fi
+}
+
+devlink_sp_read_kvd_defaults()
+{
+ local key
+ local i
+
+ KVD_DEFAULTS[kvd]=$(devlink_resource_get "kvd")
+ for i in $KVD_CHILDREN; do
+ key=kvd_$i
+ KVD_DEFAULTS[$key]=$(devlink_resource_get kvd "$i")
+ done
+
+ for i in $KVDL_CHILDREN; do
+ key=kvd_linear_$i
+ KVD_DEFAULTS[$key]=$(devlink_resource_get kvd linear "$i")
+ done
+}
+
+KVD_PROFILES="default scale ipv4_max"
+
+devlink_sp_resource_kvd_profile_set()
+{
+ local profile=$1
+
+ case "$profile" in
+ scale)
+ devlink_resource_size_set 64000 kvd linear
+ devlink_resource_size_set 15616 kvd linear singles
+ devlink_resource_size_set 32000 kvd linear chunks
+ devlink_resource_size_set 16384 kvd linear large_chunks
+ devlink_resource_size_set 128000 kvd hash_single
+ devlink_resource_size_set 48000 kvd hash_double
+ devlink_reload
+ ;;
+ ipv4_max)
+ devlink_resource_size_set 64000 kvd linear
+ devlink_resource_size_set 15616 kvd linear singles
+ devlink_resource_size_set 32000 kvd linear chunks
+ devlink_resource_size_set 16384 kvd linear large_chunks
+ devlink_resource_size_set 144000 kvd hash_single
+ devlink_resource_size_set 32768 kvd hash_double
+ devlink_reload
+ ;;
+ default)
+ devlink_resource_size_set 98304 kvd linear
+ devlink_resource_size_set 16384 kvd linear singles
+ devlink_resource_size_set 49152 kvd linear chunks
+ devlink_resource_size_set 32768 kvd linear large_chunks
+ devlink_resource_size_set 87040 kvd hash_single
+ devlink_resource_size_set 60416 kvd hash_double
+ devlink_reload
+ ;;
+ *)
+ check_err 1 "Unknown profile $profile"
+ esac
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_resources.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_resources.sh
new file mode 100755
index 000000000..b1fe960e3
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_resources.sh
@@ -0,0 +1,117 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=1
+source devlink_lib_spectrum.sh
+
+setup_prepare()
+{
+ devlink_sp_read_kvd_defaults
+}
+
+cleanup()
+{
+ pre_cleanup
+ devlink_sp_size_kvd_to_default
+}
+
+trap cleanup EXIT
+
+setup_prepare
+
+profiles_test()
+{
+ local i
+
+ log_info "Running profile tests"
+
+ for i in $KVD_PROFILES; do
+ RET=0
+ devlink_sp_resource_kvd_profile_set $i
+ log_test "'$i' profile"
+ done
+
+ # Default is explicitly tested at end to ensure it's actually applied
+ RET=0
+ devlink_sp_resource_kvd_profile_set "default"
+ log_test "'default' profile"
+}
+
+resources_min_test()
+{
+ local size
+ local i
+ local j
+
+ log_info "Running KVD-minimum tests"
+
+ for i in $KVD_CHILDREN; do
+ RET=0
+ size=$(devlink_resource_get kvd "$i" | jq '.["size_min"]')
+ devlink_resource_size_set "$size" kvd "$i"
+
+ # In case of linear, need to minimize sub-resources as well
+ if [[ "$i" == "linear" ]]; then
+ for j in $KVDL_CHILDREN; do
+ devlink_resource_size_set 0 kvd linear "$j"
+ done
+ fi
+
+ devlink_reload
+ devlink_sp_size_kvd_to_default
+ log_test "'$i' minimize [$size]"
+ done
+}
+
+resources_max_test()
+{
+ local min_size
+ local size
+ local i
+ local j
+
+ log_info "Running KVD-maximum tests"
+ for i in $KVD_CHILDREN; do
+ RET=0
+ devlink_sp_resource_minimize
+
+ # Calculate the maximum possible size for the given partition
+ size=$(devlink_resource_size_get kvd)
+ for j in $KVD_CHILDREN; do
+ if [ "$i" != "$j" ]; then
+ min_size=$(devlink_resource_get kvd "$j" | \
+ jq '.["size_min"]')
+ size=$((size - min_size))
+ fi
+ done
+
+ # Test almost maximum size
+ devlink_resource_size_set "$((size - 128))" kvd "$i"
+ devlink_reload
+ log_test "'$i' almost maximize [$((size - 128))]"
+
+ # Test above maximum size
+ devlink resource set "$DEVLINK_DEV" \
+ path "kvd/$i" size $((size + 128)) &> /dev/null
+ check_fail $? "Set kvd/$i to size $((size + 128)) should fail"
+ log_test "'$i' Overflow rejection [$((size + 128))]"
+
+ # Test maximum size
+ if [ "$i" == "hash_single" ] || [ "$i" == "hash_double" ]; then
+ echo "SKIP: Observed problem with exact max $i"
+ continue
+ fi
+
+ devlink_resource_size_set "$size" kvd "$i"
+ devlink_reload
+ log_test "'$i' maximize [$size]"
+
+ devlink_sp_size_kvd_to_default
+ done
+}
+
+profiles_test
+resources_min_test
+resources_max_test
+
+exit "$RET"
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/mirror_gre_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/mirror_gre_scale.sh
new file mode 100644
index 000000000..8d2186c7c
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/mirror_gre_scale.sh
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../mirror_gre_scale.sh
+
+mirror_gre_get_target()
+{
+ local should_fail=$1; shift
+
+ if ((! should_fail)); then
+ echo 3
+ else
+ echo 4
+ fi
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
new file mode 100755
index 000000000..a0a80e1a6
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
@@ -0,0 +1,55 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=6
+source ../../../../net/forwarding/lib.sh
+source ../../../../net/forwarding/tc_common.sh
+source devlink_lib_spectrum.sh
+
+current_test=""
+
+cleanup()
+{
+ pre_cleanup
+ if [ ! -z $current_test ]; then
+ ${current_test}_cleanup
+ fi
+ devlink_sp_size_kvd_to_default
+}
+
+devlink_sp_read_kvd_defaults
+trap cleanup EXIT
+
+ALL_TESTS="router tc_flower mirror_gre"
+for current_test in ${TESTS:-$ALL_TESTS}; do
+ source ${current_test}_scale.sh
+
+ num_netifs_var=${current_test^^}_NUM_NETIFS
+ num_netifs=${!num_netifs_var:-$NUM_NETIFS}
+
+ for profile in $KVD_PROFILES; do
+ RET=0
+ devlink_sp_resource_kvd_profile_set $profile
+ if [[ $RET -gt 0 ]]; then
+ log_test "'$current_test' [$profile] setting"
+ continue
+ fi
+
+ for should_fail in 0 1; do
+ RET=0
+ target=$(${current_test}_get_target "$should_fail")
+ ${current_test}_setup_prepare
+ setup_wait $num_netifs
+ ${current_test}_test "$target" "$should_fail"
+ ${current_test}_cleanup
+ if [[ "$should_fail" -eq 0 ]]; then
+ log_test "'$current_test' [$profile] $target"
+ else
+ log_test "'$current_test' [$profile] overflow $target"
+ fi
+ done
+ done
+done
+current_test=""
+
+exit "$RET"
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/router_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/router_scale.sh
new file mode 100644
index 000000000..21c4697d5
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/router_scale.sh
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../router_scale.sh
+
+router_get_target()
+{
+ local should_fail=$1
+ local target
+
+ target=$(devlink_resource_size_get kvd hash_single)
+
+ if [[ $should_fail -eq 0 ]]; then
+ target=$((target * 85 / 100))
+ else
+ target=$((target + 1))
+ fi
+
+ echo $target
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/tc_flower_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/tc_flower_scale.sh
new file mode 100644
index 000000000..f9bfd8937
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/tc_flower_scale.sh
@@ -0,0 +1,19 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../tc_flower_scale.sh
+
+tc_flower_get_target()
+{
+ local should_fail=$1; shift
+
+ # 6144 (6x1024) is the theoretical maximum.
+ # One bank of 512 rules is taken by the 18-byte MC router rule.
+ # One rule is the ACL catch-all.
+ # 6144 - 512 - 1 = 5631
+ local target=5631
+
+ if ((! should_fail)); then
+ echo $target
+ else
+ echo $((target + 1))
+ fi
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh
new file mode 100644
index 000000000..a6d733d2a
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh
@@ -0,0 +1,134 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test for resource limit of offloaded flower rules. The test adds a given
+# number of flower matches for different IPv6 addresses, then generates traffic,
+# and ensures each was hit exactly once. This file contains functions to set up
+# a testing topology and run the test, and is meant to be sourced from a test
+# script that calls the testing routine with a given number of rules.
+
+TC_FLOWER_NUM_NETIFS=2
+
+tc_flower_h1_create()
+{
+ simple_if_init $h1
+ tc qdisc add dev $h1 clsact
+}
+
+tc_flower_h1_destroy()
+{
+ tc qdisc del dev $h1 clsact
+ simple_if_fini $h1
+}
+
+tc_flower_h2_create()
+{
+ simple_if_init $h2
+ tc qdisc add dev $h2 clsact
+}
+
+tc_flower_h2_destroy()
+{
+ tc qdisc del dev $h2 clsact
+ simple_if_fini $h2
+}
+
+tc_flower_setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ h2=${NETIFS[p2]}
+
+ vrf_prepare
+
+ tc_flower_h1_create
+ tc_flower_h2_create
+}
+
+tc_flower_cleanup()
+{
+ pre_cleanup
+
+ tc_flower_h2_destroy
+ tc_flower_h1_destroy
+
+ vrf_cleanup
+
+ if [[ -v TC_FLOWER_BATCH_FILE ]]; then
+ rm -f $TC_FLOWER_BATCH_FILE
+ fi
+}
+
+tc_flower_addr()
+{
+ local num=$1; shift
+
+ printf "2001:db8:1::%x" $num
+}
+
+tc_flower_rules_create()
+{
+ local count=$1; shift
+ local should_fail=$1; shift
+
+ TC_FLOWER_BATCH_FILE="$(mktemp)"
+
+ for ((i = 0; i < count; ++i)); do
+ cat >> $TC_FLOWER_BATCH_FILE <<-EOF
+ filter add dev $h2 ingress \
+ prot ipv6 \
+ pref 1000 \
+ flower $tcflags dst_ip $(tc_flower_addr $i) \
+ action drop
+ EOF
+ done
+
+ tc -b $TC_FLOWER_BATCH_FILE
+ check_err_fail $should_fail $? "Rule insertion"
+}
+
+__tc_flower_test()
+{
+ local count=$1; shift
+ local should_fail=$1; shift
+ local last=$((count - 1))
+
+ tc_flower_rules_create $count $should_fail
+
+ for ((i = 0; i < count; ++i)); do
+ $MZ $h1 -q -c 1 -t ip -p 20 -b bc -6 \
+ -A 2001:db8:2::1 \
+ -B $(tc_flower_addr $i)
+ done
+
+ MISMATCHES=$(
+ tc -j -s filter show dev $h2 ingress |
+ jq -r '[ .[] | select(.kind == "flower") | .options |
+ values as $rule | .actions[].stats.packets |
+ select(. != 1) | "\(.) on \($rule.keys.dst_ip)" ] |
+ join(", ")'
+ )
+
+ test -z "$MISMATCHES"
+ check_err $? "Expected to capture 1 packet for each IP, but got $MISMATCHES"
+}
+
+tc_flower_test()
+{
+ local count=$1; shift
+ local should_fail=$1; shift
+
+ # We use lower 16 bits of IPv6 address for match. Also there are only 16
+ # bits of rule priority space.
+ if ((count > 65536)); then
+ check_err 1 "Invalid count of $count. At most 65536 rules supported"
+ return
+ fi
+
+ if ! tc_offload_check $TC_FLOWER_NUM_NETIFS; then
+ check_err 1 "Could not test offloaded functionality"
+ return
+ fi
+
+ tcflags="skip_sw"
+ __tc_flower_test $count $should_fail
+}
diff --git a/tools/testing/selftests/drivers/usb/usbip/usbip_test.sh b/tools/testing/selftests/drivers/usb/usbip/usbip_test.sh
new file mode 100755
index 000000000..128f0ab24
--- /dev/null
+++ b/tools/testing/selftests/drivers/usb/usbip/usbip_test.sh
@@ -0,0 +1,200 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+usage() { echo "usbip_test.sh -b <busid> -p <usbip tools path>"; exit 1; }
+
+while getopts "h:b:p:" arg; do
+ case "${arg}" in
+ h)
+ usage
+ ;;
+ b)
+ busid=${OPTARG}
+ ;;
+ p)
+ tools_path=${OPTARG}
+ ;;
+ *)
+ usage
+ ;;
+ esac
+done
+shift $((OPTIND-1))
+
+if [ -z "${busid}" ]; then
+ usage
+fi
+
+echo "Running USB over IP Testing on $busid";
+
+test_end_msg="End of USB over IP Testing on $busid"
+
+if [ $UID != 0 ]; then
+ echo "Please run usbip_test as root [SKIP]"
+ echo $test_end_msg
+ exit $ksft_skip
+fi
+
+echo "Load usbip_host module"
+if ! /sbin/modprobe -q -n usbip_host; then
+ echo "usbip_test: module usbip_host is not found [SKIP]"
+ echo $test_end_msg
+ exit $ksft_skip
+fi
+
+if /sbin/modprobe -q usbip_host; then
+ echo "usbip_test: module usbip_host is loaded [OK]"
+else
+ echo "usbip_test: module usbip_host failed to load [FAIL]"
+ echo $test_end_msg
+ exit 1
+fi
+
+echo "Load vhci_hcd module"
+if /sbin/modprobe -q vhci_hcd; then
+ echo "usbip_test: module vhci_hcd is loaded [OK]"
+else
+ echo "usbip_test: module vhci_hcd failed to load [FAIL]"
+ echo $test_end_msg
+ exit 1
+fi
+echo "=============================================================="
+
+cd $tools_path;
+
+if [ ! -f src/usbip ]; then
+ echo "Please build usbip tools"
+ echo $test_end_msg
+ exit $ksft_skip
+fi
+
+echo "Expect to see export-able devices";
+src/usbip list -l;
+echo "=============================================================="
+
+echo "Run lsusb to see all usb devices"
+lsusb -t;
+echo "=============================================================="
+
+src/usbipd -D;
+
+echo "Get exported devices from localhost - expect to see none";
+src/usbip list -r localhost;
+echo "=============================================================="
+
+echo "bind devices";
+src/usbip bind -b $busid;
+echo "=============================================================="
+
+echo "Run lsusb - bound devices should be under usbip_host control"
+lsusb -t;
+echo "=============================================================="
+
+echo "bind devices - expect already bound messages"
+src/usbip bind -b $busid;
+echo "=============================================================="
+
+echo "Get exported devices from localhost - expect to see exported devices";
+src/usbip list -r localhost;
+echo "=============================================================="
+
+echo "unbind devices";
+src/usbip unbind -b $busid;
+echo "=============================================================="
+
+echo "Run lsusb - bound devices should be rebound to original drivers"
+lsusb -t;
+echo "=============================================================="
+
+echo "unbind devices - expect no devices bound message";
+src/usbip unbind -b $busid;
+echo "=============================================================="
+
+echo "Get exported devices from localhost - expect to see none";
+src/usbip list -r localhost;
+echo "=============================================================="
+
+echo "List imported devices - expect to see none";
+src/usbip port;
+echo "=============================================================="
+
+echo "Import devices from localhost - should fail with no devices"
+src/usbip attach -r localhost -b $busid;
+echo "=============================================================="
+
+echo "bind devices";
+src/usbip bind -b $busid;
+echo "=============================================================="
+
+echo "List imported devices - expect to see exported devices";
+src/usbip list -r localhost;
+echo "=============================================================="
+
+echo "List imported devices - expect to see none";
+src/usbip port;
+echo "=============================================================="
+
+echo "Import devices from localhost - should work"
+src/usbip attach -r localhost -b $busid;
+echo "=============================================================="
+
+# Wait for sysfs file to be updated. Without this sleep, usbip port
+# shows no imported devices.
+sleep 3;
+
+echo "List imported devices - expect to see imported devices";
+src/usbip port;
+echo "=============================================================="
+
+echo "Import devices from localhost - expect already imported messages"
+src/usbip attach -r localhost -b $busid;
+echo "=============================================================="
+
+echo "Un-import devices";
+src/usbip detach -p 00;
+src/usbip detach -p 01;
+echo "=============================================================="
+
+echo "List imported devices - expect to see none";
+src/usbip port;
+echo "=============================================================="
+
+echo "Un-import devices - expect no devices to detach messages";
+src/usbip detach -p 00;
+src/usbip detach -p 01;
+echo "=============================================================="
+
+echo "Detach invalid port tests - expect invalid port error message";
+src/usbip detach -p 100;
+echo "=============================================================="
+
+echo "Expect to see export-able devices";
+src/usbip list -l;
+echo "=============================================================="
+
+echo "Remove usbip_host module";
+rmmod usbip_host;
+
+echo "Run lsusb - bound devices should be rebound to original drivers"
+lsusb -t;
+echo "=============================================================="
+
+echo "Run bind without usbip_host - expect fail"
+src/usbip bind -b $busid;
+echo "=============================================================="
+
+echo "Run lsusb - devices that failed to bind aren't bound to any driver"
+lsusb -t;
+echo "=============================================================="
+
+echo "modprobe usbip_host - does it work?"
+/sbin/modprobe usbip_host
+echo "Should see -busid- is not in match_busid table... skip! dmesg"
+echo "=============================================================="
+dmesg | grep "is not in match_busid table"
+echo "=============================================================="
+
+echo $test_end_msg
diff --git a/tools/testing/selftests/efivarfs/.gitignore b/tools/testing/selftests/efivarfs/.gitignore
new file mode 100644
index 000000000..336184935
--- /dev/null
+++ b/tools/testing/selftests/efivarfs/.gitignore
@@ -0,0 +1,2 @@
+create-read
+open-unlink
diff --git a/tools/testing/selftests/efivarfs/Makefile b/tools/testing/selftests/efivarfs/Makefile
new file mode 100644
index 000000000..c49dcea69
--- /dev/null
+++ b/tools/testing/selftests/efivarfs/Makefile
@@ -0,0 +1,7 @@
+CFLAGS = -Wall
+
+TEST_GEN_FILES := open-unlink create-read
+TEST_PROGS := efivarfs.sh
+
+include ../lib.mk
+
diff --git a/tools/testing/selftests/efivarfs/config b/tools/testing/selftests/efivarfs/config
new file mode 100644
index 000000000..4e151f100
--- /dev/null
+++ b/tools/testing/selftests/efivarfs/config
@@ -0,0 +1 @@
+CONFIG_EFIVAR_FS=y
diff --git a/tools/testing/selftests/efivarfs/create-read.c b/tools/testing/selftests/efivarfs/create-read.c
new file mode 100644
index 000000000..9674a1939
--- /dev/null
+++ b/tools/testing/selftests/efivarfs/create-read.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <string.h>
+
+int main(int argc, char **argv)
+{
+ const char *path;
+ char buf[4];
+ int fd, rc;
+
+ if (argc < 2) {
+ fprintf(stderr, "usage: %s <path>\n", argv[0]);
+ return EXIT_FAILURE;
+ }
+
+ path = argv[1];
+
+ /* create a test variable */
+ fd = open(path, O_RDWR | O_CREAT, 0600);
+ if (fd < 0) {
+ perror("open(O_WRONLY)");
+ return EXIT_FAILURE;
+ }
+
+ rc = read(fd, buf, sizeof(buf));
+ if (rc != 0) {
+ fprintf(stderr, "Reading a new var should return EOF\n");
+ return EXIT_FAILURE;
+ }
+
+ return EXIT_SUCCESS;
+}
diff --git a/tools/testing/selftests/efivarfs/efivarfs.sh b/tools/testing/selftests/efivarfs/efivarfs.sh
new file mode 100755
index 000000000..a47029a79
--- /dev/null
+++ b/tools/testing/selftests/efivarfs/efivarfs.sh
@@ -0,0 +1,215 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+efivarfs_mount=/sys/firmware/efi/efivars
+test_guid=210be57c-9849-4fc7-a635-e6382d1aec27
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+check_prereqs()
+{
+ local msg="skip all tests:"
+
+ if [ $UID != 0 ]; then
+ echo $msg must be run as root >&2
+ exit $ksft_skip
+ fi
+
+ if ! grep -q "^\S\+ $efivarfs_mount efivarfs" /proc/mounts; then
+ echo $msg efivarfs is not mounted on $efivarfs_mount >&2
+ exit $ksft_skip
+ fi
+}
+
+run_test()
+{
+ local test="$1"
+
+ echo "--------------------"
+ echo "running $test"
+ echo "--------------------"
+
+ if [ "$(type -t $test)" = 'function' ]; then
+ ( $test )
+ else
+ ( ./$test )
+ fi
+
+ if [ $? -ne 0 ]; then
+ echo " [FAIL]"
+ rc=1
+ else
+ echo " [PASS]"
+ fi
+}
+
+test_create()
+{
+ local attrs='\x07\x00\x00\x00'
+ local file=$efivarfs_mount/$FUNCNAME-$test_guid
+
+ printf "$attrs\x00" > $file
+
+ if [ ! -e $file ]; then
+ echo "$file couldn't be created" >&2
+ exit 1
+ fi
+
+ if [ $(stat -c %s $file) -ne 5 ]; then
+ echo "$file has invalid size" >&2
+ exit 1
+ fi
+}
+
+test_create_empty()
+{
+ local file=$efivarfs_mount/$FUNCNAME-$test_guid
+
+ : > $file
+
+ if [ ! -e $file ]; then
+ echo "$file can not be created without writing" >&2
+ exit 1
+ fi
+}
+
+test_create_read()
+{
+ local file=$efivarfs_mount/$FUNCNAME-$test_guid
+ ./create-read $file
+}
+
+test_delete()
+{
+ local attrs='\x07\x00\x00\x00'
+ local file=$efivarfs_mount/$FUNCNAME-$test_guid
+
+ printf "$attrs\x00" > $file
+
+ if [ ! -e $file ]; then
+ echo "$file couldn't be created" >&2
+ exit 1
+ fi
+
+ rm $file 2>/dev/null
+ if [ $? -ne 0 ]; then
+ chattr -i $file
+ rm $file
+ fi
+
+ if [ -e $file ]; then
+ echo "$file couldn't be deleted" >&2
+ exit 1
+ fi
+
+}
+
+# test that we can remove a variable by issuing a write with only
+# attributes specified
+test_zero_size_delete()
+{
+ local attrs='\x07\x00\x00\x00'
+ local file=$efivarfs_mount/$FUNCNAME-$test_guid
+
+ printf "$attrs\x00" > $file
+
+ if [ ! -e $file ]; then
+ echo "$file does not exist" >&2
+ exit 1
+ fi
+
+ chattr -i $file
+ printf "$attrs" > $file
+
+ if [ -e $file ]; then
+ echo "$file should have been deleted" >&2
+ exit 1
+ fi
+}
+
+test_open_unlink()
+{
+ local file=$efivarfs_mount/$FUNCNAME-$test_guid
+ ./open-unlink $file
+}
+
+# test that we can create a range of filenames
+test_valid_filenames()
+{
+ local attrs='\x07\x00\x00\x00'
+ local ret=0
+
+ local file_list="abc dump-type0-11-1-1362436005 1234 -"
+ for f in $file_list; do
+ local file=$efivarfs_mount/$f-$test_guid
+
+ printf "$attrs\x00" > $file
+
+ if [ ! -e $file ]; then
+ echo "$file could not be created" >&2
+ ret=1
+ else
+ rm $file 2>/dev/null
+ if [ $? -ne 0 ]; then
+ chattr -i $file
+ rm $file
+ fi
+ fi
+ done
+
+ exit $ret
+}
+
+test_invalid_filenames()
+{
+ local attrs='\x07\x00\x00\x00'
+ local ret=0
+
+ local file_list="
+ -1234-1234-1234-123456789abc
+ foo
+ foo-bar
+ -foo-
+ foo-barbazba-foob-foob-foob-foobarbazfoo
+ foo-------------------------------------
+ -12345678-1234-1234-1234-123456789abc
+ a-12345678=1234-1234-1234-123456789abc
+ a-12345678-1234=1234-1234-123456789abc
+ a-12345678-1234-1234=1234-123456789abc
+ a-12345678-1234-1234-1234=123456789abc
+ 1112345678-1234-1234-1234-123456789abc"
+
+ for f in $file_list; do
+ local file=$efivarfs_mount/$f
+
+ printf "$attrs\x00" 2>/dev/null > $file
+
+ if [ -e $file ]; then
+ echo "Creating $file should have failed" >&2
+ rm $file 2>/dev/null
+ if [ $? -ne 0 ]; then
+ chattr -i $file
+ rm $file
+ fi
+ ret=1
+ fi
+ done
+
+ exit $ret
+}
+
+check_prereqs
+
+rc=0
+
+run_test test_create
+run_test test_create_empty
+run_test test_create_read
+run_test test_delete
+run_test test_zero_size_delete
+run_test test_open_unlink
+run_test test_valid_filenames
+run_test test_invalid_filenames
+
+exit $rc
diff --git a/tools/testing/selftests/efivarfs/open-unlink.c b/tools/testing/selftests/efivarfs/open-unlink.c
new file mode 100644
index 000000000..562742d44
--- /dev/null
+++ b/tools/testing/selftests/efivarfs/open-unlink.c
@@ -0,0 +1,134 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <linux/fs.h>
+
+static int set_immutable(const char *path, int immutable)
+{
+ unsigned int flags;
+ int fd;
+ int rc;
+ int error;
+
+ fd = open(path, O_RDONLY);
+ if (fd < 0)
+ return fd;
+
+ rc = ioctl(fd, FS_IOC_GETFLAGS, &flags);
+ if (rc < 0) {
+ error = errno;
+ close(fd);
+ errno = error;
+ return rc;
+ }
+
+ if (immutable)
+ flags |= FS_IMMUTABLE_FL;
+ else
+ flags &= ~FS_IMMUTABLE_FL;
+
+ rc = ioctl(fd, FS_IOC_SETFLAGS, &flags);
+ error = errno;
+ close(fd);
+ errno = error;
+ return rc;
+}
+
+static int get_immutable(const char *path)
+{
+ unsigned int flags;
+ int fd;
+ int rc;
+ int error;
+
+ fd = open(path, O_RDONLY);
+ if (fd < 0)
+ return fd;
+
+ rc = ioctl(fd, FS_IOC_GETFLAGS, &flags);
+ if (rc < 0) {
+ error = errno;
+ close(fd);
+ errno = error;
+ return rc;
+ }
+ close(fd);
+ if (flags & FS_IMMUTABLE_FL)
+ return 1;
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ const char *path;
+ char buf[5];
+ int fd, rc;
+
+ if (argc < 2) {
+ fprintf(stderr, "usage: %s <path>\n", argv[0]);
+ return EXIT_FAILURE;
+ }
+
+ path = argv[1];
+
+ /* attributes: EFI_VARIABLE_NON_VOLATILE |
+ * EFI_VARIABLE_BOOTSERVICE_ACCESS |
+ * EFI_VARIABLE_RUNTIME_ACCESS
+ */
+ *(uint32_t *)buf = 0x7;
+ buf[4] = 0;
+
+ /* create a test variable */
+ fd = open(path, O_WRONLY | O_CREAT, 0600);
+ if (fd < 0) {
+ perror("open(O_WRONLY)");
+ return EXIT_FAILURE;
+ }
+
+ rc = write(fd, buf, sizeof(buf));
+ if (rc != sizeof(buf)) {
+ perror("write");
+ return EXIT_FAILURE;
+ }
+
+ close(fd);
+
+ rc = get_immutable(path);
+ if (rc < 0) {
+ perror("ioctl(FS_IOC_GETFLAGS)");
+ return EXIT_FAILURE;
+ } else if (rc) {
+ rc = set_immutable(path, 0);
+ if (rc < 0) {
+ perror("ioctl(FS_IOC_SETFLAGS)");
+ return EXIT_FAILURE;
+ }
+ }
+
+ fd = open(path, O_RDONLY);
+ if (fd < 0) {
+ perror("open");
+ return EXIT_FAILURE;
+ }
+
+ if (unlink(path) < 0) {
+ perror("unlink");
+ return EXIT_FAILURE;
+ }
+
+ rc = read(fd, buf, sizeof(buf));
+ if (rc > 0) {
+ fprintf(stderr, "reading from an unlinked variable "
+ "shouldn't be possible\n");
+ return EXIT_FAILURE;
+ }
+
+ return EXIT_SUCCESS;
+}
diff --git a/tools/testing/selftests/exec/.gitignore b/tools/testing/selftests/exec/.gitignore
new file mode 100644
index 000000000..64073e050
--- /dev/null
+++ b/tools/testing/selftests/exec/.gitignore
@@ -0,0 +1,9 @@
+subdir*
+script*
+execveat
+execveat.symlink
+execveat.moved
+execveat.path.ephemeral
+execveat.ephemeral
+execveat.denatured
+xxxxxxxx* \ No newline at end of file
diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile
new file mode 100644
index 000000000..427c41ba5
--- /dev/null
+++ b/tools/testing/selftests/exec/Makefile
@@ -0,0 +1,24 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS = -Wall
+
+TEST_GEN_PROGS := execveat
+TEST_GEN_FILES := execveat.symlink execveat.denatured script subdir
+# Makefile is a run-time dependency, since it's accessed by the execveat test
+TEST_FILES := Makefile
+
+EXTRA_CLEAN := $(OUTPUT)/subdir.moved $(OUTPUT)/execveat.moved $(OUTPUT)/xxxxx*
+
+include ../lib.mk
+
+$(OUTPUT)/subdir:
+ mkdir -p $@
+$(OUTPUT)/script:
+ echo '#!/bin/sh' > $@
+ echo 'exit $$*' >> $@
+ chmod +x $@
+$(OUTPUT)/execveat.symlink: $(OUTPUT)/execveat
+ cd $(OUTPUT) && ln -s -f $(shell basename $<) $(shell basename $@)
+$(OUTPUT)/execveat.denatured: $(OUTPUT)/execveat
+ cp $< $@
+ chmod -x $@
+
diff --git a/tools/testing/selftests/exec/execveat.c b/tools/testing/selftests/exec/execveat.c
new file mode 100644
index 000000000..47cbf54d0
--- /dev/null
+++ b/tools/testing/selftests/exec/execveat.c
@@ -0,0 +1,425 @@
+/*
+ * Copyright (c) 2014 Google, Inc.
+ *
+ * Licensed under the terms of the GNU GPL License version 2
+ *
+ * Selftests for execveat(2).
+ */
+
+#define _GNU_SOURCE /* to get O_PATH, AT_EMPTY_PATH */
+#include <sys/sendfile.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "../kselftest.h"
+
+static char longpath[2 * PATH_MAX] = "";
+static char *envp[] = { "IN_TEST=yes", NULL, NULL };
+static char *argv[] = { "execveat", "99", NULL };
+
+static int execveat_(int fd, const char *path, char **argv, char **envp,
+ int flags)
+{
+#ifdef __NR_execveat
+ return syscall(__NR_execveat, fd, path, argv, envp, flags);
+#else
+ errno = ENOSYS;
+ return -1;
+#endif
+}
+
+#define check_execveat_fail(fd, path, flags, errno) \
+ _check_execveat_fail(fd, path, flags, errno, #errno)
+static int _check_execveat_fail(int fd, const char *path, int flags,
+ int expected_errno, const char *errno_str)
+{
+ int rc;
+
+ errno = 0;
+ printf("Check failure of execveat(%d, '%s', %d) with %s... ",
+ fd, path?:"(null)", flags, errno_str);
+ rc = execveat_(fd, path, argv, envp, flags);
+
+ if (rc > 0) {
+ printf("[FAIL] (unexpected success from execveat(2))\n");
+ return 1;
+ }
+ if (errno != expected_errno) {
+ printf("[FAIL] (expected errno %d (%s) not %d (%s)\n",
+ expected_errno, strerror(expected_errno),
+ errno, strerror(errno));
+ return 1;
+ }
+ printf("[OK]\n");
+ return 0;
+}
+
+static int check_execveat_invoked_rc(int fd, const char *path, int flags,
+ int expected_rc, int expected_rc2)
+{
+ int status;
+ int rc;
+ pid_t child;
+ int pathlen = path ? strlen(path) : 0;
+
+ if (pathlen > 40)
+ printf("Check success of execveat(%d, '%.20s...%s', %d)... ",
+ fd, path, (path + pathlen - 20), flags);
+ else
+ printf("Check success of execveat(%d, '%s', %d)... ",
+ fd, path?:"(null)", flags);
+ child = fork();
+ if (child < 0) {
+ printf("[FAIL] (fork() failed)\n");
+ return 1;
+ }
+ if (child == 0) {
+ /* Child: do execveat(). */
+ rc = execveat_(fd, path, argv, envp, flags);
+ printf("[FAIL]: execveat() failed, rc=%d errno=%d (%s)\n",
+ rc, errno, strerror(errno));
+ exit(1); /* should not reach here */
+ }
+ /* Parent: wait for & check child's exit status. */
+ rc = waitpid(child, &status, 0);
+ if (rc != child) {
+ printf("[FAIL] (waitpid(%d,...) returned %d)\n", child, rc);
+ return 1;
+ }
+ if (!WIFEXITED(status)) {
+ printf("[FAIL] (child %d did not exit cleanly, status=%08x)\n",
+ child, status);
+ return 1;
+ }
+ if ((WEXITSTATUS(status) != expected_rc) &&
+ (WEXITSTATUS(status) != expected_rc2)) {
+ printf("[FAIL] (child %d exited with %d not %d nor %d)\n",
+ child, WEXITSTATUS(status), expected_rc, expected_rc2);
+ return 1;
+ }
+ printf("[OK]\n");
+ return 0;
+}
+
+static int check_execveat(int fd, const char *path, int flags)
+{
+ return check_execveat_invoked_rc(fd, path, flags, 99, 99);
+}
+
+static char *concat(const char *left, const char *right)
+{
+ char *result = malloc(strlen(left) + strlen(right) + 1);
+
+ strcpy(result, left);
+ strcat(result, right);
+ return result;
+}
+
+static int open_or_die(const char *filename, int flags)
+{
+ int fd = open(filename, flags);
+
+ if (fd < 0) {
+ printf("Failed to open '%s'; "
+ "check prerequisites are available\n", filename);
+ exit(1);
+ }
+ return fd;
+}
+
+static void exe_cp(const char *src, const char *dest)
+{
+ int in_fd = open_or_die(src, O_RDONLY);
+ int out_fd = open(dest, O_RDWR|O_CREAT|O_TRUNC, 0755);
+ struct stat info;
+
+ fstat(in_fd, &info);
+ sendfile(out_fd, in_fd, NULL, info.st_size);
+ close(in_fd);
+ close(out_fd);
+}
+
+#define XX_DIR_LEN 200
+static int check_execveat_pathmax(int root_dfd, const char *src, int is_script)
+{
+ int fail = 0;
+ int ii, count, len;
+ char longname[XX_DIR_LEN + 1];
+ int fd;
+
+ if (*longpath == '\0') {
+ /* Create a filename close to PATH_MAX in length */
+ char *cwd = getcwd(NULL, 0);
+
+ if (!cwd) {
+ printf("Failed to getcwd(), errno=%d (%s)\n",
+ errno, strerror(errno));
+ return 2;
+ }
+ strcpy(longpath, cwd);
+ strcat(longpath, "/");
+ memset(longname, 'x', XX_DIR_LEN - 1);
+ longname[XX_DIR_LEN - 1] = '/';
+ longname[XX_DIR_LEN] = '\0';
+ count = (PATH_MAX - 3 - strlen(cwd)) / XX_DIR_LEN;
+ for (ii = 0; ii < count; ii++) {
+ strcat(longpath, longname);
+ mkdir(longpath, 0755);
+ }
+ len = (PATH_MAX - 3 - strlen(cwd)) - (count * XX_DIR_LEN);
+ if (len <= 0)
+ len = 1;
+ memset(longname, 'y', len);
+ longname[len] = '\0';
+ strcat(longpath, longname);
+ free(cwd);
+ }
+ exe_cp(src, longpath);
+
+ /*
+ * Execute as a pre-opened file descriptor, which works whether this is
+ * a script or not (because the interpreter sees a filename like
+ * "/dev/fd/20").
+ */
+ fd = open(longpath, O_RDONLY);
+ if (fd > 0) {
+ printf("Invoke copy of '%s' via filename of length %zu:\n",
+ src, strlen(longpath));
+ fail += check_execveat(fd, "", AT_EMPTY_PATH);
+ } else {
+ printf("Failed to open length %zu filename, errno=%d (%s)\n",
+ strlen(longpath), errno, strerror(errno));
+ fail++;
+ }
+
+ /*
+ * Execute as a long pathname relative to "/". If this is a script,
+ * the interpreter will launch but fail to open the script because its
+ * name ("/dev/fd/5/xxx....") is bigger than PATH_MAX.
+ *
+ * The failure code is usually 127 (POSIX: "If a command is not found,
+ * the exit status shall be 127."), but some systems give 126 (POSIX:
+ * "If the command name is found, but it is not an executable utility,
+ * the exit status shall be 126."), so allow either.
+ */
+ if (is_script)
+ fail += check_execveat_invoked_rc(root_dfd, longpath + 1, 0,
+ 127, 126);
+ else
+ fail += check_execveat(root_dfd, longpath + 1, 0);
+
+ return fail;
+}
+
+static int run_tests(void)
+{
+ int fail = 0;
+ char *fullname = realpath("execveat", NULL);
+ char *fullname_script = realpath("script", NULL);
+ char *fullname_symlink = concat(fullname, ".symlink");
+ int subdir_dfd = open_or_die("subdir", O_DIRECTORY|O_RDONLY);
+ int subdir_dfd_ephemeral = open_or_die("subdir.ephemeral",
+ O_DIRECTORY|O_RDONLY);
+ int dot_dfd = open_or_die(".", O_DIRECTORY|O_RDONLY);
+ int root_dfd = open_or_die("/", O_DIRECTORY|O_RDONLY);
+ int dot_dfd_path = open_or_die(".", O_DIRECTORY|O_RDONLY|O_PATH);
+ int dot_dfd_cloexec = open_or_die(".", O_DIRECTORY|O_RDONLY|O_CLOEXEC);
+ int fd = open_or_die("execveat", O_RDONLY);
+ int fd_path = open_or_die("execveat", O_RDONLY|O_PATH);
+ int fd_symlink = open_or_die("execveat.symlink", O_RDONLY);
+ int fd_denatured = open_or_die("execveat.denatured", O_RDONLY);
+ int fd_denatured_path = open_or_die("execveat.denatured",
+ O_RDONLY|O_PATH);
+ int fd_script = open_or_die("script", O_RDONLY);
+ int fd_ephemeral = open_or_die("execveat.ephemeral", O_RDONLY);
+ int fd_ephemeral_path = open_or_die("execveat.path.ephemeral",
+ O_RDONLY|O_PATH);
+ int fd_script_ephemeral = open_or_die("script.ephemeral", O_RDONLY);
+ int fd_cloexec = open_or_die("execveat", O_RDONLY|O_CLOEXEC);
+ int fd_script_cloexec = open_or_die("script", O_RDONLY|O_CLOEXEC);
+
+ /* Check if we have execveat at all, and bail early if not */
+ errno = 0;
+ execveat_(-1, NULL, NULL, NULL, 0);
+ if (errno == ENOSYS) {
+ ksft_exit_skip(
+ "ENOSYS calling execveat - no kernel support?\n");
+ }
+
+ /* Change file position to confirm it doesn't affect anything */
+ lseek(fd, 10, SEEK_SET);
+
+ /* Normal executable file: */
+ /* dfd + path */
+ fail += check_execveat(subdir_dfd, "../execveat", 0);
+ fail += check_execveat(dot_dfd, "execveat", 0);
+ fail += check_execveat(dot_dfd_path, "execveat", 0);
+ /* absolute path */
+ fail += check_execveat(AT_FDCWD, fullname, 0);
+ /* absolute path with nonsense dfd */
+ fail += check_execveat(99, fullname, 0);
+ /* fd + no path */
+ fail += check_execveat(fd, "", AT_EMPTY_PATH);
+ /* O_CLOEXEC fd + no path */
+ fail += check_execveat(fd_cloexec, "", AT_EMPTY_PATH);
+ /* O_PATH fd */
+ fail += check_execveat(fd_path, "", AT_EMPTY_PATH);
+
+ /* Mess with executable file that's already open: */
+ /* fd + no path to a file that's been renamed */
+ rename("execveat.ephemeral", "execveat.moved");
+ fail += check_execveat(fd_ephemeral, "", AT_EMPTY_PATH);
+ /* fd + no path to a file that's been deleted */
+ unlink("execveat.moved"); /* remove the file now fd open */
+ fail += check_execveat(fd_ephemeral, "", AT_EMPTY_PATH);
+
+ /* Mess with executable file that's already open with O_PATH */
+ /* fd + no path to a file that's been deleted */
+ unlink("execveat.path.ephemeral");
+ fail += check_execveat(fd_ephemeral_path, "", AT_EMPTY_PATH);
+
+ /* Invalid argument failures */
+ fail += check_execveat_fail(fd, "", 0, ENOENT);
+ fail += check_execveat_fail(fd, NULL, AT_EMPTY_PATH, EFAULT);
+
+ /* Symlink to executable file: */
+ /* dfd + path */
+ fail += check_execveat(dot_dfd, "execveat.symlink", 0);
+ fail += check_execveat(dot_dfd_path, "execveat.symlink", 0);
+ /* absolute path */
+ fail += check_execveat(AT_FDCWD, fullname_symlink, 0);
+ /* fd + no path, even with AT_SYMLINK_NOFOLLOW (already followed) */
+ fail += check_execveat(fd_symlink, "", AT_EMPTY_PATH);
+ fail += check_execveat(fd_symlink, "",
+ AT_EMPTY_PATH|AT_SYMLINK_NOFOLLOW);
+
+ /* Symlink fails when AT_SYMLINK_NOFOLLOW set: */
+ /* dfd + path */
+ fail += check_execveat_fail(dot_dfd, "execveat.symlink",
+ AT_SYMLINK_NOFOLLOW, ELOOP);
+ fail += check_execveat_fail(dot_dfd_path, "execveat.symlink",
+ AT_SYMLINK_NOFOLLOW, ELOOP);
+ /* absolute path */
+ fail += check_execveat_fail(AT_FDCWD, fullname_symlink,
+ AT_SYMLINK_NOFOLLOW, ELOOP);
+
+ /* Shell script wrapping executable file: */
+ /* dfd + path */
+ fail += check_execveat(subdir_dfd, "../script", 0);
+ fail += check_execveat(dot_dfd, "script", 0);
+ fail += check_execveat(dot_dfd_path, "script", 0);
+ /* absolute path */
+ fail += check_execveat(AT_FDCWD, fullname_script, 0);
+ /* fd + no path */
+ fail += check_execveat(fd_script, "", AT_EMPTY_PATH);
+ fail += check_execveat(fd_script, "",
+ AT_EMPTY_PATH|AT_SYMLINK_NOFOLLOW);
+ /* O_CLOEXEC fd fails for a script (as script file inaccessible) */
+ fail += check_execveat_fail(fd_script_cloexec, "", AT_EMPTY_PATH,
+ ENOENT);
+ fail += check_execveat_fail(dot_dfd_cloexec, "script", 0, ENOENT);
+
+ /* Mess with script file that's already open: */
+ /* fd + no path to a file that's been renamed */
+ rename("script.ephemeral", "script.moved");
+ fail += check_execveat(fd_script_ephemeral, "", AT_EMPTY_PATH);
+ /* fd + no path to a file that's been deleted */
+ unlink("script.moved"); /* remove the file while fd open */
+ fail += check_execveat(fd_script_ephemeral, "", AT_EMPTY_PATH);
+
+ /* Rename a subdirectory in the path: */
+ rename("subdir.ephemeral", "subdir.moved");
+ fail += check_execveat(subdir_dfd_ephemeral, "../script", 0);
+ fail += check_execveat(subdir_dfd_ephemeral, "script", 0);
+ /* Remove the subdir and its contents */
+ unlink("subdir.moved/script");
+ unlink("subdir.moved");
+ /* Shell loads via deleted subdir OK because name starts with .. */
+ fail += check_execveat(subdir_dfd_ephemeral, "../script", 0);
+ fail += check_execveat_fail(subdir_dfd_ephemeral, "script", 0, ENOENT);
+
+ /* Flag values other than AT_SYMLINK_NOFOLLOW => EINVAL */
+ fail += check_execveat_fail(dot_dfd, "execveat", 0xFFFF, EINVAL);
+ /* Invalid path => ENOENT */
+ fail += check_execveat_fail(dot_dfd, "no-such-file", 0, ENOENT);
+ fail += check_execveat_fail(dot_dfd_path, "no-such-file", 0, ENOENT);
+ fail += check_execveat_fail(AT_FDCWD, "no-such-file", 0, ENOENT);
+ /* Attempt to execute directory => EACCES */
+ fail += check_execveat_fail(dot_dfd, "", AT_EMPTY_PATH, EACCES);
+ /* Attempt to execute non-executable => EACCES */
+ fail += check_execveat_fail(dot_dfd, "Makefile", 0, EACCES);
+ fail += check_execveat_fail(fd_denatured, "", AT_EMPTY_PATH, EACCES);
+ fail += check_execveat_fail(fd_denatured_path, "", AT_EMPTY_PATH,
+ EACCES);
+ /* Attempt to execute nonsense FD => EBADF */
+ fail += check_execveat_fail(99, "", AT_EMPTY_PATH, EBADF);
+ fail += check_execveat_fail(99, "execveat", 0, EBADF);
+ /* Attempt to execute relative to non-directory => ENOTDIR */
+ fail += check_execveat_fail(fd, "execveat", 0, ENOTDIR);
+
+ fail += check_execveat_pathmax(root_dfd, "execveat", 0);
+ fail += check_execveat_pathmax(root_dfd, "script", 1);
+ return fail;
+}
+
+static void prerequisites(void)
+{
+ int fd;
+ const char *script = "#!/bin/sh\nexit $*\n";
+
+ /* Create ephemeral copies of files */
+ exe_cp("execveat", "execveat.ephemeral");
+ exe_cp("execveat", "execveat.path.ephemeral");
+ exe_cp("script", "script.ephemeral");
+ mkdir("subdir.ephemeral", 0755);
+
+ fd = open("subdir.ephemeral/script", O_RDWR|O_CREAT|O_TRUNC, 0755);
+ write(fd, script, strlen(script));
+ close(fd);
+}
+
+int main(int argc, char **argv)
+{
+ int ii;
+ int rc;
+ const char *verbose = getenv("VERBOSE");
+
+ if (argc >= 2) {
+ /* If we are invoked with an argument, don't run tests. */
+ const char *in_test = getenv("IN_TEST");
+
+ if (verbose) {
+ printf(" invoked with:");
+ for (ii = 0; ii < argc; ii++)
+ printf(" [%d]='%s'", ii, argv[ii]);
+ printf("\n");
+ }
+
+ /* Check expected environment transferred. */
+ if (!in_test || strcmp(in_test, "yes") != 0) {
+ printf("[FAIL] (no IN_TEST=yes in env)\n");
+ return 1;
+ }
+
+ /* Use the final argument as an exit code. */
+ rc = atoi(argv[argc - 1]);
+ fflush(stdout);
+ } else {
+ prerequisites();
+ if (verbose)
+ envp[1] = "VERBOSE=1";
+ rc = run_tests();
+ if (rc > 0)
+ printf("%d tests failed\n", rc);
+ }
+ return rc;
+}
diff --git a/tools/testing/selftests/filesystems/.gitignore b/tools/testing/selftests/filesystems/.gitignore
new file mode 100644
index 000000000..8449cf671
--- /dev/null
+++ b/tools/testing/selftests/filesystems/.gitignore
@@ -0,0 +1,2 @@
+dnotify_test
+devpts_pts
diff --git a/tools/testing/selftests/filesystems/Makefile b/tools/testing/selftests/filesystems/Makefile
new file mode 100644
index 000000000..129880fb4
--- /dev/null
+++ b/tools/testing/selftests/filesystems/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+
+CFLAGS += -I../../../../usr/include/
+TEST_GEN_PROGS := devpts_pts
+TEST_GEN_PROGS_EXTENDED := dnotify_test
+
+include ../lib.mk
diff --git a/tools/testing/selftests/filesystems/devpts_pts.c b/tools/testing/selftests/filesystems/devpts_pts.c
new file mode 100644
index 000000000..b1fc9b916
--- /dev/null
+++ b/tools/testing/selftests/filesystems/devpts_pts.c
@@ -0,0 +1,316 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <asm/ioctls.h>
+#include <sys/mount.h>
+#include <sys/wait.h>
+#include "../kselftest.h"
+
+static bool terminal_dup2(int duplicate, int original)
+{
+ int ret;
+
+ ret = dup2(duplicate, original);
+ if (ret < 0)
+ return false;
+
+ return true;
+}
+
+static int terminal_set_stdfds(int fd)
+{
+ int i;
+
+ if (fd < 0)
+ return 0;
+
+ for (i = 0; i < 3; i++)
+ if (!terminal_dup2(fd, (int[]){STDIN_FILENO, STDOUT_FILENO,
+ STDERR_FILENO}[i]))
+ return -1;
+
+ return 0;
+}
+
+static int login_pty(int fd)
+{
+ int ret;
+
+ setsid();
+
+ ret = ioctl(fd, TIOCSCTTY, NULL);
+ if (ret < 0)
+ return -1;
+
+ ret = terminal_set_stdfds(fd);
+ if (ret < 0)
+ return -1;
+
+ if (fd > STDERR_FILENO)
+ close(fd);
+
+ return 0;
+}
+
+static int wait_for_pid(pid_t pid)
+{
+ int status, ret;
+
+again:
+ ret = waitpid(pid, &status, 0);
+ if (ret == -1) {
+ if (errno == EINTR)
+ goto again;
+ return -1;
+ }
+ if (ret != pid)
+ goto again;
+
+ if (!WIFEXITED(status) || WEXITSTATUS(status) != 0)
+ return -1;
+
+ return 0;
+}
+
+static int resolve_procfd_symlink(int fd, char *buf, size_t buflen)
+{
+ int ret;
+ char procfd[4096];
+
+ ret = snprintf(procfd, 4096, "/proc/self/fd/%d", fd);
+ if (ret < 0 || ret >= 4096)
+ return -1;
+
+ ret = readlink(procfd, buf, buflen);
+ if (ret < 0 || (size_t)ret >= buflen)
+ return -1;
+
+ buf[ret] = '\0';
+
+ return 0;
+}
+
+static int do_tiocgptpeer(char *ptmx, char *expected_procfd_contents)
+{
+ int ret;
+ int master = -1, slave = -1, fret = -1;
+
+ master = open(ptmx, O_RDWR | O_NOCTTY | O_CLOEXEC);
+ if (master < 0) {
+ fprintf(stderr, "Failed to open \"%s\": %s\n", ptmx,
+ strerror(errno));
+ return -1;
+ }
+
+ /*
+ * grantpt() makes assumptions about /dev/pts/ so ignore it. It's also
+ * not really needed.
+ */
+ ret = unlockpt(master);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to unlock terminal\n");
+ goto do_cleanup;
+ }
+
+#ifdef TIOCGPTPEER
+ slave = ioctl(master, TIOCGPTPEER, O_RDWR | O_NOCTTY | O_CLOEXEC);
+#endif
+ if (slave < 0) {
+ if (errno == EINVAL) {
+ fprintf(stderr, "TIOCGPTPEER is not supported. "
+ "Skipping test.\n");
+ fret = KSFT_SKIP;
+ } else {
+ fprintf(stderr,
+ "Failed to perform TIOCGPTPEER ioctl\n");
+ fret = EXIT_FAILURE;
+ }
+ goto do_cleanup;
+ }
+
+ pid_t pid = fork();
+ if (pid < 0)
+ goto do_cleanup;
+
+ if (pid == 0) {
+ char buf[4096];
+
+ ret = login_pty(slave);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to setup terminal\n");
+ _exit(EXIT_FAILURE);
+ }
+
+ ret = resolve_procfd_symlink(STDIN_FILENO, buf, sizeof(buf));
+ if (ret < 0) {
+ fprintf(stderr, "Failed to retrieve pathname of pts "
+ "slave file descriptor\n");
+ _exit(EXIT_FAILURE);
+ }
+
+ if (strncmp(expected_procfd_contents, buf,
+ strlen(expected_procfd_contents)) != 0) {
+ fprintf(stderr, "Received invalid contents for "
+ "\"/proc/<pid>/fd/%d\" symlink: %s\n",
+ STDIN_FILENO, buf);
+ _exit(-1);
+ }
+
+ fprintf(stderr, "Contents of \"/proc/<pid>/fd/%d\" "
+ "symlink are valid: %s\n", STDIN_FILENO, buf);
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ ret = wait_for_pid(pid);
+ if (ret < 0)
+ goto do_cleanup;
+
+ fret = EXIT_SUCCESS;
+
+do_cleanup:
+ if (master >= 0)
+ close(master);
+ if (slave >= 0)
+ close(slave);
+
+ return fret;
+}
+
+static int verify_non_standard_devpts_mount(void)
+{
+ char *mntpoint;
+ int ret = -1;
+ char devpts[] = P_tmpdir "/devpts_fs_XXXXXX";
+ char ptmx[] = P_tmpdir "/devpts_fs_XXXXXX/ptmx";
+
+ ret = umount("/dev/pts");
+ if (ret < 0) {
+ fprintf(stderr, "Failed to unmount \"/dev/pts\": %s\n",
+ strerror(errno));
+ return -1;
+ }
+
+ (void)umount("/dev/ptmx");
+
+ mntpoint = mkdtemp(devpts);
+ if (!mntpoint) {
+ fprintf(stderr, "Failed to create temporary mountpoint: %s\n",
+ strerror(errno));
+ return -1;
+ }
+
+ ret = mount("devpts", mntpoint, "devpts", MS_NOSUID | MS_NOEXEC,
+ "newinstance,ptmxmode=0666,mode=0620,gid=5");
+ if (ret < 0) {
+ fprintf(stderr, "Failed to mount devpts fs to \"%s\" in new "
+ "mount namespace: %s\n", mntpoint,
+ strerror(errno));
+ unlink(mntpoint);
+ return -1;
+ }
+
+ ret = snprintf(ptmx, sizeof(ptmx), "%s/ptmx", devpts);
+ if (ret < 0 || (size_t)ret >= sizeof(ptmx)) {
+ unlink(mntpoint);
+ return -1;
+ }
+
+ ret = do_tiocgptpeer(ptmx, mntpoint);
+ unlink(mntpoint);
+ if (ret < 0)
+ return -1;
+
+ return 0;
+}
+
+static int verify_ptmx_bind_mount(void)
+{
+ int ret;
+
+ ret = mount("/dev/pts/ptmx", "/dev/ptmx", NULL, MS_BIND, NULL);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to bind mount \"/dev/pts/ptmx\" to "
+ "\"/dev/ptmx\" mount namespace\n");
+ return -1;
+ }
+
+ ret = do_tiocgptpeer("/dev/ptmx", "/dev/pts/");
+ if (ret < 0)
+ return -1;
+
+ return 0;
+}
+
+static int verify_invalid_ptmx_bind_mount(void)
+{
+ int ret;
+ char mntpoint_fd;
+ char ptmx[] = P_tmpdir "/devpts_ptmx_XXXXXX";
+
+ mntpoint_fd = mkstemp(ptmx);
+ if (mntpoint_fd < 0) {
+ fprintf(stderr, "Failed to create temporary directory: %s\n",
+ strerror(errno));
+ return -1;
+ }
+
+ ret = mount("/dev/pts/ptmx", ptmx, NULL, MS_BIND, NULL);
+ close(mntpoint_fd);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to bind mount \"/dev/pts/ptmx\" to "
+ "\"%s\" mount namespace\n", ptmx);
+ return -1;
+ }
+
+ ret = do_tiocgptpeer(ptmx, "/dev/pts/");
+ if (ret == 0)
+ return -1;
+
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ int ret;
+
+ if (!isatty(STDIN_FILENO)) {
+ fprintf(stderr, "Standard input file descriptor is not attached "
+ "to a terminal. Skipping test\n");
+ exit(KSFT_SKIP);
+ }
+
+ ret = unshare(CLONE_NEWNS);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to unshare mount namespace\n");
+ exit(EXIT_FAILURE);
+ }
+
+ ret = mount("", "/", NULL, MS_PRIVATE | MS_REC, 0);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to make \"/\" MS_PRIVATE in new mount "
+ "namespace\n");
+ exit(EXIT_FAILURE);
+ }
+
+ ret = verify_ptmx_bind_mount();
+ if (ret < 0)
+ exit(EXIT_FAILURE);
+
+ ret = verify_invalid_ptmx_bind_mount();
+ if (ret < 0)
+ exit(EXIT_FAILURE);
+
+ ret = verify_non_standard_devpts_mount();
+ if (ret < 0)
+ exit(EXIT_FAILURE);
+
+ exit(EXIT_SUCCESS);
+}
diff --git a/tools/testing/selftests/filesystems/dnotify_test.c b/tools/testing/selftests/filesystems/dnotify_test.c
new file mode 100644
index 000000000..c0a9b2d33
--- /dev/null
+++ b/tools/testing/selftests/filesystems/dnotify_test.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE /* needed to get the defines */
+#include <fcntl.h> /* in glibc 2.2 this has the needed
+ values defined */
+#include <signal.h>
+#include <stdio.h>
+#include <unistd.h>
+
+static volatile int event_fd;
+
+static void handler(int sig, siginfo_t *si, void *data)
+{
+ event_fd = si->si_fd;
+}
+
+int main(void)
+{
+ struct sigaction act;
+ int fd;
+
+ act.sa_sigaction = handler;
+ sigemptyset(&act.sa_mask);
+ act.sa_flags = SA_SIGINFO;
+ sigaction(SIGRTMIN + 1, &act, NULL);
+
+ fd = open(".", O_RDONLY);
+ fcntl(fd, F_SETSIG, SIGRTMIN + 1);
+ fcntl(fd, F_NOTIFY, DN_MODIFY|DN_CREATE|DN_MULTISHOT);
+ /* we will now be notified if any of the files
+ in "." is modified or new files are created */
+ while (1) {
+ pause();
+ printf("Got event on fd=%d\n", event_fd);
+ }
+}
diff --git a/tools/testing/selftests/firmware/Makefile b/tools/testing/selftests/firmware/Makefile
new file mode 100644
index 000000000..261c81f08
--- /dev/null
+++ b/tools/testing/selftests/firmware/Makefile
@@ -0,0 +1,12 @@
+# Makefile for firmware loading selftests
+
+# No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
+all:
+
+TEST_PROGS := fw_run_tests.sh
+TEST_FILES := fw_fallback.sh fw_filesystem.sh fw_lib.sh
+
+include ../lib.mk
+
+# Nothing to clean up.
+clean:
diff --git a/tools/testing/selftests/firmware/config b/tools/testing/selftests/firmware/config
new file mode 100644
index 000000000..bf634dda0
--- /dev/null
+++ b/tools/testing/selftests/firmware/config
@@ -0,0 +1,5 @@
+CONFIG_TEST_FIRMWARE=y
+CONFIG_FW_LOADER=y
+CONFIG_FW_LOADER_USER_HELPER=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
diff --git a/tools/testing/selftests/firmware/fw_fallback.sh b/tools/testing/selftests/firmware/fw_fallback.sh
new file mode 100755
index 000000000..70d18be46
--- /dev/null
+++ b/tools/testing/selftests/firmware/fw_fallback.sh
@@ -0,0 +1,283 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# This validates that the kernel will fall back to using the fallback mechanism
+# to load firmware it can't find on disk itself. We must request a firmware
+# that the kernel won't find, and any installed helper (e.g. udev) also
+# won't find so that we can do the load ourself manually.
+set -e
+
+TEST_REQS_FW_SYSFS_FALLBACK="yes"
+TEST_REQS_FW_SET_CUSTOM_PATH="no"
+TEST_DIR=$(dirname $0)
+source $TEST_DIR/fw_lib.sh
+
+check_mods
+check_setup
+verify_reqs
+setup_tmp_file
+
+trap "test_finish" EXIT
+
+load_fw()
+{
+ local name="$1"
+ local file="$2"
+
+ # This will block until our load (below) has finished.
+ echo -n "$name" >"$DIR"/trigger_request &
+
+ # Give kernel a chance to react.
+ local timeout=10
+ while [ ! -e "$DIR"/"$name"/loading ]; do
+ sleep 0.1
+ timeout=$(( $timeout - 1 ))
+ if [ "$timeout" -eq 0 ]; then
+ echo "$0: firmware interface never appeared" >&2
+ exit 1
+ fi
+ done
+
+ echo 1 >"$DIR"/"$name"/loading
+ cat "$file" >"$DIR"/"$name"/data
+ echo 0 >"$DIR"/"$name"/loading
+
+ # Wait for request to finish.
+ wait
+}
+
+load_fw_cancel()
+{
+ local name="$1"
+ local file="$2"
+
+ # This will block until our load (below) has finished.
+ echo -n "$name" >"$DIR"/trigger_request 2>/dev/null &
+
+ # Give kernel a chance to react.
+ local timeout=10
+ while [ ! -e "$DIR"/"$name"/loading ]; do
+ sleep 0.1
+ timeout=$(( $timeout - 1 ))
+ if [ "$timeout" -eq 0 ]; then
+ echo "$0: firmware interface never appeared" >&2
+ exit 1
+ fi
+ done
+
+ echo -1 >"$DIR"/"$name"/loading
+
+ # Wait for request to finish.
+ wait
+}
+
+load_fw_custom()
+{
+ if [ ! -e "$DIR"/trigger_custom_fallback ]; then
+ echo "$0: custom fallback trigger not present, ignoring test" >&2
+ exit $ksft_skip
+ fi
+
+ local name="$1"
+ local file="$2"
+
+ echo -n "$name" >"$DIR"/trigger_custom_fallback 2>/dev/null &
+
+ # Give kernel a chance to react.
+ local timeout=10
+ while [ ! -e "$DIR"/"$name"/loading ]; do
+ sleep 0.1
+ timeout=$(( $timeout - 1 ))
+ if [ "$timeout" -eq 0 ]; then
+ echo "$0: firmware interface never appeared" >&2
+ exit 1
+ fi
+ done
+
+ echo 1 >"$DIR"/"$name"/loading
+ cat "$file" >"$DIR"/"$name"/data
+ echo 0 >"$DIR"/"$name"/loading
+
+ # Wait for request to finish.
+ wait
+ return 0
+}
+
+
+load_fw_custom_cancel()
+{
+ if [ ! -e "$DIR"/trigger_custom_fallback ]; then
+ echo "$0: canceling custom fallback trigger not present, ignoring test" >&2
+ exit $ksft_skip
+ fi
+
+ local name="$1"
+ local file="$2"
+
+ echo -n "$name" >"$DIR"/trigger_custom_fallback 2>/dev/null &
+
+ # Give kernel a chance to react.
+ local timeout=10
+ while [ ! -e "$DIR"/"$name"/loading ]; do
+ sleep 0.1
+ timeout=$(( $timeout - 1 ))
+ if [ "$timeout" -eq 0 ]; then
+ echo "$0: firmware interface never appeared" >&2
+ exit 1
+ fi
+ done
+
+ echo -1 >"$DIR"/"$name"/loading
+
+ # Wait for request to finish.
+ wait
+ return 0
+}
+
+load_fw_fallback_with_child()
+{
+ local name="$1"
+ local file="$2"
+
+ # This is the value already set but we want to be explicit
+ echo 4 >/sys/class/firmware/timeout
+
+ sleep 1 &
+ SECONDS_BEFORE=$(date +%s)
+ echo -n "$name" >"$DIR"/trigger_request 2>/dev/null
+ SECONDS_AFTER=$(date +%s)
+ SECONDS_DELTA=$(($SECONDS_AFTER - $SECONDS_BEFORE))
+ if [ "$SECONDS_DELTA" -lt 4 ]; then
+ RET=1
+ else
+ RET=0
+ fi
+ wait
+ return $RET
+}
+
+test_syfs_timeout()
+{
+ DEVPATH="$DIR"/"nope-$NAME"/loading
+
+ # Test failure when doing nothing (timeout works).
+ echo -n 2 >/sys/class/firmware/timeout
+ echo -n "nope-$NAME" >"$DIR"/trigger_request 2>/dev/null &
+
+ # Give the kernel some time to load the loading file, must be less
+ # than the timeout above.
+ sleep 1
+ if [ ! -f $DEVPATH ]; then
+ echo "$0: fallback mechanism immediately cancelled"
+ echo ""
+ echo "The file never appeared: $DEVPATH"
+ echo ""
+ echo "This might be a distribution udev rule setup by your distribution"
+ echo "to immediately cancel all fallback requests, this must be"
+ echo "removed before running these tests. To confirm look for"
+ echo "a firmware rule like /lib/udev/rules.d/50-firmware.rules"
+ echo "and see if you have something like this:"
+ echo ""
+ echo "SUBSYSTEM==\"firmware\", ACTION==\"add\", ATTR{loading}=\"-1\""
+ echo ""
+ echo "If you do remove this file or comment out this line before"
+ echo "proceeding with these tests."
+ exit 1
+ fi
+
+ if diff -q "$FW" /dev/test_firmware >/dev/null ; then
+ echo "$0: firmware was not expected to match" >&2
+ exit 1
+ else
+ echo "$0: timeout works"
+ fi
+}
+
+run_sysfs_main_tests()
+{
+ test_syfs_timeout
+ # Put timeout high enough for us to do work but not so long that failures
+ # slow down this test too much.
+ echo 4 >/sys/class/firmware/timeout
+
+ # Load this script instead of the desired firmware.
+ load_fw "$NAME" "$0"
+ if diff -q "$FW" /dev/test_firmware >/dev/null ; then
+ echo "$0: firmware was not expected to match" >&2
+ exit 1
+ else
+ echo "$0: firmware comparison works"
+ fi
+
+ # Do a proper load, which should work correctly.
+ load_fw "$NAME" "$FW"
+ if ! diff -q "$FW" /dev/test_firmware >/dev/null ; then
+ echo "$0: firmware was not loaded" >&2
+ exit 1
+ else
+ echo "$0: fallback mechanism works"
+ fi
+
+ load_fw_cancel "nope-$NAME" "$FW"
+ if diff -q "$FW" /dev/test_firmware >/dev/null ; then
+ echo "$0: firmware was expected to be cancelled" >&2
+ exit 1
+ else
+ echo "$0: cancelling fallback mechanism works"
+ fi
+
+ set +e
+ load_fw_fallback_with_child "nope-signal-$NAME" "$FW"
+ if [ "$?" -eq 0 ]; then
+ echo "$0: SIGCHLD on sync ignored as expected" >&2
+ else
+ echo "$0: error - sync firmware request cancelled due to SIGCHLD" >&2
+ exit 1
+ fi
+ set -e
+}
+
+run_sysfs_custom_load_tests()
+{
+ RANDOM_FILE_PATH=$(setup_random_file)
+ RANDOM_FILE="$(basename $RANDOM_FILE_PATH)"
+ if load_fw_custom "$RANDOM_FILE" "$RANDOM_FILE_PATH" ; then
+ if ! diff -q "$RANDOM_FILE_PATH" /dev/test_firmware >/dev/null ; then
+ echo "$0: firmware was not loaded" >&2
+ exit 1
+ else
+ echo "$0: custom fallback loading mechanism works"
+ fi
+ fi
+
+ RANDOM_FILE_PATH=$(setup_random_file)
+ RANDOM_FILE="$(basename $RANDOM_FILE_PATH)"
+ if load_fw_custom "$RANDOM_FILE" "$RANDOM_FILE_PATH" ; then
+ if ! diff -q "$RANDOM_FILE_PATH" /dev/test_firmware >/dev/null ; then
+ echo "$0: firmware was not loaded" >&2
+ exit 1
+ else
+ echo "$0: custom fallback loading mechanism works"
+ fi
+ fi
+
+ RANDOM_FILE_REAL="$RANDOM_FILE_PATH"
+ FAKE_RANDOM_FILE_PATH=$(setup_random_file_fake)
+ FAKE_RANDOM_FILE="$(basename $FAKE_RANDOM_FILE_PATH)"
+
+ if load_fw_custom_cancel "$FAKE_RANDOM_FILE" "$RANDOM_FILE_REAL" ; then
+ if diff -q "$RANDOM_FILE_PATH" /dev/test_firmware >/dev/null ; then
+ echo "$0: firmware was expected to be cancelled" >&2
+ exit 1
+ else
+ echo "$0: cancelling custom fallback mechanism works"
+ fi
+ fi
+}
+
+if [ "$HAS_FW_LOADER_USER_HELPER_FALLBACK" = "yes" ]; then
+ run_sysfs_main_tests
+fi
+
+run_sysfs_custom_load_tests
+
+exit 0
diff --git a/tools/testing/selftests/firmware/fw_filesystem.sh b/tools/testing/selftests/firmware/fw_filesystem.sh
new file mode 100755
index 000000000..a4320c4b4
--- /dev/null
+++ b/tools/testing/selftests/firmware/fw_filesystem.sh
@@ -0,0 +1,332 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# This validates that the kernel will load firmware out of its list of
+# firmware locations on disk. Since the user helper does similar work,
+# we reset the custom load directory to a location the user helper doesn't
+# know so we can be sure we're not accidentally testing the user helper.
+set -e
+
+TEST_REQS_FW_SYSFS_FALLBACK="no"
+TEST_REQS_FW_SET_CUSTOM_PATH="yes"
+TEST_DIR=$(dirname $0)
+source $TEST_DIR/fw_lib.sh
+
+check_mods
+check_setup
+verify_reqs
+setup_tmp_file
+
+trap "test_finish" EXIT
+
+if [ "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then
+ # Turn down the timeout so failures don't take so long.
+ echo 1 >/sys/class/firmware/timeout
+fi
+
+if printf '\000' >"$DIR"/trigger_request 2> /dev/null; then
+ echo "$0: empty filename should not succeed" >&2
+ exit 1
+fi
+
+if [ ! -e "$DIR"/trigger_async_request ]; then
+ echo "$0: empty filename: async trigger not present, ignoring test" >&2
+ exit $ksft_skip
+else
+ if printf '\000' >"$DIR"/trigger_async_request 2> /dev/null; then
+ echo "$0: empty filename should not succeed (async)" >&2
+ exit 1
+ fi
+fi
+
+# Request a firmware that doesn't exist, it should fail.
+if echo -n "nope-$NAME" >"$DIR"/trigger_request 2> /dev/null; then
+ echo "$0: firmware shouldn't have loaded" >&2
+ exit 1
+fi
+if diff -q "$FW" /dev/test_firmware >/dev/null ; then
+ echo "$0: firmware was not expected to match" >&2
+ exit 1
+else
+ if [ "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then
+ echo "$0: timeout works"
+ fi
+fi
+
+# This should succeed via kernel load or will fail after 1 second after
+# being handed over to the user helper, which won't find the fw either.
+if ! echo -n "$NAME" >"$DIR"/trigger_request ; then
+ echo "$0: could not trigger request" >&2
+ exit 1
+fi
+
+# Verify the contents are what we expect.
+if ! diff -q "$FW" /dev/test_firmware >/dev/null ; then
+ echo "$0: firmware was not loaded" >&2
+ exit 1
+else
+ echo "$0: filesystem loading works"
+fi
+
+# Try the asynchronous version too
+if [ ! -e "$DIR"/trigger_async_request ]; then
+ echo "$0: firmware loading: async trigger not present, ignoring test" >&2
+ exit $ksft_skip
+else
+ if ! echo -n "$NAME" >"$DIR"/trigger_async_request ; then
+ echo "$0: could not trigger async request" >&2
+ exit 1
+ fi
+
+ # Verify the contents are what we expect.
+ if ! diff -q "$FW" /dev/test_firmware >/dev/null ; then
+ echo "$0: firmware was not loaded (async)" >&2
+ exit 1
+ else
+ echo "$0: async filesystem loading works"
+ fi
+fi
+
+### Batched requests tests
+test_config_present()
+{
+ if [ ! -f $DIR/reset ]; then
+ echo "Configuration triggers not present, ignoring test"
+ exit $ksft_skip
+ fi
+}
+
+# Defaults :
+#
+# send_uevent: 1
+# sync_direct: 0
+# name: test-firmware.bin
+# num_requests: 4
+config_reset()
+{
+ echo 1 > $DIR/reset
+}
+
+release_all_firmware()
+{
+ echo 1 > $DIR/release_all_firmware
+}
+
+config_set_name()
+{
+ echo -n $1 > $DIR/config_name
+}
+
+config_set_sync_direct()
+{
+ echo 1 > $DIR/config_sync_direct
+}
+
+config_unset_sync_direct()
+{
+ echo 0 > $DIR/config_sync_direct
+}
+
+config_set_uevent()
+{
+ echo 1 > $DIR/config_send_uevent
+}
+
+config_unset_uevent()
+{
+ echo 0 > $DIR/config_send_uevent
+}
+
+config_trigger_sync()
+{
+ echo -n 1 > $DIR/trigger_batched_requests 2>/dev/null
+}
+
+config_trigger_async()
+{
+ echo -n 1 > $DIR/trigger_batched_requests_async 2> /dev/null
+}
+
+config_set_read_fw_idx()
+{
+ echo -n $1 > $DIR/config_read_fw_idx 2> /dev/null
+}
+
+read_firmwares()
+{
+ for i in $(seq 0 3); do
+ config_set_read_fw_idx $i
+ # Verify the contents are what we expect.
+ # -Z required for now -- check for yourself, md5sum
+ # on $FW and DIR/read_firmware will yield the same. Even
+ # cmp agrees, so something is off.
+ if ! diff -q -Z "$FW" $DIR/read_firmware 2>/dev/null ; then
+ echo "request #$i: firmware was not loaded" >&2
+ exit 1
+ fi
+ done
+}
+
+read_firmwares_expect_nofile()
+{
+ for i in $(seq 0 3); do
+ config_set_read_fw_idx $i
+ # Ensures contents differ
+ if diff -q -Z "$FW" $DIR/read_firmware 2>/dev/null ; then
+ echo "request $i: file was not expected to match" >&2
+ exit 1
+ fi
+ done
+}
+
+test_batched_request_firmware_nofile()
+{
+ echo -n "Batched request_firmware() nofile try #$1: "
+ config_reset
+ config_set_name nope-test-firmware.bin
+ config_trigger_sync
+ read_firmwares_expect_nofile
+ release_all_firmware
+ echo "OK"
+}
+
+test_batched_request_firmware_direct_nofile()
+{
+ echo -n "Batched request_firmware_direct() nofile try #$1: "
+ config_reset
+ config_set_name nope-test-firmware.bin
+ config_set_sync_direct
+ config_trigger_sync
+ release_all_firmware
+ echo "OK"
+}
+
+test_request_firmware_nowait_uevent_nofile()
+{
+ echo -n "Batched request_firmware_nowait(uevent=true) nofile try #$1: "
+ config_reset
+ config_set_name nope-test-firmware.bin
+ config_trigger_async
+ release_all_firmware
+ echo "OK"
+}
+
+test_wait_and_cancel_custom_load()
+{
+ if [ "$HAS_FW_LOADER_USER_HELPER" != "yes" ]; then
+ return
+ fi
+ local timeout=10
+ name=$1
+ while [ ! -e "$DIR"/"$name"/loading ]; do
+ sleep 0.1
+ timeout=$(( $timeout - 1 ))
+ if [ "$timeout" -eq 0 ]; then
+ echo "firmware interface never appeared:" >&2
+ echo "$DIR/$name/loading" >&2
+ exit 1
+ fi
+ done
+ echo -1 >"$DIR"/"$name"/loading
+}
+
+test_request_firmware_nowait_custom_nofile()
+{
+ echo -n "Batched request_firmware_nowait(uevent=false) nofile try #$1: "
+ config_reset
+ config_unset_uevent
+ RANDOM_FILE_PATH=$(setup_random_file_fake)
+ RANDOM_FILE="$(basename $RANDOM_FILE_PATH)"
+ config_set_name $RANDOM_FILE
+ config_trigger_async &
+ test_wait_and_cancel_custom_load $RANDOM_FILE
+ wait
+ release_all_firmware
+ echo "OK"
+}
+
+test_batched_request_firmware()
+{
+ echo -n "Batched request_firmware() try #$1: "
+ config_reset
+ config_trigger_sync
+ read_firmwares
+ release_all_firmware
+ echo "OK"
+}
+
+test_batched_request_firmware_direct()
+{
+ echo -n "Batched request_firmware_direct() try #$1: "
+ config_reset
+ config_set_sync_direct
+ config_trigger_sync
+ release_all_firmware
+ echo "OK"
+}
+
+test_request_firmware_nowait_uevent()
+{
+ echo -n "Batched request_firmware_nowait(uevent=true) try #$1: "
+ config_reset
+ config_trigger_async
+ release_all_firmware
+ echo "OK"
+}
+
+test_request_firmware_nowait_custom()
+{
+ echo -n "Batched request_firmware_nowait(uevent=false) try #$1: "
+ config_reset
+ config_unset_uevent
+ RANDOM_FILE_PATH=$(setup_random_file)
+ RANDOM_FILE="$(basename $RANDOM_FILE_PATH)"
+ config_set_name $RANDOM_FILE
+ config_trigger_async
+ release_all_firmware
+ echo "OK"
+}
+
+# Only continue if batched request triggers are present on the
+# test-firmware driver
+test_config_present
+
+# test with the file present
+echo
+echo "Testing with the file present..."
+for i in $(seq 1 5); do
+ test_batched_request_firmware $i
+done
+
+for i in $(seq 1 5); do
+ test_batched_request_firmware_direct $i
+done
+
+for i in $(seq 1 5); do
+ test_request_firmware_nowait_uevent $i
+done
+
+for i in $(seq 1 5); do
+ test_request_firmware_nowait_custom $i
+done
+
+# Test for file not found, errors are expected, the failure would be
+# a hung task, which would require a hard reset.
+echo
+echo "Testing with the file missing..."
+for i in $(seq 1 5); do
+ test_batched_request_firmware_nofile $i
+done
+
+for i in $(seq 1 5); do
+ test_batched_request_firmware_direct_nofile $i
+done
+
+for i in $(seq 1 5); do
+ test_request_firmware_nowait_uevent_nofile $i
+done
+
+for i in $(seq 1 5); do
+ test_request_firmware_nowait_custom_nofile $i
+done
+
+exit 0
diff --git a/tools/testing/selftests/firmware/fw_lib.sh b/tools/testing/selftests/firmware/fw_lib.sh
new file mode 100755
index 000000000..8a853ace5
--- /dev/null
+++ b/tools/testing/selftests/firmware/fw_lib.sh
@@ -0,0 +1,205 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Library of helpers for test scripts.
+set -e
+
+DIR=/sys/devices/virtual/misc/test_firmware
+
+PROC_CONFIG="/proc/config.gz"
+TEST_DIR=$(dirname $0)
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+print_reqs_exit()
+{
+ echo "You must have the following enabled in your kernel:" >&2
+ cat $TEST_DIR/config >&2
+ exit $ksft_skip
+}
+
+test_modprobe()
+{
+ if [ ! -d $DIR ]; then
+ print_reqs_exit
+ fi
+}
+
+check_mods()
+{
+ local uid=$(id -u)
+ if [ $uid -ne 0 ]; then
+ echo "skip all tests: must be run as root" >&2
+ exit $ksft_skip
+ fi
+
+ trap "test_modprobe" EXIT
+ if [ ! -d $DIR ]; then
+ modprobe test_firmware
+ fi
+ if [ ! -f $PROC_CONFIG ]; then
+ if modprobe configs 2>/dev/null; then
+ echo "Loaded configs module"
+ if [ ! -f $PROC_CONFIG ]; then
+ echo "You must have the following enabled in your kernel:" >&2
+ cat $TEST_DIR/config >&2
+ echo "Resorting to old heuristics" >&2
+ fi
+ else
+ echo "Failed to load configs module, using old heuristics" >&2
+ fi
+ fi
+}
+
+check_setup()
+{
+ HAS_FW_LOADER_USER_HELPER="$(kconfig_has CONFIG_FW_LOADER_USER_HELPER=y)"
+ HAS_FW_LOADER_USER_HELPER_FALLBACK="$(kconfig_has CONFIG_FW_LOADER_USER_HELPER_FALLBACK=y)"
+ PROC_FW_IGNORE_SYSFS_FALLBACK="0"
+ PROC_FW_FORCE_SYSFS_FALLBACK="0"
+
+ if [ -z $PROC_SYS_DIR ]; then
+ PROC_SYS_DIR="/proc/sys/kernel"
+ fi
+
+ FW_PROC="${PROC_SYS_DIR}/firmware_config"
+ FW_FORCE_SYSFS_FALLBACK="$FW_PROC/force_sysfs_fallback"
+ FW_IGNORE_SYSFS_FALLBACK="$FW_PROC/ignore_sysfs_fallback"
+
+ if [ -f $FW_FORCE_SYSFS_FALLBACK ]; then
+ PROC_FW_FORCE_SYSFS_FALLBACK="$(cat $FW_FORCE_SYSFS_FALLBACK)"
+ fi
+
+ if [ -f $FW_IGNORE_SYSFS_FALLBACK ]; then
+ PROC_FW_IGNORE_SYSFS_FALLBACK="$(cat $FW_IGNORE_SYSFS_FALLBACK)"
+ fi
+
+ if [ "$PROC_FW_FORCE_SYSFS_FALLBACK" = "1" ]; then
+ HAS_FW_LOADER_USER_HELPER="yes"
+ HAS_FW_LOADER_USER_HELPER_FALLBACK="yes"
+ fi
+
+ if [ "$PROC_FW_IGNORE_SYSFS_FALLBACK" = "1" ]; then
+ HAS_FW_LOADER_USER_HELPER_FALLBACK="no"
+ HAS_FW_LOADER_USER_HELPER="no"
+ fi
+
+ if [ "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then
+ OLD_TIMEOUT="$(cat /sys/class/firmware/timeout)"
+ fi
+
+ OLD_FWPATH="$(cat /sys/module/firmware_class/parameters/path)"
+}
+
+verify_reqs()
+{
+ if [ "$TEST_REQS_FW_SYSFS_FALLBACK" = "yes" ]; then
+ if [ ! "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then
+ echo "usermode helper disabled so ignoring test"
+ exit 0
+ fi
+ fi
+}
+
+setup_tmp_file()
+{
+ FWPATH=$(mktemp -d)
+ FW="$FWPATH/test-firmware.bin"
+ echo "ABCD0123" >"$FW"
+ NAME=$(basename "$FW")
+ if [ "$TEST_REQS_FW_SET_CUSTOM_PATH" = "yes" ]; then
+ echo -n "$FWPATH" >/sys/module/firmware_class/parameters/path
+ fi
+}
+
+__setup_random_file()
+{
+ RANDOM_FILE_PATH="$(mktemp -p $FWPATH)"
+ # mktemp says dry-run -n is unsafe, so...
+ if [[ "$1" = "fake" ]]; then
+ rm -rf $RANDOM_FILE_PATH
+ sync
+ else
+ echo "ABCD0123" >"$RANDOM_FILE_PATH"
+ fi
+ echo $RANDOM_FILE_PATH
+}
+
+setup_random_file()
+{
+ echo $(__setup_random_file)
+}
+
+setup_random_file_fake()
+{
+ echo $(__setup_random_file fake)
+}
+
+proc_set_force_sysfs_fallback()
+{
+ if [ -f $FW_FORCE_SYSFS_FALLBACK ]; then
+ echo -n $1 > $FW_FORCE_SYSFS_FALLBACK
+ check_setup
+ fi
+}
+
+proc_set_ignore_sysfs_fallback()
+{
+ if [ -f $FW_IGNORE_SYSFS_FALLBACK ]; then
+ echo -n $1 > $FW_IGNORE_SYSFS_FALLBACK
+ check_setup
+ fi
+}
+
+proc_restore_defaults()
+{
+ proc_set_force_sysfs_fallback 0
+ proc_set_ignore_sysfs_fallback 0
+}
+
+test_finish()
+{
+ if [ "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then
+ echo "$OLD_TIMEOUT" >/sys/class/firmware/timeout
+ fi
+ if [ "$TEST_REQS_FW_SET_CUSTOM_PATH" = "yes" ]; then
+ if [ "$OLD_FWPATH" = "" ]; then
+ # A zero-length write won't work; write a null byte
+ printf '\000' >/sys/module/firmware_class/parameters/path
+ else
+ echo -n "$OLD_FWPATH" >/sys/module/firmware_class/parameters/path
+ fi
+ fi
+ if [ -f $FW ]; then
+ rm -f "$FW"
+ fi
+ if [ -d $FWPATH ]; then
+ rm -rf "$FWPATH"
+ fi
+ proc_restore_defaults
+}
+
+kconfig_has()
+{
+ if [ -f $PROC_CONFIG ]; then
+ if zgrep -q $1 $PROC_CONFIG 2>/dev/null; then
+ echo "yes"
+ else
+ echo "no"
+ fi
+ else
+ # We currently don't have easy heuristics to infer this
+ # so best we can do is just try to use the kernel assuming
+ # you had enabled it. This matches the old behaviour.
+ if [ "$1" = "CONFIG_FW_LOADER_USER_HELPER_FALLBACK=y" ]; then
+ echo "yes"
+ elif [ "$1" = "CONFIG_FW_LOADER_USER_HELPER=y" ]; then
+ if [ -d /sys/class/firmware/ ]; then
+ echo yes
+ else
+ echo no
+ fi
+ fi
+ fi
+}
diff --git a/tools/testing/selftests/firmware/fw_run_tests.sh b/tools/testing/selftests/firmware/fw_run_tests.sh
new file mode 100755
index 000000000..cffdd4eb0
--- /dev/null
+++ b/tools/testing/selftests/firmware/fw_run_tests.sh
@@ -0,0 +1,70 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This runs all known tests across all known possible configurations we could
+# emulate in one run.
+
+set -e
+
+TEST_DIR=$(dirname $0)
+source $TEST_DIR/fw_lib.sh
+
+export HAS_FW_LOADER_USER_HELPER=""
+export HAS_FW_LOADER_USER_HELPER_FALLBACK=""
+
+run_tests()
+{
+ proc_set_force_sysfs_fallback $1
+ proc_set_ignore_sysfs_fallback $2
+ $TEST_DIR/fw_filesystem.sh
+
+ proc_set_force_sysfs_fallback $1
+ proc_set_ignore_sysfs_fallback $2
+ $TEST_DIR/fw_fallback.sh
+}
+
+run_test_config_0001()
+{
+ echo "-----------------------------------------------------"
+ echo "Running kernel configuration test 1 -- rare"
+ echo "Emulates:"
+ echo "CONFIG_FW_LOADER=y"
+ echo "CONFIG_FW_LOADER_USER_HELPER=n"
+ echo "CONFIG_FW_LOADER_USER_HELPER_FALLBACK=n"
+ run_tests 0 1
+}
+
+run_test_config_0002()
+{
+ echo "-----------------------------------------------------"
+ echo "Running kernel configuration test 2 -- distro"
+ echo "Emulates:"
+ echo "CONFIG_FW_LOADER=y"
+ echo "CONFIG_FW_LOADER_USER_HELPER=y"
+ echo "CONFIG_FW_LOADER_USER_HELPER_FALLBACK=n"
+ proc_set_ignore_sysfs_fallback 0
+ run_tests 0 0
+}
+
+run_test_config_0003()
+{
+ echo "-----------------------------------------------------"
+ echo "Running kernel configuration test 3 -- android"
+ echo "Emulates:"
+ echo "CONFIG_FW_LOADER=y"
+ echo "CONFIG_FW_LOADER_USER_HELPER=y"
+ echo "CONFIG_FW_LOADER_USER_HELPER_FALLBACK=y"
+ run_tests 1 0
+}
+
+check_mods
+check_setup
+
+if [ -f $FW_FORCE_SYSFS_FALLBACK ]; then
+ run_test_config_0001
+ run_test_config_0002
+ run_test_config_0003
+else
+ echo "Running basic kernel configuration, working with your config"
+ run_tests
+fi
diff --git a/tools/testing/selftests/ftrace/.gitignore b/tools/testing/selftests/ftrace/.gitignore
new file mode 100644
index 000000000..98d8a5a63
--- /dev/null
+++ b/tools/testing/selftests/ftrace/.gitignore
@@ -0,0 +1 @@
+logs
diff --git a/tools/testing/selftests/ftrace/Makefile b/tools/testing/selftests/ftrace/Makefile
new file mode 100644
index 000000000..cd1f5b3a7
--- /dev/null
+++ b/tools/testing/selftests/ftrace/Makefile
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0
+all:
+
+TEST_PROGS := ftracetest
+TEST_FILES := test.d
+EXTRA_CLEAN := $(OUTPUT)/logs/*
+
+include ../lib.mk
diff --git a/tools/testing/selftests/ftrace/README b/tools/testing/selftests/ftrace/README
new file mode 100644
index 000000000..182e76fa4
--- /dev/null
+++ b/tools/testing/selftests/ftrace/README
@@ -0,0 +1,82 @@
+Linux Ftrace Testcases
+
+This is a collection of testcases for ftrace tracing feature in the Linux
+kernel. Since ftrace exports interfaces via the debugfs, we just need
+shell scripts for testing. Feel free to add new test cases.
+
+Running the ftrace testcases
+============================
+
+At first, you need to be the root user to run this script.
+To run all testcases:
+
+ $ sudo ./ftracetest
+
+To run specific testcases:
+
+ # ./ftracetest test.d/basic3.tc
+
+Or you can also run testcases under given directory:
+
+ # ./ftracetest test.d/kprobe/
+
+Contributing new testcases
+==========================
+
+Copy test.d/template to your testcase (whose filename must have *.tc
+extension) and rewrite the test description line.
+
+ * The working directory of the script is <debugfs>/tracing/.
+
+ * Take care with side effects as the tests are run with root privilege.
+
+ * The tests should not run for a long period of time (more than 1 min.)
+ These are to be unit tests.
+
+ * You can add a directory for your testcases under test.d/ if needed.
+
+ * The test cases should run on dash (busybox shell) for testing on
+ minimal cross-build environments.
+
+ * Note that the tests are run with "set -e" (errexit) option. If any
+ command fails, the test will be terminated immediately.
+
+ * The tests can return some result codes instead of pass or fail by
+ using exit_unresolved, exit_untested, exit_unsupported and exit_xfail.
+
+Result code
+===========
+
+Ftracetest supports following result codes.
+
+ * PASS: The test succeeded as expected. The test which exits with 0 is
+ counted as passed test.
+
+ * FAIL: The test failed, but was expected to succeed. The test which exits
+ with !0 is counted as failed test.
+
+ * UNRESOLVED: The test produced unclear or intermidiate results.
+ for example, the test was interrupted
+ or the test depends on a previous test, which failed.
+ or the test was set up incorrectly
+ The test which is in above situation, must call exit_unresolved.
+
+ * UNTESTED: The test was not run, currently just a placeholder.
+ In this case, the test must call exit_untested.
+
+ * UNSUPPORTED: The test failed because of lack of feature.
+ In this case, the test must call exit_unsupported.
+
+ * XFAIL: The test failed, and was expected to fail.
+ To return XFAIL, call exit_xfail from the test.
+
+There are some sample test scripts for result code under samples/.
+You can also run samples as below:
+
+ # ./ftracetest samples/
+
+TODO
+====
+
+ * Fancy colored output :)
+
diff --git a/tools/testing/selftests/ftrace/config b/tools/testing/selftests/ftrace/config
new file mode 100644
index 000000000..07db5ab09
--- /dev/null
+++ b/tools/testing/selftests/ftrace/config
@@ -0,0 +1,9 @@
+CONFIG_KPROBES=y
+CONFIG_FTRACE=y
+CONFIG_FUNCTION_PROFILER=y
+CONFIG_TRACER_SNAPSHOT=y
+CONFIG_STACK_TRACER=y
+CONFIG_HIST_TRIGGERS=y
+CONFIG_PREEMPT_TRACER=y
+CONFIG_IRQSOFF_TRACER=y
+CONFIG_PREEMPTIRQ_DELAY_TEST=m
diff --git a/tools/testing/selftests/ftrace/ftracetest b/tools/testing/selftests/ftrace/ftracetest
new file mode 100755
index 000000000..f9a9d424c
--- /dev/null
+++ b/tools/testing/selftests/ftrace/ftracetest
@@ -0,0 +1,330 @@
+#!/bin/sh
+
+# ftracetest - Ftrace test shell scripts
+#
+# Copyright (C) Hitachi Ltd., 2014
+# Written by Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
+#
+# Released under the terms of the GPL v2.
+
+usage() { # errno [message]
+[ ! -z "$2" ] && echo $2
+echo "Usage: ftracetest [options] [testcase(s)] [testcase-directory(s)]"
+echo " Options:"
+echo " -h|--help Show help message"
+echo " -k|--keep Keep passed test logs"
+echo " -v|--verbose Increase verbosity of test messages"
+echo " -vv Alias of -v -v (Show all results in stdout)"
+echo " -vvv Alias of -v -v -v (Show all commands immediately)"
+echo " --fail-unsupported Treat UNSUPPORTED as a failure"
+echo " -d|--debug Debug mode (trace all shell commands)"
+echo " -l|--logdir <dir> Save logs on the <dir>"
+echo " If <dir> is -, all logs output in console only"
+exit $1
+}
+
+errexit() { # message
+ echo "Error: $1" 1>&2
+ exit 1
+}
+
+# Ensuring user privilege
+if [ `id -u` -ne 0 ]; then
+ errexit "this must be run by root user"
+fi
+
+# Utilities
+absdir() { # file_path
+ (cd `dirname $1`; pwd)
+}
+
+abspath() {
+ echo `absdir $1`/`basename $1`
+}
+
+find_testcases() { #directory
+ echo `find $1 -name \*.tc | sort`
+}
+
+parse_opts() { # opts
+ local OPT_TEST_CASES=
+ local OPT_TEST_DIR=
+
+ while [ ! -z "$1" ]; do
+ case "$1" in
+ --help|-h)
+ usage 0
+ ;;
+ --keep|-k)
+ KEEP_LOG=1
+ shift 1
+ ;;
+ --verbose|-v|-vv|-vvv)
+ VERBOSE=$((VERBOSE + 1))
+ [ $1 = '-vv' ] && VERBOSE=$((VERBOSE + 1))
+ [ $1 = '-vvv' ] && VERBOSE=$((VERBOSE + 2))
+ shift 1
+ ;;
+ --debug|-d)
+ DEBUG=1
+ shift 1
+ ;;
+ --fail-unsupported)
+ UNSUPPORTED_RESULT=1
+ shift 1
+ ;;
+ --logdir|-l)
+ LOG_DIR=$2
+ shift 2
+ ;;
+ *.tc)
+ if [ -f "$1" ]; then
+ OPT_TEST_CASES="$OPT_TEST_CASES `abspath $1`"
+ shift 1
+ else
+ usage 1 "$1 is not a testcase"
+ fi
+ ;;
+ *)
+ if [ -d "$1" ]; then
+ OPT_TEST_DIR=`abspath $1`
+ OPT_TEST_CASES="$OPT_TEST_CASES `find_testcases $OPT_TEST_DIR`"
+ shift 1
+ else
+ usage 1 "Invalid option ($1)"
+ fi
+ ;;
+ esac
+ done
+ if [ ! -z "$OPT_TEST_CASES" ]; then
+ TEST_CASES=$OPT_TEST_CASES
+ fi
+}
+
+# Parameters
+DEBUGFS_DIR=`grep debugfs /proc/mounts | cut -f2 -d' ' | head -1`
+if [ -z "$DEBUGFS_DIR" ]; then
+ TRACING_DIR=`grep tracefs /proc/mounts | cut -f2 -d' ' | head -1`
+else
+ TRACING_DIR=$DEBUGFS_DIR/tracing
+fi
+
+TOP_DIR=`absdir $0`
+TEST_DIR=$TOP_DIR/test.d
+TEST_CASES=`find_testcases $TEST_DIR`
+LOG_DIR=$TOP_DIR/logs/`date +%Y%m%d-%H%M%S`/
+KEEP_LOG=0
+DEBUG=0
+VERBOSE=0
+UNSUPPORTED_RESULT=0
+# Parse command-line options
+parse_opts $*
+
+[ $DEBUG -ne 0 ] && set -x
+
+# Verify parameters
+if [ -z "$TRACING_DIR" -o ! -d "$TRACING_DIR" ]; then
+ errexit "No ftrace directory found"
+fi
+
+# Preparing logs
+if [ "x$LOG_DIR" = "x-" ]; then
+ LOG_FILE=
+ date
+else
+ LOG_FILE=$LOG_DIR/ftracetest.log
+ mkdir -p $LOG_DIR || errexit "Failed to make a log directory: $LOG_DIR"
+ date > $LOG_FILE
+fi
+
+prlog() { # messages
+ [ -z "$LOG_FILE" ] && echo "$@" || echo "$@" | tee -a $LOG_FILE
+}
+catlog() { #file
+ [ -z "$LOG_FILE" ] && cat $1 || cat $1 | tee -a $LOG_FILE
+}
+prlog "=== Ftrace unit tests ==="
+
+
+# Testcase management
+# Test result codes - Dejagnu extended code
+PASS=0 # The test succeeded.
+FAIL=1 # The test failed, but was expected to succeed.
+UNRESOLVED=2 # The test produced indeterminate results. (e.g. interrupted)
+UNTESTED=3 # The test was not run, currently just a placeholder.
+UNSUPPORTED=4 # The test failed because of lack of feature.
+XFAIL=5 # The test failed, and was expected to fail.
+
+# Accumulations
+PASSED_CASES=
+FAILED_CASES=
+UNRESOLVED_CASES=
+UNTESTED_CASES=
+UNSUPPORTED_CASES=
+XFAILED_CASES=
+UNDEFINED_CASES=
+TOTAL_RESULT=0
+
+INSTANCE=
+CASENO=0
+testcase() { # testfile
+ CASENO=$((CASENO+1))
+ desc=`grep "^#[ \t]*description:" $1 | cut -f2 -d:`
+ prlog -n "[$CASENO]$INSTANCE$desc"
+}
+
+test_on_instance() { # testfile
+ grep -q "^#[ \t]*flags:.*instance" $1
+}
+
+eval_result() { # sigval
+ case $1 in
+ $PASS)
+ prlog " [PASS]"
+ PASSED_CASES="$PASSED_CASES $CASENO"
+ return 0
+ ;;
+ $FAIL)
+ prlog " [FAIL]"
+ FAILED_CASES="$FAILED_CASES $CASENO"
+ return 1 # this is a bug.
+ ;;
+ $UNRESOLVED)
+ prlog " [UNRESOLVED]"
+ UNRESOLVED_CASES="$UNRESOLVED_CASES $CASENO"
+ return 1 # this is a kind of bug.. something happened.
+ ;;
+ $UNTESTED)
+ prlog " [UNTESTED]"
+ UNTESTED_CASES="$UNTESTED_CASES $CASENO"
+ return 0
+ ;;
+ $UNSUPPORTED)
+ prlog " [UNSUPPORTED]"
+ UNSUPPORTED_CASES="$UNSUPPORTED_CASES $CASENO"
+ return $UNSUPPORTED_RESULT # depends on use case
+ ;;
+ $XFAIL)
+ prlog " [XFAIL]"
+ XFAILED_CASES="$XFAILED_CASES $CASENO"
+ return 0
+ ;;
+ *)
+ prlog " [UNDEFINED]"
+ UNDEFINED_CASES="$UNDEFINED_CASES $CASENO"
+ return 1 # this must be a test bug
+ ;;
+ esac
+}
+
+# Signal handling for result codes
+SIG_RESULT=
+SIG_BASE=36 # Use realtime signals
+SIG_PID=$$
+
+exit_pass () {
+ exit 0
+}
+
+SIG_FAIL=$((SIG_BASE + FAIL))
+exit_fail () {
+ exit 1
+}
+trap 'SIG_RESULT=$FAIL' $SIG_FAIL
+
+SIG_UNRESOLVED=$((SIG_BASE + UNRESOLVED))
+exit_unresolved () {
+ kill -s $SIG_UNRESOLVED $SIG_PID
+ exit 0
+}
+trap 'SIG_RESULT=$UNRESOLVED' $SIG_UNRESOLVED
+
+SIG_UNTESTED=$((SIG_BASE + UNTESTED))
+exit_untested () {
+ kill -s $SIG_UNTESTED $SIG_PID
+ exit 0
+}
+trap 'SIG_RESULT=$UNTESTED' $SIG_UNTESTED
+
+SIG_UNSUPPORTED=$((SIG_BASE + UNSUPPORTED))
+exit_unsupported () {
+ kill -s $SIG_UNSUPPORTED $SIG_PID
+ exit 0
+}
+trap 'SIG_RESULT=$UNSUPPORTED' $SIG_UNSUPPORTED
+
+SIG_XFAIL=$((SIG_BASE + XFAIL))
+exit_xfail () {
+ kill -s $SIG_XFAIL $SIG_PID
+ exit 0
+}
+trap 'SIG_RESULT=$XFAIL' $SIG_XFAIL
+
+__run_test() { # testfile
+ # setup PID and PPID, $$ is not updated.
+ (cd $TRACING_DIR; read PID _ < /proc/self/stat; set -e; set -x; initialize_ftrace; . $1)
+ [ $? -ne 0 ] && kill -s $SIG_FAIL $SIG_PID
+}
+
+# Run one test case
+run_test() { # testfile
+ local testname=`basename $1`
+ if [ ! -z "$LOG_FILE" ] ; then
+ local testlog=`mktemp $LOG_DIR/${testname}-log.XXXXXX`
+ else
+ local testlog=/proc/self/fd/1
+ fi
+ export TMPDIR=`mktemp -d /tmp/ftracetest-dir.XXXXXX`
+ testcase $1
+ echo "execute$INSTANCE: "$1 > $testlog
+ SIG_RESULT=0
+ if [ -z "$LOG_FILE" ]; then
+ __run_test $1 2>&1
+ elif [ $VERBOSE -ge 3 ]; then
+ __run_test $1 | tee -a $testlog 2>&1
+ elif [ $VERBOSE -eq 2 ]; then
+ __run_test $1 2>> $testlog | tee -a $testlog
+ else
+ __run_test $1 >> $testlog 2>&1
+ fi
+ eval_result $SIG_RESULT
+ if [ $? -eq 0 ]; then
+ # Remove test log if the test was done as it was expected.
+ [ $KEEP_LOG -eq 0 -a ! -z "$LOG_FILE" ] && rm $testlog
+ else
+ [ $VERBOSE -eq 1 -o $VERBOSE -eq 2 ] && catlog $testlog
+ TOTAL_RESULT=1
+ fi
+ rm -rf $TMPDIR
+}
+
+# load in the helper functions
+. $TEST_DIR/functions
+
+# Main loop
+for t in $TEST_CASES; do
+ run_test $t
+done
+
+# Test on instance loop
+INSTANCE=" (instance) "
+for t in $TEST_CASES; do
+ test_on_instance $t || continue
+ SAVED_TRACING_DIR=$TRACING_DIR
+ export TRACING_DIR=`mktemp -d $TRACING_DIR/instances/ftracetest.XXXXXX`
+ run_test $t
+ rmdir $TRACING_DIR
+ TRACING_DIR=$SAVED_TRACING_DIR
+done
+
+prlog ""
+prlog "# of passed: " `echo $PASSED_CASES | wc -w`
+prlog "# of failed: " `echo $FAILED_CASES | wc -w`
+prlog "# of unresolved: " `echo $UNRESOLVED_CASES | wc -w`
+prlog "# of untested: " `echo $UNTESTED_CASES | wc -w`
+prlog "# of unsupported: " `echo $UNSUPPORTED_CASES | wc -w`
+prlog "# of xfailed: " `echo $XFAILED_CASES | wc -w`
+prlog "# of undefined(test bug): " `echo $UNDEFINED_CASES | wc -w`
+
+# if no error, return 0
+exit $TOTAL_RESULT
diff --git a/tools/testing/selftests/ftrace/samples/fail.tc b/tools/testing/selftests/ftrace/samples/fail.tc
new file mode 100644
index 000000000..15e35b956
--- /dev/null
+++ b/tools/testing/selftests/ftrace/samples/fail.tc
@@ -0,0 +1,4 @@
+#!/bin/sh
+# description: failure-case example
+cat non-exist-file
+echo "this is not executed"
diff --git a/tools/testing/selftests/ftrace/samples/pass.tc b/tools/testing/selftests/ftrace/samples/pass.tc
new file mode 100644
index 000000000..d01549370
--- /dev/null
+++ b/tools/testing/selftests/ftrace/samples/pass.tc
@@ -0,0 +1,3 @@
+#!/bin/sh
+# description: pass-case example
+return 0
diff --git a/tools/testing/selftests/ftrace/samples/unresolved.tc b/tools/testing/selftests/ftrace/samples/unresolved.tc
new file mode 100644
index 000000000..41e99d335
--- /dev/null
+++ b/tools/testing/selftests/ftrace/samples/unresolved.tc
@@ -0,0 +1,4 @@
+#!/bin/sh
+# description: unresolved-case example
+trap exit_unresolved INT
+kill -INT $PID
diff --git a/tools/testing/selftests/ftrace/samples/unsupported.tc b/tools/testing/selftests/ftrace/samples/unsupported.tc
new file mode 100644
index 000000000..45910ff13
--- /dev/null
+++ b/tools/testing/selftests/ftrace/samples/unsupported.tc
@@ -0,0 +1,3 @@
+#!/bin/sh
+# description: unsupported-case example
+exit_unsupported
diff --git a/tools/testing/selftests/ftrace/samples/untested.tc b/tools/testing/selftests/ftrace/samples/untested.tc
new file mode 100644
index 000000000..35a45946e
--- /dev/null
+++ b/tools/testing/selftests/ftrace/samples/untested.tc
@@ -0,0 +1,3 @@
+#!/bin/sh
+# description: untested-case example
+exit_untested
diff --git a/tools/testing/selftests/ftrace/samples/xfail.tc b/tools/testing/selftests/ftrace/samples/xfail.tc
new file mode 100644
index 000000000..9dd395323
--- /dev/null
+++ b/tools/testing/selftests/ftrace/samples/xfail.tc
@@ -0,0 +1,3 @@
+#!/bin/sh
+# description: xfail-case example
+cat non-exist-file || exit_xfail
diff --git a/tools/testing/selftests/ftrace/settings b/tools/testing/selftests/ftrace/settings
new file mode 100644
index 000000000..e7b941753
--- /dev/null
+++ b/tools/testing/selftests/ftrace/settings
@@ -0,0 +1 @@
+timeout=0
diff --git a/tools/testing/selftests/ftrace/test.d/00basic/basic1.tc b/tools/testing/selftests/ftrace/test.d/00basic/basic1.tc
new file mode 100644
index 000000000..9980ff14a
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/00basic/basic1.tc
@@ -0,0 +1,3 @@
+#!/bin/sh
+# description: Basic trace file check
+test -f README -a -f trace -a -f tracing_on -a -f trace_pipe
diff --git a/tools/testing/selftests/ftrace/test.d/00basic/basic2.tc b/tools/testing/selftests/ftrace/test.d/00basic/basic2.tc
new file mode 100644
index 000000000..531e47236
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/00basic/basic2.tc
@@ -0,0 +1,9 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Basic test for tracers
+# flags: instance
+test -f available_tracers
+for t in `cat available_tracers`; do
+ echo $t > current_tracer
+done
+echo nop > current_tracer
diff --git a/tools/testing/selftests/ftrace/test.d/00basic/basic3.tc b/tools/testing/selftests/ftrace/test.d/00basic/basic3.tc
new file mode 100644
index 000000000..58a2506f7
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/00basic/basic3.tc
@@ -0,0 +1,10 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Basic trace clock test
+# flags: instance
+test -f trace_clock
+for c in `cat trace_clock | tr -d \[\]`; do
+ echo $c > trace_clock
+ grep '\['$c'\]' trace_clock
+done
+echo local > trace_clock
diff --git a/tools/testing/selftests/ftrace/test.d/00basic/basic4.tc b/tools/testing/selftests/ftrace/test.d/00basic/basic4.tc
new file mode 100644
index 000000000..0696098d6
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/00basic/basic4.tc
@@ -0,0 +1,5 @@
+#!/bin/sh
+# description: Basic event tracing check
+test -f available_events -a -f set_event -a -d events
+# check scheduler events are available
+grep -q sched available_events && exit_pass || exit_fail
diff --git a/tools/testing/selftests/ftrace/test.d/00basic/snapshot.tc b/tools/testing/selftests/ftrace/test.d/00basic/snapshot.tc
new file mode 100644
index 000000000..3b1f45e13
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/00basic/snapshot.tc
@@ -0,0 +1,28 @@
+#!/bin/sh
+# description: Snapshot and tracing setting
+# flags: instance
+
+[ ! -f snapshot ] && exit_unsupported
+
+echo "Set tracing off"
+echo 0 > tracing_on
+
+echo "Allocate and take a snapshot"
+echo 1 > snapshot
+
+# Since trace buffer is empty, snapshot is also empty, but allocated
+grep -q "Snapshot is allocated" snapshot
+
+echo "Ensure keep tracing off"
+test `cat tracing_on` -eq 0
+
+echo "Set tracing on"
+echo 1 > tracing_on
+
+echo "Take a snapshot again"
+echo 1 > snapshot
+
+echo "Ensure keep tracing on"
+test `cat tracing_on` -eq 1
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/event/event-enable.tc b/tools/testing/selftests/ftrace/test.d/event/event-enable.tc
new file mode 100644
index 000000000..9daf03418
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/event/event-enable.tc
@@ -0,0 +1,62 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event tracing - enable/disable with event level files
+# flags: instance
+
+do_reset() {
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+yield() {
+ ping localhost -c 1 || sleep .001 || usleep 1 || sleep 1
+}
+
+if [ ! -f set_event -o ! -d events/sched ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+reset_tracer
+do_reset
+
+echo 'sched:sched_switch' > set_event
+
+yield
+
+count=`cat trace | grep sched_switch | wc -l`
+if [ $count -eq 0 ]; then
+ fail "sched_switch events are not recorded"
+fi
+
+do_reset
+
+echo 1 > events/sched/sched_switch/enable
+
+yield
+
+count=`cat trace | grep sched_switch | wc -l`
+if [ $count -eq 0 ]; then
+ fail "sched_switch events are not recorded"
+fi
+
+do_reset
+
+echo 0 > events/sched/sched_switch/enable
+
+yield
+
+count=`cat trace | grep sched_switch | wc -l`
+if [ $count -ne 0 ]; then
+ fail "sched_switch events should not be recorded"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/event/event-pid.tc b/tools/testing/selftests/ftrace/test.d/event/event-pid.tc
new file mode 100644
index 000000000..132478b30
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/event/event-pid.tc
@@ -0,0 +1,74 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event tracing - restricts events based on pid
+# flags: instance
+
+do_reset() {
+ echo > set_event
+ echo > set_event_pid
+ echo 0 > options/event-fork
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+yield() {
+ ping localhost -c 1 || sleep .001 || usleep 1 || sleep 1
+}
+
+if [ ! -f set_event -o ! -d events/sched ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f set_event_pid ]; then
+ echo "event pid filtering is not supported"
+ exit_unsupported
+fi
+
+reset_tracer
+do_reset
+
+echo 1 > events/sched/sched_switch/enable
+
+yield
+
+count=`cat trace | grep sched_switch | wc -l`
+if [ $count -eq 0 ]; then
+ fail "sched_switch events are not recorded"
+fi
+
+do_reset
+
+read mypid rest < /proc/self/stat
+
+echo $mypid > set_event_pid
+echo 'sched:sched_switch' > set_event
+
+yield
+
+count=`cat trace | grep sched_switch | grep -v "pid=$mypid" | wc -l`
+if [ $count -ne 0 ]; then
+ fail "sched_switch events from other task are recorded"
+fi
+
+do_reset
+
+echo $mypid > set_event_pid
+echo 1 > options/event-fork
+echo 1 > events/sched/sched_switch/enable
+
+yield
+
+count=`cat trace | grep sched_switch | grep -v "pid=$mypid" | wc -l`
+if [ $count -eq 0 ]; then
+ fail "sched_switch events from other task are not recorded"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc b/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc
new file mode 100644
index 000000000..6a37a8642
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc
@@ -0,0 +1,62 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event tracing - enable/disable with subsystem level files
+# flags: instance
+
+do_reset() {
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+yield() {
+ ping localhost -c 1 || sleep .001 || usleep 1 || sleep 1
+}
+
+if [ ! -f set_event -o ! -d events/sched ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+reset_tracer
+do_reset
+
+echo 'sched:*' > set_event
+
+yield
+
+count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l`
+if [ $count -lt 3 ]; then
+ fail "at least fork, exec and exit events should be recorded"
+fi
+
+do_reset
+
+echo 1 > events/sched/enable
+
+yield
+
+count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l`
+if [ $count -lt 3 ]; then
+ fail "at least fork, exec and exit events should be recorded"
+fi
+
+do_reset
+
+echo 0 > events/sched/enable
+
+yield
+
+count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l`
+if [ $count -ne 0 ]; then
+ fail "any of scheduler events should not be recorded"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/event/toplevel-enable.tc b/tools/testing/selftests/ftrace/test.d/event/toplevel-enable.tc
new file mode 100644
index 000000000..4e9b6e2c0
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/event/toplevel-enable.tc
@@ -0,0 +1,65 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event tracing - enable/disable with top level files
+
+do_reset() {
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+yield() {
+ ping localhost -c 1 || sleep .001 || usleep 1 || sleep 1
+}
+
+if [ ! -f available_events -o ! -f set_event -o ! -d events ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+reset_tracer
+do_reset
+
+echo '*:*' > set_event
+
+yield
+
+echo 0 > tracing_on
+
+count=`head -n 128 trace | grep -v ^# | wc -l`
+if [ $count -eq 0 ]; then
+ fail "none of events are recorded"
+fi
+
+do_reset
+
+echo 1 > events/enable
+echo 1 > tracing_on
+
+yield
+
+echo 0 > tracing_on
+count=`head -n 128 trace | grep -v ^# | wc -l`
+if [ $count -eq 0 ]; then
+ fail "none of events are recorded"
+fi
+
+do_reset
+
+echo 0 > events/enable
+
+yield
+
+count=`cat trace | grep -v ^# | wc -l`
+if [ $count -ne 0 ]; then
+ fail "any of events should not be recorded"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc
new file mode 100644
index 000000000..1aec99d10
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc
@@ -0,0 +1,92 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: ftrace - function graph filters with stack tracer
+
+# Make sure that function graph filtering works, and is not
+# affected by other tracers enabled (like stack tracer)
+
+if ! grep -q function_graph available_tracers; then
+ echo "no function graph tracer configured"
+ exit_unsupported
+fi
+
+if [ ! -f set_ftrace_filter ]; then
+ echo "set_ftrace_filter not found? Is dynamic ftrace not set?"
+ exit_unsupported
+fi
+
+do_reset() {
+ reset_tracer
+ if [ -e /proc/sys/kernel/stack_tracer_enabled ]; then
+ echo 0 > /proc/sys/kernel/stack_tracer_enabled
+ fi
+ enable_tracing
+ clear_trace
+ echo > set_ftrace_filter
+}
+
+fail() { # msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+disable_tracing
+clear_trace;
+
+# filter something, schedule is always good
+if ! echo "schedule" > set_ftrace_filter; then
+ # test for powerpc 64
+ if ! echo ".schedule" > set_ftrace_filter; then
+ fail "can not enable schedule filter"
+ fi
+fi
+
+echo function_graph > current_tracer
+
+if [ ! -f stack_trace ]; then
+ echo "Stack tracer not configured"
+ do_reset
+ exit_unsupported;
+fi
+
+echo "Now testing with stack tracer"
+
+echo 1 > /proc/sys/kernel/stack_tracer_enabled
+
+disable_tracing
+clear_trace
+enable_tracing
+sleep 1
+
+count=`cat trace | grep '()' | grep -v schedule | wc -l`
+
+if [ $count -ne 0 ]; then
+ fail "Graph filtering not working with stack tracer?"
+fi
+
+# Make sure we did find something
+count=`cat trace | grep 'schedule()' | wc -l`
+if [ $count -eq 0 ]; then
+ fail "No schedule traces found?"
+fi
+
+echo 0 > /proc/sys/kernel/stack_tracer_enabled
+clear_trace
+sleep 1
+
+
+count=`cat trace | grep '()' | grep -v schedule | wc -l`
+
+if [ $count -ne 0 ]; then
+ fail "Graph filtering not working after stack tracer disabled?"
+fi
+
+count=`cat trace | grep 'schedule()' | wc -l`
+if [ $count -eq 0 ]; then
+ fail "No schedule traces found?"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc
new file mode 100644
index 000000000..9f8d27ca3
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc
@@ -0,0 +1,53 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: ftrace - function graph filters
+
+# Make sure that function graph filtering works
+
+if ! grep -q function_graph available_tracers; then
+ echo "no function graph tracer configured"
+ exit_unsupported
+fi
+
+do_reset() {
+ reset_tracer
+ enable_tracing
+ clear_trace
+}
+
+fail() { # msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+disable_tracing
+clear_trace
+
+# filter something, schedule is always good
+if ! echo "schedule" > set_ftrace_filter; then
+ # test for powerpc 64
+ if ! echo ".schedule" > set_ftrace_filter; then
+ fail "can not enable schedule filter"
+ fi
+fi
+
+echo function_graph > current_tracer
+enable_tracing
+sleep 1
+# search for functions (has "()" on the line), and make sure
+# that only the schedule function was found
+count=`cat trace | grep '()' | grep -v schedule | wc -l`
+if [ $count -ne 0 ]; then
+ fail "Graph filtering not working by itself?"
+fi
+
+# Make sure we did find something
+count=`cat trace | grep 'schedule()' | wc -l`
+if [ $count -eq 0 ]; then
+ fail "No schedule traces found?"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc
new file mode 100644
index 000000000..f4e92afab
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc
@@ -0,0 +1,62 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: ftrace - function glob filters
+
+# Make sure that function glob matching filter works.
+
+if ! grep -q function available_tracers; then
+ echo "no function tracer configured"
+ exit_unsupported
+fi
+
+disable_tracing
+clear_trace
+
+ftrace_filter_check() { # glob grep
+ echo "$1" > set_ftrace_filter
+ cut -f1 -d" " set_ftrace_filter > $TMPDIR/actual
+ cut -f1 -d" " available_filter_functions | grep "$2" > $TMPDIR/expected
+ DIFF=`diff $TMPDIR/actual $TMPDIR/expected`
+ test -z "$DIFF"
+}
+
+# filter by *, front match
+ftrace_filter_check '*schedule' '^.*schedule$'
+
+# filter by *, middle match
+ftrace_filter_check '*schedule*' '^.*schedule.*$'
+
+# filter by *, end match
+ftrace_filter_check 'schedule*' '^schedule.*$'
+
+# filter by *mid*end
+ftrace_filter_check '*pin*lock' '.*pin.*lock$'
+
+# filter by start*mid*
+ftrace_filter_check 'mutex*try*' '^mutex.*try.*'
+
+# Advanced full-glob matching feature is recently supported.
+# Skip the tests if we are sure the kernel does not support it.
+if grep -q 'accepts: .* glob-matching-pattern' README ; then
+
+# filter by *, both side match
+ftrace_filter_check 'sch*ule' '^sch.*ule$'
+
+# filter by char class.
+ftrace_filter_check '[Ss]y[Ss]_*' '^[Ss]y[Ss]_.*$'
+
+# filter by ?, schedule is always good
+if ! echo "sch?dule" > set_ftrace_filter; then
+ # test for powerpc 64
+ if ! echo ".sch?dule" > set_ftrace_filter; then
+ fail "can not enable schedule filter"
+ fi
+ cat set_ftrace_filter | grep '^.schedule$'
+else
+ cat set_ftrace_filter | grep '^schedule$'
+fi
+
+fi
+
+echo > set_ftrace_filter
+enable_tracing
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc
new file mode 100644
index 000000000..524ce24b3
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc
@@ -0,0 +1,118 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: ftrace - function pid filters
+
+# Make sure that function pid matching filter works.
+# Also test it on an instance directory
+
+if ! grep -q function available_tracers; then
+ echo "no function tracer configured"
+ exit_unsupported
+fi
+
+if [ ! -f set_ftrace_pid ]; then
+ echo "set_ftrace_pid not found? Is function tracer not set?"
+ exit_unsupported
+fi
+
+if [ ! -f set_ftrace_filter ]; then
+ echo "set_ftrace_filter not found? Is function tracer not set?"
+ exit_unsupported
+fi
+
+do_function_fork=1
+
+if [ ! -f options/function-fork ]; then
+ do_function_fork=0
+ echo "no option for function-fork found. Option will not be tested."
+fi
+
+read PID _ < /proc/self/stat
+
+if [ $do_function_fork -eq 1 ]; then
+ # default value of function-fork option
+ orig_value=`grep function-fork trace_options`
+fi
+
+do_reset() {
+ reset_tracer
+ clear_trace
+ enable_tracing
+ echo > set_ftrace_filter
+ echo > set_ftrace_pid
+
+ if [ $do_function_fork -eq 0 ]; then
+ return
+ fi
+
+ echo $orig_value > trace_options
+}
+
+fail() { # msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+yield() {
+ ping localhost -c 1 || sleep .001 || usleep 1 || sleep 1
+}
+
+do_test() {
+ disable_tracing
+
+ echo do_execve* > set_ftrace_filter
+ echo *do_fork >> set_ftrace_filter
+
+ echo $PID > set_ftrace_pid
+ echo function > current_tracer
+
+ if [ $do_function_fork -eq 1 ]; then
+ # don't allow children to be traced
+ echo nofunction-fork > trace_options
+ fi
+
+ enable_tracing
+ yield
+
+ count_pid=`cat trace | grep -v ^# | grep $PID | wc -l`
+ count_other=`cat trace | grep -v ^# | grep -v $PID | wc -l`
+
+ # count_other should be 0
+ if [ $count_pid -eq 0 -o $count_other -ne 0 ]; then
+ fail "PID filtering not working?"
+ fi
+
+ disable_tracing
+ clear_trace
+
+ if [ $do_function_fork -eq 0 ]; then
+ return
+ fi
+
+ # allow children to be traced
+ echo function-fork > trace_options
+
+ enable_tracing
+ yield
+
+ count_pid=`cat trace | grep -v ^# | grep $PID | wc -l`
+ count_other=`cat trace | grep -v ^# | grep -v $PID | wc -l`
+
+ # count_other should NOT be 0
+ if [ $count_pid -eq 0 -o $count_other -eq 0 ]; then
+ fail "PID filtering not following fork?"
+ fi
+}
+
+do_test
+
+mkdir instances/foo
+cd instances/foo
+do_test
+cd ../../
+rmdir instances/foo
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc
new file mode 100644
index 000000000..6fed4cf2d
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc
@@ -0,0 +1,123 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: ftrace - test for function event triggers
+# flags: instance
+#
+# Ftrace allows to add triggers to functions, such as enabling or disabling
+# tracing, enabling or disabling trace events, or recording a stack trace
+# within the ring buffer.
+#
+# This test is designed to test event triggers
+#
+
+# The triggers are set within the set_ftrace_filter file
+if [ ! -f set_ftrace_filter ]; then
+ echo "set_ftrace_filter not found? Is dynamic ftrace not set?"
+ exit_unsupported
+fi
+
+do_reset() {
+ reset_ftrace_filter
+ reset_tracer
+ disable_events
+ clear_trace
+ enable_tracing
+}
+
+fail() { # mesg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+SLEEP_TIME=".1"
+
+do_reset
+
+echo "Testing function probes with events:"
+
+EVENT="sched:sched_switch"
+EVENT_ENABLE="events/sched/sched_switch/enable"
+
+cnt_trace() {
+ grep -v '^#' trace | wc -l
+}
+
+test_event_enabled() {
+ val=$1
+
+ e=`cat $EVENT_ENABLE`
+ if [ "$e" != $val ]; then
+ fail "Expected $val but found $e"
+ fi
+}
+
+run_enable_disable() {
+ enable=$1 # enable
+ Enable=$2 # Enable
+ check_disable=$3 # 0
+ check_enable_star=$4 # 1*
+ check_disable_star=$5 # 0*
+
+ cnt=`cnt_trace`
+ if [ $cnt -ne 0 ]; then
+ fail "Found junk in trace file"
+ fi
+
+ echo "$Enable event all the time"
+
+ echo $check_disable > $EVENT_ENABLE
+ sleep $SLEEP_TIME
+
+ test_event_enabled $check_disable
+
+ echo "schedule:${enable}_event:$EVENT" > set_ftrace_filter
+ if [ -d ../../instances ]; then # Check instances
+ cur=`cat set_ftrace_filter`
+ top=`cat ../../set_ftrace_filter`
+ if [ "$cur" = "$top" ]; then
+ echo "This kernel is too old to support per instance filter"
+ reset_ftrace_filter
+ exit_unsupported
+ fi
+ fi
+
+ echo " make sure it works 5 times"
+
+ for i in `seq 5`; do
+ sleep $SLEEP_TIME
+ echo " test $i"
+ test_event_enabled $check_enable_star
+
+ echo $check_disable > $EVENT_ENABLE
+ done
+ sleep $SLEEP_TIME
+ echo " make sure it's still works"
+ test_event_enabled $check_enable_star
+
+ reset_ftrace_filter
+
+ echo " make sure it only works 3 times"
+
+ echo $check_disable > $EVENT_ENABLE
+ sleep $SLEEP_TIME
+
+ echo "schedule:${enable}_event:$EVENT:3" > set_ftrace_filter
+
+ for i in `seq 3`; do
+ sleep $SLEEP_TIME
+ echo " test $i"
+ test_event_enabled $check_enable_star
+
+ echo $check_disable > $EVENT_ENABLE
+ done
+
+ sleep $SLEEP_TIME
+ echo " make sure it stop working"
+ test_event_enabled $check_disable_star
+
+ do_reset
+}
+
+run_enable_disable enable Enable 0 "1*" "0*"
+run_enable_disable disable Disable 1 "0*" "1*"
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc
new file mode 100644
index 000000000..b2d5a8feb
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc
@@ -0,0 +1,81 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: ftrace - function profiler with function tracing
+
+# There was a bug after a rewrite of the ftrace infrastructure that
+# caused the function_profiler not to be able to run with the function
+# tracer, because the function_profiler used the function_graph tracer
+# and it was assumed the two could not run simultaneously.
+#
+# There was another related bug where the solution to the first bug
+# broke the way filtering of the function tracer worked.
+#
+# This test triggers those bugs on those kernels.
+#
+# We need function_graph and profiling to to run this test
+if ! grep -q function_graph available_tracers; then
+ echo "no function graph tracer configured"
+ exit_unsupported;
+fi
+
+if [ ! -f set_ftrace_filter ]; then
+ echo "set_ftrace_filter not found? Is dynamic ftrace not set?"
+ exit_unsupported
+fi
+
+if [ ! -f function_profile_enabled ]; then
+ echo "function_profile_enabled not found, function profiling enabled?"
+ exit_unsupported
+fi
+
+fail() { # mesg
+ reset_tracer
+ echo > set_ftrace_filter
+ echo $1
+ exit_fail
+}
+
+echo "Testing function tracer with profiler:"
+echo "enable function tracer"
+echo function > current_tracer
+echo "enable profiler"
+echo 1 > function_profile_enabled
+
+sleep 1
+
+echo "Now filter on just schedule"
+echo '*schedule' > set_ftrace_filter
+clear_trace
+
+echo "Now disable function profiler"
+echo 0 > function_profile_enabled
+
+sleep 1
+
+# make sure only schedule functions exist
+
+echo "testing if only schedule is being traced"
+if grep -v -e '^#' -e 'schedule' trace; then
+ fail "more than schedule was found"
+fi
+
+echo "Make sure schedule was traced"
+if ! grep -e 'schedule' trace > /dev/null; then
+ cat trace
+ fail "can not find schedule in trace"
+fi
+
+echo > set_ftrace_filter
+clear_trace
+
+sleep 1
+
+echo "make sure something other than scheduler is being traced"
+if ! grep -v -e '^#' -e 'schedule' trace > /dev/null; then
+ cat trace
+ fail "no other functions besides schedule was found"
+fi
+
+reset_tracer
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc
new file mode 100644
index 000000000..68e7a48f5
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc
@@ -0,0 +1,170 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: ftrace - test reading of set_ftrace_filter
+#
+# The set_ftrace_filter file of ftrace is used to list functions as well as
+# triggers (probes) attached to functions. The code to read this file is not
+# straight forward and has had various bugs in the past. This test is designed
+# to add functions and triggers to that file in various ways and read that
+# file in various ways (cat vs dd).
+#
+
+# The triggers are set within the set_ftrace_filter file
+if [ ! -f set_ftrace_filter ]; then
+ echo "set_ftrace_filter not found? Is dynamic ftrace not set?"
+ exit_unsupported
+fi
+
+do_reset() {
+ reset_tracer
+ reset_ftrace_filter
+ disable_events
+ clear_trace
+ enable_tracing
+}
+
+fail() { # mesg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+do_reset
+
+FILTER=set_ftrace_filter
+FUNC1="schedule"
+FUNC2="do_softirq"
+
+ALL_FUNCS="#### all functions enabled ####"
+
+test_func() {
+ if ! echo "$1" | grep -q "^$2\$"; then
+ return 0
+ fi
+ echo "$1" | grep -v "^$2\$"
+ return 1
+}
+
+check_set_ftrace_filter() {
+ cat=`cat $FILTER`
+ dd1=`dd if=$FILTER bs=1 | grep -v -e 'records in' -e 'records out' -e 'bytes copied'`
+ dd100=`dd if=$FILTER bs=100 | grep -v -e 'records in' -e 'records out' -e 'bytes copied'`
+
+ echo "Testing '$@'"
+
+ while [ $# -gt 0 ]; do
+ echo "test $1"
+ if cat=`test_func "$cat" "$1"`; then
+ return 0
+ fi
+ if dd1=`test_func "$dd1" "$1"`; then
+ return 0
+ fi
+ if dd100=`test_func "$dd100" "$1"`; then
+ return 0
+ fi
+ shift
+ done
+
+ if [ -n "$cat" ]; then
+ return 0
+ fi
+ if [ -n "$dd1" ]; then
+ return 0
+ fi
+ if [ -n "$dd100" ]; then
+ return 0
+ fi
+ return 1;
+}
+
+if check_set_ftrace_filter "$ALL_FUNCS"; then
+ fail "Expected only $ALL_FUNCS"
+fi
+
+echo "$FUNC1:traceoff" > set_ftrace_filter
+if check_set_ftrace_filter "$ALL_FUNCS" "$FUNC1:traceoff:unlimited"; then
+ fail "Expected $ALL_FUNCS and $FUNC1:traceoff:unlimited"
+fi
+
+echo "$FUNC1" > set_ftrace_filter
+if check_set_ftrace_filter "$FUNC1" "$FUNC1:traceoff:unlimited"; then
+ fail "Expected $FUNC1 and $FUNC1:traceoff:unlimited"
+fi
+
+echo "$FUNC2" >> set_ftrace_filter
+if check_set_ftrace_filter "$FUNC1" "$FUNC2" "$FUNC1:traceoff:unlimited"; then
+ fail "Expected $FUNC1 $FUNC2 and $FUNC1:traceoff:unlimited"
+fi
+
+echo "$FUNC2:traceoff" >> set_ftrace_filter
+if check_set_ftrace_filter "$FUNC1" "$FUNC2" "$FUNC1:traceoff:unlimited" "$FUNC2:traceoff:unlimited"; then
+ fail "Expected $FUNC1 $FUNC2 $FUNC1:traceoff:unlimited and $FUNC2:traceoff:unlimited"
+fi
+
+echo "$FUNC1" > set_ftrace_filter
+if check_set_ftrace_filter "$FUNC1" "$FUNC1:traceoff:unlimited" "$FUNC2:traceoff:unlimited"; then
+ fail "Expected $FUNC1 $FUNC1:traceoff:unlimited and $FUNC2:traceoff:unlimited"
+fi
+
+echo > set_ftrace_filter
+if check_set_ftrace_filter "$ALL_FUNCS" "$FUNC1:traceoff:unlimited" "$FUNC2:traceoff:unlimited"; then
+ fail "Expected $ALL_FUNCS $FUNC1:traceoff:unlimited and $FUNC2:traceoff:unlimited"
+fi
+
+reset_ftrace_filter
+
+if check_set_ftrace_filter "$ALL_FUNCS"; then
+ fail "Expected $ALL_FUNCS"
+fi
+
+echo "$FUNC1" > set_ftrace_filter
+if check_set_ftrace_filter "$FUNC1" ; then
+ fail "Expected $FUNC1"
+fi
+
+echo "$FUNC2" >> set_ftrace_filter
+if check_set_ftrace_filter "$FUNC1" "$FUNC2" ; then
+ fail "Expected $FUNC1 and $FUNC2"
+fi
+
+test_actual() { # Compares $TMPDIR/expected with set_ftrace_filter
+ cat set_ftrace_filter | grep -v '#' | cut -d' ' -f1 | cut -d':' -f1 | sort -u > $TMPDIR/actual
+ DIFF=`diff $TMPDIR/actual $TMPDIR/expected`
+ test -z "$DIFF"
+}
+
+# Set traceoff trigger for all fuctions with "lock" in their name
+cat available_filter_functions | cut -d' ' -f1 | grep 'lock' | sort -u > $TMPDIR/expected
+echo '*lock*:traceoff' > set_ftrace_filter
+test_actual
+
+# now remove all with 'try' in it, and end with lock
+grep -v 'try.*lock$' $TMPDIR/expected > $TMPDIR/expected2
+mv $TMPDIR/expected2 $TMPDIR/expected
+echo '!*try*lock:traceoff' >> set_ftrace_filter
+test_actual
+
+# remove all that start with "m" and end with "lock"
+grep -v '^m.*lock$' $TMPDIR/expected > $TMPDIR/expected2
+mv $TMPDIR/expected2 $TMPDIR/expected
+echo '!m*lock:traceoff' >> set_ftrace_filter
+test_actual
+
+# remove all that start with "c" and have "unlock"
+grep -v '^c.*unlock' $TMPDIR/expected > $TMPDIR/expected2
+mv $TMPDIR/expected2 $TMPDIR/expected
+echo '!c*unlock*:traceoff' >> set_ftrace_filter
+test_actual
+
+# clear all the rest
+> $TMPDIR/expected
+echo '!*:traceoff' >> set_ftrace_filter
+test_actual
+
+rm $TMPDIR/expected
+rm $TMPDIR/actual
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_traceonoff_triggers.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_traceonoff_triggers.tc
new file mode 100644
index 000000000..f6d9ac732
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_traceonoff_triggers.tc
@@ -0,0 +1,186 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: ftrace - test for function traceon/off triggers
+# flags: instance
+#
+# Ftrace allows to add triggers to functions, such as enabling or disabling
+# tracing, enabling or disabling trace events, or recording a stack trace
+# within the ring buffer.
+#
+# This test is designed to test enabling and disabling tracing triggers
+#
+
+# The triggers are set within the set_ftrace_filter file
+if [ ! -f set_ftrace_filter ]; then
+ echo "set_ftrace_filter not found? Is dynamic ftrace not set?"
+ exit_unsupported
+fi
+
+do_reset() {
+ reset_ftrace_filter
+ reset_tracer
+ disable_events
+ clear_trace
+ enable_tracing
+}
+
+fail() { # mesg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+SLEEP_TIME=".1"
+
+do_reset
+
+echo "Testing function probes with enabling disabling tracing:"
+
+cnt_trace() {
+ grep -v '^#' trace | wc -l
+}
+
+echo '** DISABLE TRACING'
+disable_tracing
+clear_trace
+
+cnt=`cnt_trace`
+if [ $cnt -ne 0 ]; then
+ fail "Found junk in trace"
+fi
+
+
+echo '** ENABLE EVENTS'
+
+echo 1 > events/enable
+
+echo '** ENABLE TRACING'
+enable_tracing
+
+cnt=`cnt_trace`
+if [ $cnt -eq 0 ]; then
+ fail "Nothing found in trace"
+fi
+
+# powerpc uses .schedule
+func="schedule"
+available_file=available_filter_functions
+if [ -d ../../instances -a -f ../../available_filter_functions ]; then
+ available_file=../../available_filter_functions
+fi
+x=`grep '^\.schedule$' available_filter_functions | wc -l`
+if [ "$x" -eq 1 ]; then
+ func=".schedule"
+fi
+
+echo '** SET TRACEOFF'
+
+echo "$func:traceoff" > set_ftrace_filter
+if [ -d ../../instances ]; then # Check instances
+ cur=`cat set_ftrace_filter`
+ top=`cat ../../set_ftrace_filter`
+ if [ "$cur" = "$top" ]; then
+ echo "This kernel is too old to support per instance filter"
+ reset_ftrace_filter
+ exit_unsupported
+ fi
+fi
+
+cnt=`grep schedule set_ftrace_filter | wc -l`
+if [ $cnt -ne 1 ]; then
+ fail "Did not find traceoff trigger"
+fi
+
+cnt=`cnt_trace`
+sleep $SLEEP_TIME
+cnt2=`cnt_trace`
+
+if [ $cnt -ne $cnt2 ]; then
+ fail "Tracing is not stopped"
+fi
+
+on=`cat tracing_on`
+if [ $on != "0" ]; then
+ fail "Tracing is not off"
+fi
+
+csum1=`md5sum trace`
+sleep $SLEEP_TIME
+csum2=`md5sum trace`
+
+if [ "$csum1" != "$csum2" ]; then
+ fail "Tracing file is still changing"
+fi
+
+clear_trace
+
+cnt=`cnt_trace`
+if [ $cnt -ne 0 ]; then
+ fail "Tracing is still happeing"
+fi
+
+echo "!$func:traceoff" >> set_ftrace_filter
+
+cnt=`grep schedule set_ftrace_filter | wc -l`
+if [ $cnt -ne 0 ]; then
+ fail "traceoff trigger still exists"
+fi
+
+on=`cat tracing_on`
+if [ $on != "0" ]; then
+ fail "Tracing is started again"
+fi
+
+echo "$func:traceon" > set_ftrace_filter
+
+cnt=`grep schedule set_ftrace_filter | wc -l`
+if [ $cnt -ne 1 ]; then
+ fail "traceon trigger not found"
+fi
+
+cnt=`cnt_trace`
+if [ $cnt -eq 0 ]; then
+ fail "Tracing did not start"
+fi
+
+on=`cat tracing_on`
+if [ $on != "1" ]; then
+ fail "Tracing was not enabled"
+fi
+
+
+echo "!$func:traceon" >> set_ftrace_filter
+
+cnt=`grep schedule set_ftrace_filter | wc -l`
+if [ $cnt -ne 0 ]; then
+ fail "traceon trigger still exists"
+fi
+
+check_sleep() {
+ val=$1
+ sleep $SLEEP_TIME
+ cat set_ftrace_filter
+ on=`cat tracing_on`
+ if [ $on != "$val" ]; then
+ fail "Expected tracing_on to be $val, but it was $on"
+ fi
+}
+
+
+echo "$func:traceoff:3" > set_ftrace_filter
+check_sleep "0"
+echo 1 > tracing_on
+check_sleep "0"
+echo 1 > tracing_on
+check_sleep "0"
+echo 1 > tracing_on
+check_sleep "1"
+echo "!$func:traceoff:0" > set_ftrace_filter
+
+if grep -e traceon -e traceoff set_ftrace_filter; then
+ fail "Tracing on and off triggers still exist"
+fi
+
+disable_events
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions
new file mode 100644
index 000000000..e4645d5e3
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/functions
@@ -0,0 +1,100 @@
+
+clear_trace() { # reset trace output
+ echo > trace
+}
+
+disable_tracing() { # stop trace recording
+ echo 0 > tracing_on
+}
+
+enable_tracing() { # start trace recording
+ echo 1 > tracing_on
+}
+
+reset_tracer() { # reset the current tracer
+ echo nop > current_tracer
+}
+
+reset_trigger_file() {
+ # remove action triggers first
+ grep -H ':on[^:]*(' $@ |
+ while read line; do
+ cmd=`echo $line | cut -f2- -d: | cut -f1 -d"["`
+ file=`echo $line | cut -f1 -d:`
+ echo "!$cmd" >> $file
+ done
+ grep -Hv ^# $@ |
+ while read line; do
+ cmd=`echo $line | cut -f2- -d: | cut -f1 -d"["`
+ file=`echo $line | cut -f1 -d:`
+ echo "!$cmd" > $file
+ done
+}
+
+reset_trigger() { # reset all current setting triggers
+ if [ -d events/synthetic ]; then
+ reset_trigger_file events/synthetic/*/trigger
+ fi
+ reset_trigger_file events/*/*/trigger
+}
+
+reset_events_filter() { # reset all current setting filters
+ grep -v ^none events/*/*/filter |
+ while read line; do
+ echo 0 > `echo $line | cut -f1 -d:`
+ done
+}
+
+reset_ftrace_filter() { # reset all triggers in set_ftrace_filter
+ echo > set_ftrace_filter
+ grep -v '^#' set_ftrace_filter | while read t; do
+ tr=`echo $t | cut -d: -f2`
+ if [ "$tr" = "" ]; then
+ continue
+ fi
+ if ! grep -q "$t" set_ftrace_filter; then
+ continue;
+ fi
+ name=`echo $t | cut -d: -f1 | cut -d' ' -f1`
+ if [ $tr = "enable_event" -o $tr = "disable_event" ]; then
+ tr=`echo $t | cut -d: -f2-4`
+ limit=`echo $t | cut -d: -f5`
+ else
+ tr=`echo $t | cut -d: -f2`
+ limit=`echo $t | cut -d: -f3`
+ fi
+ if [ "$limit" != "unlimited" ]; then
+ tr="$tr:$limit"
+ fi
+ echo "!$name:$tr" > set_ftrace_filter
+ done
+}
+
+disable_events() {
+ echo 0 > events/enable
+}
+
+clear_synthetic_events() { # reset all current synthetic events
+ grep -v ^# synthetic_events |
+ while read line; do
+ echo "!$line" >> synthetic_events
+ done
+}
+
+initialize_ftrace() { # Reset ftrace to initial-state
+# As the initial state, ftrace will be set to nop tracer,
+# no events, no triggers, no filters, no function filters,
+# no probes, and tracing on.
+ disable_tracing
+ reset_tracer
+ reset_trigger
+ reset_events_filter
+ disable_events
+ echo > set_event_pid # event tracer is always on
+ [ -f set_ftrace_filter ] && echo | tee set_ftrace_*
+ [ -f set_graph_function ] && echo | tee set_graph_*
+ [ -f stack_trace_filter ] && echo > stack_trace_filter
+ [ -f kprobe_events ] && echo > kprobe_events
+ [ -f uprobe_events ] && echo > uprobe_events
+ enable_tracing
+}
diff --git a/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc b/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc
new file mode 100644
index 000000000..4fa0f7914
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc
@@ -0,0 +1,146 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Test creation and deletion of trace instances while setting an event
+
+if [ ! -d instances ] ; then
+ echo "no instance directory with this kernel"
+ exit_unsupported;
+fi
+
+fail() { # mesg
+ rmdir foo 2>/dev/null
+ echo $1
+ set -e
+ exit_fail
+}
+
+cd instances
+
+# we don't want to fail on error
+set +e
+
+mkdir x
+rmdir x
+result=$?
+
+if [ $result -ne 0 ]; then
+ echo "instance rmdir not supported"
+ exit_unsupported
+fi
+
+instance_slam() {
+ while :; do
+ mkdir foo 2> /dev/null
+ rmdir foo 2> /dev/null
+ done
+}
+
+instance_read() {
+ while :; do
+ cat foo/trace 1> /dev/null 2>&1
+ done
+}
+
+instance_set() {
+ while :; do
+ echo 1 > foo/events/sched/sched_switch
+ done 2> /dev/null
+}
+
+instance_slam &
+p1=$!
+echo $p1
+
+instance_set &
+p2=$!
+echo $p2
+
+instance_read &
+p3=$!
+echo $p3
+
+sleep 1
+
+kill -1 $p3
+kill -1 $p2
+kill -1 $p1
+
+echo "Wait for processes to finish"
+wait $p1 $p2 $p3
+echo "all processes finished, wait for cleanup"
+sleep 1
+
+mkdir foo
+ls foo > /dev/null
+rmdir foo
+if [ -d foo ]; then
+ fail "foo still exists"
+fi
+
+mkdir foo
+echo "schedule:enable_event:sched:sched_switch" > foo/set_ftrace_filter
+rmdir foo
+if [ -d foo ]; then
+ fail "foo still exists"
+fi
+if grep -q "schedule:enable_event:sched:sched_switch" ../set_ftrace_filter; then
+ echo "Older kernel detected. Cleanup filter"
+ echo '!schedule:enable_event:sched:sched_switch' > ../set_ftrace_filter
+fi
+
+instance_slam() {
+ while :; do
+ mkdir x
+ mkdir y
+ mkdir z
+ rmdir x
+ rmdir y
+ rmdir z
+ done 2>/dev/null
+}
+
+instance_slam &
+p1=$!
+echo $p1
+
+instance_slam &
+p2=$!
+echo $p2
+
+instance_slam &
+p3=$!
+echo $p3
+
+instance_slam &
+p4=$!
+echo $p4
+
+instance_slam &
+p5=$!
+echo $p5
+
+ls -lR >/dev/null
+sleep 1
+
+kill -1 $p1
+kill -1 $p2
+kill -1 $p3
+kill -1 $p4
+kill -1 $p5
+
+echo "Wait for processes to finish"
+wait $p1 $p2 $p3 $p4 $p5
+echo "all processes finished, wait for cleanup"
+
+mkdir x y z
+ls x y z
+rmdir x y z
+for d in x y z; do
+ if [ -d $d ]; then
+ fail "instance $d still exists"
+ fi
+done
+
+set -e
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/instances/instance.tc b/tools/testing/selftests/ftrace/test.d/instances/instance.tc
new file mode 100644
index 000000000..b84651283
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/instances/instance.tc
@@ -0,0 +1,86 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Test creation and deletion of trace instances
+
+if [ ! -d instances ] ; then
+ echo "no instance directory with this kernel"
+ exit_unsupported;
+fi
+
+fail() { # mesg
+ rmdir x y z 2>/dev/null
+ echo $1
+ set -e
+ exit_fail
+}
+
+cd instances
+
+# we don't want to fail on error
+set +e
+
+mkdir x
+rmdir x
+result=$?
+
+if [ $result -ne 0 ]; then
+ echo "instance rmdir not supported"
+ exit_unsupported
+fi
+
+instance_slam() {
+ while :; do
+ mkdir x
+ mkdir y
+ mkdir z
+ rmdir x
+ rmdir y
+ rmdir z
+ done 2>/dev/null
+}
+
+instance_slam &
+p1=$!
+echo $p1
+
+instance_slam &
+p2=$!
+echo $p2
+
+instance_slam &
+p3=$!
+echo $p3
+
+instance_slam &
+p4=$!
+echo $p4
+
+instance_slam &
+p5=$!
+echo $p5
+
+ls -lR >/dev/null
+sleep 1
+
+kill -1 $p1
+kill -1 $p2
+kill -1 $p3
+kill -1 $p4
+kill -1 $p5
+
+echo "Wait for processes to finish"
+wait $p1 $p2 $p3 $p4 $p5
+echo "all processes finished, wait for cleanup"
+
+mkdir x y z
+ls x y z
+rmdir x y z
+for d in x y z; do
+ if [ -d $d ]; then
+ fail "instance $d still exists"
+ fi
+done
+
+set -e
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc b/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc
new file mode 100644
index 000000000..4604d2103
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc
@@ -0,0 +1,13 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Kprobe dynamic event - adding and removing
+
+[ -f kprobe_events ] || exit_unsupported # this is configurable
+
+echo 0 > events/enable
+echo > kprobe_events
+echo p:myevent _do_fork > kprobe_events
+grep myevent kprobe_events
+test -d events/kprobes/myevent
+echo > kprobe_events
+clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc b/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc
new file mode 100644
index 000000000..bbc443a91
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc
@@ -0,0 +1,15 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Kprobe dynamic event - busy event check
+
+[ -f kprobe_events ] || exit_unsupported
+
+echo 0 > events/enable
+echo > kprobe_events
+echo p:myevent _do_fork > kprobe_events
+test -d events/kprobes/myevent
+echo 1 > events/kprobes/myevent/enable
+echo > kprobe_events && exit_fail # this must fail
+echo 0 > events/kprobes/myevent/enable
+echo > kprobe_events # this must succeed
+clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc
new file mode 100644
index 000000000..8b43c6804
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc
@@ -0,0 +1,18 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Kprobe dynamic event with arguments
+
+[ -f kprobe_events ] || exit_unsupported # this is configurable
+
+echo 0 > events/enable
+echo > kprobe_events
+echo 'p:testprobe _do_fork $stack $stack0 +0($stack)' > kprobe_events
+grep testprobe kprobe_events
+test -d events/kprobes/testprobe
+echo 1 > events/kprobes/testprobe/enable
+( echo "forked")
+echo 0 > events/kprobes/testprobe/enable
+echo "-:testprobe" >> kprobe_events
+clear_trace
+test -d events/kprobes/testprobe && exit_fail || exit_pass
+
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
new file mode 100644
index 000000000..1ad70cdaf
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
@@ -0,0 +1,48 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Kprobe event string type argument
+
+[ -f kprobe_events ] || exit_unsupported # this is configurable
+
+echo 0 > events/enable
+echo > kprobe_events
+
+case `uname -m` in
+x86_64)
+ ARG1=%di
+;;
+i[3456]86)
+ ARG1=%ax
+;;
+aarch64)
+ ARG1=%x0
+;;
+arm*)
+ ARG1=%r0
+;;
+ppc64*)
+ ARG1=%r3
+;;
+ppc*)
+ ARG1=%r3
+;;
+*)
+ echo "Please implement other architecture here"
+ exit_untested
+esac
+
+: "Test get argument (1)"
+echo "p:testprobe tracefs_create_dir arg1=+0(${ARG1}):string" > kprobe_events
+echo 1 > events/kprobes/testprobe/enable
+echo "p:test _do_fork" >> kprobe_events
+grep -qe "testprobe.* arg1=\"test\"" trace
+
+echo 0 > events/kprobes/testprobe/enable
+: "Test get argument (2)"
+echo "p:testprobe tracefs_create_dir arg1=+0(${ARG1}):string arg2=+0(${ARG1}):string" > kprobe_events
+echo 1 > events/kprobes/testprobe/enable
+echo "p:test _do_fork" >> kprobe_events
+grep -qe "testprobe.* arg1=\"test\" arg2=\"test\"" trace
+
+echo 0 > events/enable
+echo > kprobe_events
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc
new file mode 100644
index 000000000..92ffb3bd3
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc
@@ -0,0 +1,107 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Kprobe event argument syntax
+
+[ -f kprobe_events ] || exit_unsupported # this is configurable
+
+grep "x8/16/32/64" README > /dev/null || exit_unsupported # version issue
+
+echo 0 > events/enable
+echo > kprobe_events
+
+PROBEFUNC="vfs_read"
+GOODREG=
+BADREG=
+GOODSYM="_sdata"
+if ! grep -qw ${GOODSYM} /proc/kallsyms ; then
+ GOODSYM=$PROBEFUNC
+fi
+BADSYM="deaqswdefr"
+SYMADDR=0x`grep -w ${GOODSYM} /proc/kallsyms | cut -f 1 -d " "`
+GOODTYPE="x16"
+BADTYPE="y16"
+
+case `uname -m` in
+x86_64|i[3456]86)
+ GOODREG=%ax
+ BADREG=%ex
+;;
+aarch64)
+ GOODREG=%x0
+ BADREG=%ax
+;;
+arm*)
+ GOODREG=%r0
+ BADREG=%ax
+;;
+ppc*)
+ GOODREG=%r3
+ BADREG=%msr
+;;
+*)
+ echo "Please implement other architecture here"
+ exit_untested
+esac
+
+test_goodarg() # Good-args
+{
+ while [ "$1" ]; do
+ echo "p ${PROBEFUNC} $1" > kprobe_events
+ shift 1
+ done;
+}
+
+test_badarg() # Bad-args
+{
+ while [ "$1" ]; do
+ ! echo "p ${PROBEFUNC} $1" > kprobe_events
+ shift 1
+ done;
+}
+
+echo > kprobe_events
+
+: "Register access"
+test_goodarg ${GOODREG}
+test_badarg ${BADREG}
+
+: "Symbol access"
+test_goodarg "@${GOODSYM}" "@${SYMADDR}" "@${GOODSYM}+10" "@${GOODSYM}-10"
+test_badarg "@" "@${BADSYM}" "@${GOODSYM}*10" "@${GOODSYM}/10" \
+ "@${GOODSYM}%10" "@${GOODSYM}&10" "@${GOODSYM}|10"
+
+: "Stack access"
+test_goodarg "\$stack" "\$stack0" "\$stack1"
+test_badarg "\$stackp" "\$stack0+10" "\$stack1-10"
+
+: "Retval access"
+echo "r ${PROBEFUNC} \$retval" > kprobe_events
+! echo "p ${PROBEFUNC} \$retval" > kprobe_events
+
+# $comm was introduced in 4.8, older kernels reject it.
+if grep -A1 "fetcharg:" README | grep -q '\$comm' ; then
+: "Comm access"
+test_goodarg "\$comm"
+fi
+
+: "Indirect memory access"
+test_goodarg "+0(${GOODREG})" "-0(${GOODREG})" "+10(\$stack)" \
+ "+0(\$stack1)" "+10(@${GOODSYM}-10)" "+0(+10(+20(\$stack)))"
+test_badarg "+(${GOODREG})" "(${GOODREG}+10)" "-(${GOODREG})" "(${GOODREG})" \
+ "+10(\$comm)" "+0(${GOODREG})+10"
+
+: "Name assignment"
+test_goodarg "varname=${GOODREG}"
+test_badarg "varname=varname2=${GOODREG}"
+
+: "Type syntax"
+test_goodarg "${GOODREG}:${GOODTYPE}"
+test_badarg "${GOODREG}::${GOODTYPE}" "${GOODREG}:${BADTYPE}" \
+ "${GOODTYPE}:${GOODREG}"
+
+: "Combination check"
+
+test_goodarg "\$comm:string" "+0(\$stack):string"
+test_badarg "\$comm:x64" "\$stack:string" "${GOODREG}:string"
+
+echo > kprobe_events
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc
new file mode 100644
index 000000000..2a1755bfc
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc
@@ -0,0 +1,38 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Kprobes event arguments with types
+
+[ -f kprobe_events ] || exit_unsupported # this is configurable
+
+grep "x8/16/32/64" README > /dev/null || exit_unsupported # version issue
+
+echo 0 > events/enable
+echo > kprobe_events
+enable_tracing
+
+echo 'p:testprobe _do_fork $stack0:s32 $stack0:u32 $stack0:x32 $stack0:b8@4/32' > kprobe_events
+grep testprobe kprobe_events
+test -d events/kprobes/testprobe
+
+echo 1 > events/kprobes/testprobe/enable
+( echo "forked")
+echo 0 > events/kprobes/testprobe/enable
+ARGS=`tail -n 1 trace | sed -e 's/.* arg1=\(.*\) arg2=\(.*\) arg3=\(.*\) arg4=\(.*\)/\1 \2 \3 \4/'`
+
+check_types() {
+ X1=`printf "%x" $1 | tail -c 8`
+ X2=`printf "%x" $2`
+ X3=`printf "%x" $3`
+ test $X1 = $X2
+ test $X2 = $X3
+ test 0x$X3 = $3
+
+ B4=`printf "%02x" $4`
+ B3=`echo -n $X3 | tail -c 3 | head -c 2`
+ test $B3 = $B4
+}
+check_types $ARGS
+
+echo "-:testprobe" >> kprobe_events
+clear_trace
+test -d events/kprobes/testprobe && exit_fail || exit_pass
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc
new file mode 100644
index 000000000..2724a1068
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc
@@ -0,0 +1,37 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Kprobe event auto/manual naming
+
+[ -f kprobe_events ] || exit_unsupported # this is configurable
+
+disable_events
+echo > kprobe_events
+
+:;: "Add an event on function without name" ;:
+
+FUNC=`grep " [tT] .*vfs_read$" /proc/kallsyms | tail -n 1 | cut -f 3 -d " "`
+[ "x" != "x$FUNC" ] || exit_unresolved
+echo "p $FUNC" > kprobe_events
+PROBE_NAME=`echo $FUNC | tr ".:" "_"`
+test -d events/kprobes/p_${PROBE_NAME}_0 || exit_failure
+
+:;: "Add an event on function with new name" ;:
+
+echo "p:event1 $FUNC" > kprobe_events
+test -d events/kprobes/event1 || exit_failure
+
+:;: "Add an event on function with new name and group" ;:
+
+echo "p:kprobes2/event2 $FUNC" > kprobe_events
+test -d events/kprobes2/event2 || exit_failure
+
+:;: "Add an event on dot function without name" ;:
+
+FUNC=`grep -m 10 " [tT] .*\.isra\..*$" /proc/kallsyms | tail -n 1 | cut -f 3 -d " "`
+[ "x" != "x$FUNC" ] || exit_unresolved
+echo "p $FUNC" > kprobe_events
+EVENT=`grep $FUNC kprobe_events | cut -f 1 -d " " | cut -f 2 -d:`
+[ "x" != "x$EVENT" ] || exit_failure
+test -d events/$EVENT || exit_failure
+
+echo > kprobe_events
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc
new file mode 100644
index 000000000..cc4cac0e6
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc
@@ -0,0 +1,56 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Kprobe dynamic event with function tracer
+
+[ -f kprobe_events ] || exit_unsupported # this is configurable
+grep function available_tracers || exit_unsupported # this is configurable
+
+# prepare
+echo nop > current_tracer
+echo _do_fork > set_ftrace_filter
+echo 0 > events/enable
+echo > kprobe_events
+echo 'p:testprobe _do_fork' > kprobe_events
+
+# kprobe on / ftrace off
+echo 1 > events/kprobes/testprobe/enable
+echo > trace
+( echo "forked")
+grep testprobe trace
+! grep '_do_fork <-' trace
+
+# kprobe on / ftrace on
+echo function > current_tracer
+echo > trace
+( echo "forked")
+grep testprobe trace
+grep '_do_fork <-' trace
+
+# kprobe off / ftrace on
+echo 0 > events/kprobes/testprobe/enable
+echo > trace
+( echo "forked")
+! grep testprobe trace
+grep '_do_fork <-' trace
+
+# kprobe on / ftrace on
+echo 1 > events/kprobes/testprobe/enable
+echo function > current_tracer
+echo > trace
+( echo "forked")
+grep testprobe trace
+grep '_do_fork <-' trace
+
+# kprobe on / ftrace off
+echo nop > current_tracer
+echo > trace
+( echo "forked")
+grep testprobe trace
+! grep '_do_fork <-' trace
+
+# cleanup
+echo nop > current_tracer
+echo > set_ftrace_filter
+echo 0 > events/kprobes/testprobe/enable
+echo > kprobe_events
+echo > trace
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_module.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_module.tc
new file mode 100644
index 000000000..1e9f75f7a
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_module.tc
@@ -0,0 +1,29 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Kprobe dynamic event - probing module
+
+[ -f kprobe_events ] || exit_unsupported # this is configurable
+
+disable_events
+echo > kprobe_events
+
+:;: "Add an event on a module function without specifying event name" ;:
+
+MOD=`lsmod | head -n 2 | tail -n 1 | cut -f1 -d" "`
+FUNC=`grep -m 1 ".* t .*\\[$MOD\\]" /proc/kallsyms | xargs | cut -f3 -d" "`
+[ "x" != "x$MOD" -a "y" != "y$FUNC" ] || exit_unresolved
+echo "p $MOD:$FUNC" > kprobe_events
+PROBE_NAME=`echo $MOD:$FUNC | tr ".:" "_"`
+test -d events/kprobes/p_${PROBE_NAME}_0 || exit_failure
+
+:;: "Add an event on a module function with new event name" ;:
+
+echo "p:event1 $MOD:$FUNC" > kprobe_events
+test -d events/kprobes/event1 || exit_failure
+
+:;: "Add an event on a module function with new event and group name" ;:
+
+echo "p:kprobes1/event1 $MOD:$FUNC" > kprobe_events
+test -d events/kprobes1/event1 || exit_failure
+
+echo > kprobe_events
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc
new file mode 100644
index 000000000..321954683
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc
@@ -0,0 +1,17 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Kretprobe dynamic event with arguments
+
+[ -f kprobe_events ] || exit_unsupported # this is configurable
+
+echo 0 > events/enable
+echo > kprobe_events
+echo 'r:testprobe2 _do_fork $retval' > kprobe_events
+grep testprobe2 kprobe_events
+test -d events/kprobes/testprobe2
+echo 1 > events/kprobes/testprobe2/enable
+( echo "forked")
+echo 0 > events/kprobes/testprobe2/enable
+echo '-:testprobe2' >> kprobe_events
+clear_trace
+test -d events/kprobes/testprobe2 && exit_fail || exit_pass
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_maxactive.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_maxactive.tc
new file mode 100644
index 000000000..7c0290684
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_maxactive.tc
@@ -0,0 +1,41 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Kretprobe dynamic event with maxactive
+
+[ -f kprobe_events ] || exit_unsupported # this is configurable
+grep -q 'r\[maxactive\]' README || exit_unsupported # this is older version
+
+echo > kprobe_events
+
+# Test if we successfully reject unknown messages
+if echo 'a:myprobeaccept inet_csk_accept' > kprobe_events; then false; else true; fi
+
+# Test if we successfully reject too big maxactive
+if echo 'r1000000:myprobeaccept inet_csk_accept' > kprobe_events; then false; else true; fi
+
+# Test if we successfully reject unparsable numbers for maxactive
+if echo 'r10fuzz:myprobeaccept inet_csk_accept' > kprobe_events; then false; else true; fi
+
+# Test for kretprobe with event name without maxactive
+echo 'r:myprobeaccept inet_csk_accept' > kprobe_events
+grep myprobeaccept kprobe_events
+test -d events/kprobes/myprobeaccept
+echo '-:myprobeaccept' >> kprobe_events
+
+# Test for kretprobe with event name with a small maxactive
+echo 'r10:myprobeaccept inet_csk_accept' > kprobe_events
+grep myprobeaccept kprobe_events
+test -d events/kprobes/myprobeaccept
+echo '-:myprobeaccept' >> kprobe_events
+
+# Test for kretprobe without event name without maxactive
+echo 'r inet_csk_accept' > kprobe_events
+grep inet_csk_accept kprobe_events
+echo > kprobe_events
+
+# Test for kretprobe without event name with a small maxactive
+echo 'r10 inet_csk_accept' > kprobe_events
+grep inet_csk_accept kprobe_events
+echo > kprobe_events
+
+clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
new file mode 100644
index 000000000..da298f191
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
@@ -0,0 +1,39 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Register/unregister many kprobe events
+
+[ -f kprobe_events ] || exit_unsupported # this is configurable
+
+# ftrace fentry skip size depends on the machine architecture.
+# Currently HAVE_KPROBES_ON_FTRACE defined on x86 and powerpc64le
+case `uname -m` in
+ x86_64|i[3456]86) OFFS=5;;
+ ppc64le) OFFS=8;;
+ *) OFFS=0;;
+esac
+
+if [ -d events/kprobes ]; then
+ echo 0 > events/kprobes/enable
+ echo > kprobe_events
+fi
+
+N=0
+echo "Setup up kprobes on first available 256 text symbols"
+grep -i " t " /proc/kallsyms | cut -f3 -d" " | grep -v .*\\..* | \
+while read i; do
+ echo p ${i}+${OFFS} >> kprobe_events && N=$((N+1)) ||:
+ test $N -eq 256 && break
+done
+
+L=`cat kprobe_events | wc -l`
+if [ $L -ne 256 ]; then
+ echo "The number of kprobes events ($L) is not 256"
+ exit_fail
+fi
+
+echo 1 > events/kprobes/enable
+echo 0 > events/kprobes/enable
+echo > kprobe_events
+echo "Waiting for unoptimizing & freeing"
+sleep 5
+echo "Done"
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/probepoint.tc b/tools/testing/selftests/ftrace/test.d/kprobe/probepoint.tc
new file mode 100644
index 000000000..519d2763f
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/probepoint.tc
@@ -0,0 +1,43 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Kprobe events - probe points
+
+[ -f kprobe_events ] || exit_unsupported # this is configurable
+
+TARGET_FUNC=tracefs_create_dir
+
+dec_addr() { # hexaddr
+ printf "%d" "0x"`echo $1 | tail -c 8`
+}
+
+set_offs() { # prev target next
+ A1=`dec_addr $1`
+ A2=`dec_addr $2`
+ A3=`dec_addr $3`
+ TARGET="0x$2" # an address
+ PREV=`expr $A1 - $A2` # offset to previous symbol
+ NEXT=+`expr $A3 - $A2` # offset to next symbol
+ OVERFLOW=+`printf "0x%x" ${PREV}` # overflow offset to previous symbol
+}
+
+# We have to decode symbol addresses to get correct offsets.
+# If the offset is not an instruction boundary, it cause -EILSEQ.
+set_offs `grep -A1 -B1 ${TARGET_FUNC} /proc/kallsyms | cut -f 1 -d " " | xargs`
+
+UINT_TEST=no
+# printf "%x" -1 returns (unsigned long)-1.
+if [ `printf "%x" -1 | wc -c` != 9 ]; then
+ UINT_TEST=yes
+fi
+
+echo 0 > events/enable
+echo > kprobe_events
+echo "p:testprobe ${TARGET_FUNC}" > kprobe_events
+echo "p:testprobe ${TARGET}" > kprobe_events
+echo "p:testprobe ${TARGET_FUNC}${NEXT}" > kprobe_events
+! echo "p:testprobe ${TARGET_FUNC}${PREV}" > kprobe_events
+if [ "${UINT_TEST}" = yes ]; then
+! echo "p:testprobe ${TARGET_FUNC}${OVERFLOW}" > kprobe_events
+fi
+echo > kprobe_events
+clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/preemptirq/irqsoff_tracer.tc b/tools/testing/selftests/ftrace/test.d/preemptirq/irqsoff_tracer.tc
new file mode 100644
index 000000000..cbd174334
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/preemptirq/irqsoff_tracer.tc
@@ -0,0 +1,73 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: test for the preemptirqsoff tracer
+
+MOD=preemptirq_delay_test
+
+fail() {
+ reset_tracer
+ rmmod $MOD || true
+ exit_fail
+}
+
+unsup() { #msg
+ reset_tracer
+ rmmod $MOD || true
+ echo $1
+ exit_unsupported
+}
+
+modprobe $MOD || unsup "$MOD module not available"
+rmmod $MOD
+
+grep -q "preemptoff" available_tracers || unsup "preemptoff tracer not enabled"
+grep -q "irqsoff" available_tracers || unsup "irqsoff tracer not enabled"
+
+reset_tracer
+
+# Simulate preemptoff section for half a second couple of times
+echo preemptoff > current_tracer
+sleep 1
+modprobe $MOD test_mode=preempt delay=500000 || fail
+rmmod $MOD || fail
+modprobe $MOD test_mode=preempt delay=500000 || fail
+rmmod $MOD || fail
+modprobe $MOD test_mode=preempt delay=500000 || fail
+rmmod $MOD || fail
+
+cat trace
+
+# Confirm which tracer
+grep -q "tracer: preemptoff" trace || fail
+
+# Check the end of the section
+egrep -q "5.....us : <stack trace>" trace || fail
+
+# Check for 500ms of latency
+egrep -q "latency: 5..... us" trace || fail
+
+reset_tracer
+
+# Simulate irqsoff section for half a second couple of times
+echo irqsoff > current_tracer
+sleep 1
+modprobe $MOD test_mode=irq delay=500000 || fail
+rmmod $MOD || fail
+modprobe $MOD test_mode=irq delay=500000 || fail
+rmmod $MOD || fail
+modprobe $MOD test_mode=irq delay=500000 || fail
+rmmod $MOD || fail
+
+cat trace
+
+# Confirm which tracer
+grep -q "tracer: irqsoff" trace || fail
+
+# Check the end of the section
+egrep -q "5.....us : <stack trace>" trace || fail
+
+# Check for 500ms of latency
+egrep -q "latency: 5..... us" trace || fail
+
+reset_tracer
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/template b/tools/testing/selftests/ftrace/test.d/template
new file mode 100644
index 000000000..5c39ceb18
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/template
@@ -0,0 +1,10 @@
+#!/bin/sh
+# description: %HERE DESCRIBE WHAT THIS DOES%
+# you have to add ".tc" extention for your testcase file
+# Note that all tests are run with "errexit" option.
+
+exit 0 # Return 0 if the test is passed, otherwise return !0
+# Or you can call exit_pass for passed test, and exit_fail for failed test.
+# If the test could not run because of lack of feature, call exit_unsupported
+# If the test returned unclear results, call exit_unresolved
+# If the test is a dummy, or a placeholder, call exit_untested
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc
new file mode 100644
index 000000000..2aabab363
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc
@@ -0,0 +1,39 @@
+#!/bin/sh
+# description: event trigger - test extended error support
+
+
+do_reset() {
+ reset_trigger
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+ echo "synthetic event is not supported"
+ exit_unsupported
+fi
+
+reset_tracer
+do_reset
+
+echo "Test extended error support"
+echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_wakeup/trigger
+! echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_wakeup/trigger 2> /dev/null
+if ! grep -q "ERROR:" events/sched/sched_wakeup/hist; then
+ fail "Failed to generate extended error in histogram"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc
new file mode 100644
index 000000000..7fd5b4a8f
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc
@@ -0,0 +1,54 @@
+#!/bin/sh
+# description: event trigger - test field variable support
+
+do_reset() {
+ reset_trigger
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+ echo "synthetic event is not supported"
+ exit_unsupported
+fi
+
+clear_synthetic_events
+reset_tracer
+do_reset
+
+echo "Test field variable support"
+
+echo 'wakeup_latency u64 lat; pid_t pid; int prio; char comm[16]' > synthetic_events
+echo 'hist:keys=comm:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_waking/trigger
+echo 'hist:keys=next_comm:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,next_pid,sched.sched_waking.prio,next_comm) if next_comm=="ping"' > events/sched/sched_switch/trigger
+echo 'hist:keys=pid,prio,comm:vals=lat:sort=pid,prio' > events/synthetic/wakeup_latency/trigger
+
+ping localhost -c 3
+if ! grep -q "ping" events/synthetic/wakeup_latency/hist; then
+ fail "Failed to create inter-event histogram"
+fi
+
+if ! grep -q "synthetic_prio=prio" events/sched/sched_waking/hist; then
+ fail "Failed to create histogram with field variable"
+fi
+
+echo '!hist:keys=next_comm:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,next_pid,sched.sched_waking.prio,next_comm) if next_comm=="ping"' >> events/sched/sched_switch/trigger
+
+if grep -q "synthetic_prio=prio" events/sched/sched_waking/hist; then
+ fail "Failed to remove histogram with field variable"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc
new file mode 100644
index 000000000..c93dbe38b
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc
@@ -0,0 +1,58 @@
+#!/bin/sh
+# description: event trigger - test inter-event combined histogram trigger
+
+do_reset() {
+ reset_trigger
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+ echo "synthetic event is not supported"
+ exit_unsupported
+fi
+
+reset_tracer
+do_reset
+clear_synthetic_events
+
+echo "Test create synthetic event"
+
+echo 'waking_latency u64 lat pid_t pid' > synthetic_events
+if [ ! -d events/synthetic/waking_latency ]; then
+ fail "Failed to create waking_latency synthetic event"
+fi
+
+echo "Test combined histogram"
+
+echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_waking/trigger
+echo 'hist:keys=pid:waking_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).waking_latency($waking_lat,pid) if comm=="ping"' > events/sched/sched_wakeup/trigger
+echo 'hist:keys=pid,lat:sort=pid,lat' > events/synthetic/waking_latency/trigger
+
+echo 'wakeup_latency u64 lat pid_t pid' >> synthetic_events
+echo 'hist:keys=pid:ts1=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_wakeup/trigger
+echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts1:onmatch(sched.sched_wakeup).wakeup_latency($wakeup_lat,next_pid) if next_comm=="ping"' > events/sched/sched_switch/trigger
+
+echo 'waking+wakeup_latency u64 lat; pid_t pid' >> synthetic_events
+echo 'hist:keys=pid,lat:sort=pid,lat:ww_lat=$waking_lat+$wakeup_lat:onmatch(synthetic.wakeup_latency).waking+wakeup_latency($ww_lat,pid)' >> events/synthetic/wakeup_latency/trigger
+echo 'hist:keys=pid,lat:sort=pid,lat' >> events/synthetic/waking+wakeup_latency/trigger
+
+ping localhost -c 3
+if ! grep -q "pid:" events/synthetic/waking+wakeup_latency/hist; then
+ fail "Failed to create combined histogram"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc
new file mode 100644
index 000000000..c193dce61
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc
@@ -0,0 +1,44 @@
+#!/bin/sh
+# description: event trigger - test multiple actions on hist trigger
+
+
+do_reset() {
+ reset_trigger
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+ echo "synthetic event is not supported"
+ exit_unsupported
+fi
+
+clear_synthetic_events
+reset_tracer
+do_reset
+
+echo "Test multiple actions on hist trigger"
+echo 'wakeup_latency u64 lat; pid_t pid' >> synthetic_events
+TRIGGER1=events/sched/sched_wakeup/trigger
+TRIGGER2=events/sched/sched_switch/trigger
+
+echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="cyclictest"' > $TRIGGER1
+echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0 if next_comm=="cyclictest"' >> $TRIGGER2
+echo 'hist:keys=next_pid:onmatch(sched.sched_wakeup).wakeup_latency(sched.sched_switch.$wakeup_lat,next_pid) if next_comm=="cyclictest"' >> $TRIGGER2
+echo 'hist:keys=next_pid:onmatch(sched.sched_wakeup).wakeup_latency(sched.sched_switch.$wakeup_lat,prev_pid) if next_comm=="cyclictest"' >> $TRIGGER2
+echo 'hist:keys=next_pid if next_comm=="cyclictest"' >> $TRIGGER2
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc
new file mode 100644
index 000000000..e84e7d048
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc
@@ -0,0 +1,50 @@
+#!/bin/sh
+# description: event trigger - test inter-event histogram trigger onmatch action
+
+do_reset() {
+ reset_trigger
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+ echo "synthetic event is not supported"
+ exit_unsupported
+fi
+
+clear_synthetic_events
+reset_tracer
+do_reset
+
+echo "Test create synthetic event"
+
+echo 'wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events
+if [ ! -d events/synthetic/wakeup_latency ]; then
+ fail "Failed to create wakeup_latency synthetic event"
+fi
+
+echo "Test create histogram for synthetic event"
+echo "Test histogram variables,simple expression support and onmatch action"
+
+echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_wakeup/trigger
+echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_wakeup).wakeup_latency($wakeup_lat,next_pid,next_comm) if next_comm=="ping"' > events/sched/sched_switch/trigger
+echo 'hist:keys=comm,pid,lat:wakeup_lat=lat:sort=lat' > events/synthetic/wakeup_latency/trigger
+ping localhost -c 5
+if ! grep -q "ping" events/synthetic/wakeup_latency/hist; then
+ fail "Failed to create onmatch action inter-event histogram"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc
new file mode 100644
index 000000000..7907d8aac
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc
@@ -0,0 +1,50 @@
+#!/bin/sh
+# description: event trigger - test inter-event histogram trigger onmatch-onmax action
+
+do_reset() {
+ reset_trigger
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+ echo "synthetic event is not supported"
+ exit_unsupported
+fi
+
+clear_synthetic_events
+reset_tracer
+do_reset
+
+echo "Test create synthetic event"
+
+echo 'wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events
+if [ ! -d events/synthetic/wakeup_latency ]; then
+ fail "Failed to create wakeup_latency synthetic event"
+fi
+
+echo "Test create histogram for synthetic event"
+echo "Test histogram variables,simple expression support and onmatch-onmax action"
+
+echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_wakeup/trigger
+echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_wakeup).wakeup_latency($wakeup_lat,next_pid,next_comm):onmax($wakeup_lat).save(next_comm,prev_pid,prev_prio,prev_comm) if next_comm=="ping"' >> events/sched/sched_switch/trigger
+echo 'hist:keys=comm,pid,lat:wakeup_lat=lat:sort=lat' > events/synthetic/wakeup_latency/trigger
+ping localhost -c 5
+if [ ! grep -q "ping" events/synthetic/wakeup_latency/hist -o ! grep -q "max:" events/sched/sched_switch/hist]; then
+ fail "Failed to create onmatch-onmax action inter-event histogram"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc
new file mode 100644
index 000000000..38b7ed624
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc
@@ -0,0 +1,48 @@
+#!/bin/sh
+# description: event trigger - test inter-event histogram trigger onmax action
+
+do_reset() {
+ reset_trigger
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+ echo "synthetic event is not supported"
+ exit_unsupported
+fi
+
+clear_synthetic_events
+reset_tracer
+do_reset
+
+echo "Test create synthetic event"
+
+echo 'wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events
+if [ ! -d events/synthetic/wakeup_latency ]; then
+ fail "Failed to create wakeup_latency synthetic event"
+fi
+
+echo "Test onmax action"
+
+echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_waking/trigger
+echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmax($wakeup_lat).save(next_comm,prev_pid,prev_prio,prev_comm) if next_comm=="ping"' >> events/sched/sched_switch/trigger
+ping localhost -c 3
+if ! grep -q "max:" events/sched/sched_switch/hist; then
+ fail "Failed to create onmax action inter-event histogram"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc
new file mode 100644
index 000000000..c604438df
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc
@@ -0,0 +1,54 @@
+#!/bin/sh
+# description: event trigger - test synthetic event create remove
+do_reset() {
+ reset_trigger
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+ echo "synthetic event is not supported"
+ exit_unsupported
+fi
+
+clear_synthetic_events
+reset_tracer
+do_reset
+
+echo "Test create synthetic event"
+
+echo 'wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events
+if [ ! -d events/synthetic/wakeup_latency ]; then
+ fail "Failed to create wakeup_latency synthetic event"
+fi
+
+reset_trigger
+
+echo "Test remove synthetic event"
+echo '!wakeup_latency u64 lat pid_t pid char comm[16]' >> synthetic_events
+if [ -d events/synthetic/wakeup_latency ]; then
+ fail "Failed to delete wakeup_latency synthetic event"
+fi
+
+reset_trigger
+
+echo "Test create synthetic event with an error"
+echo 'wakeup_latency u64 lat pid_t pid char' > synthetic_events > /dev/null
+if [ -d events/synthetic/wakeup_latency ]; then
+ fail "Created wakeup_latency synthetic event with an invalid format"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc
new file mode 100644
index 000000000..88e6c3f43
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc
@@ -0,0 +1,80 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event trigger - test synthetic_events syntax parser
+
+do_reset() {
+ reset_trigger
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+ echo "synthetic event is not supported"
+ exit_unsupported
+fi
+
+reset_tracer
+do_reset
+
+echo "Test synthetic_events syntax parser"
+
+echo > synthetic_events
+
+# synthetic event must have a field
+! echo "myevent" >> synthetic_events
+echo "myevent u64 var1" >> synthetic_events
+
+# synthetic event must be found in synthetic_events
+grep "myevent[[:space:]]u64 var1" synthetic_events
+
+# it is not possible to add same name event
+! echo "myevent u64 var2" >> synthetic_events
+
+# Non-append open will cleanup all events and add new one
+echo "myevent u64 var2" > synthetic_events
+
+# multiple fields with different spaces
+echo "myevent u64 var1; u64 var2;" > synthetic_events
+grep "myevent[[:space:]]u64 var1; u64 var2" synthetic_events
+echo "myevent u64 var1 ; u64 var2 ;" > synthetic_events
+grep "myevent[[:space:]]u64 var1; u64 var2" synthetic_events
+echo "myevent u64 var1 ;u64 var2" > synthetic_events
+grep "myevent[[:space:]]u64 var1; u64 var2" synthetic_events
+
+# test field types
+echo "myevent u32 var" > synthetic_events
+echo "myevent u16 var" > synthetic_events
+echo "myevent u8 var" > synthetic_events
+echo "myevent s64 var" > synthetic_events
+echo "myevent s32 var" > synthetic_events
+echo "myevent s16 var" > synthetic_events
+echo "myevent s8 var" > synthetic_events
+
+echo "myevent char var" > synthetic_events
+echo "myevent int var" > synthetic_events
+echo "myevent long var" > synthetic_events
+echo "myevent pid_t var" > synthetic_events
+
+echo "myevent unsigned char var" > synthetic_events
+echo "myevent unsigned int var" > synthetic_events
+echo "myevent unsigned long var" > synthetic_events
+grep "myevent[[:space:]]unsigned long var" synthetic_events
+
+# test string type
+echo "myevent char var[10]" > synthetic_events
+grep "myevent[[:space:]]char\[10\] var" synthetic_events
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-eventonoff.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-eventonoff.tc
new file mode 100644
index 000000000..28cc355a3
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-eventonoff.tc
@@ -0,0 +1,66 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event trigger - test event enable/disable trigger
+# flags: instance
+
+do_reset() {
+ reset_trigger
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event -o ! -d events/sched ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f events/sched/sched_process_fork/trigger ]; then
+ echo "event trigger is not supported"
+ exit_unsupported
+fi
+
+reset_tracer
+do_reset
+
+FEATURE=`grep enable_event events/sched/sched_process_fork/trigger`
+if [ -z "$FEATURE" ]; then
+ echo "event enable/disable trigger is not supported"
+ exit_unsupported
+fi
+
+echo "Test enable_event trigger"
+echo 0 > events/sched/sched_switch/enable
+echo 'enable_event:sched:sched_switch' > events/sched/sched_process_fork/trigger
+( echo "forked")
+if [ `cat events/sched/sched_switch/enable` != '1*' ]; then
+ fail "enable_event trigger on sched_process_fork did not work"
+fi
+
+reset_trigger
+
+echo "Test disable_event trigger"
+echo 1 > events/sched/sched_switch/enable
+echo 'disable_event:sched:sched_switch' > events/sched/sched_process_fork/trigger
+( echo "forked")
+if [ `cat events/sched/sched_switch/enable` != '0*' ]; then
+ fail "disable_event trigger on sched_process_fork did not work"
+fi
+
+reset_trigger
+
+echo "Test semantic error for event enable/disable trigger"
+! echo 'enable_event:nogroup:noevent' > events/sched/sched_process_fork/trigger
+! echo 'disable_event+1' > events/sched/sched_process_fork/trigger
+echo 'enable_event:sched:sched_switch' > events/sched/sched_process_fork/trigger
+! echo 'enable_event:sched:sched_switch' > events/sched/sched_process_fork/trigger
+! echo 'disable_event:sched:sched_switch' > events/sched/sched_process_fork/trigger
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-filter.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-filter.tc
new file mode 100644
index 000000000..a48e23eb8
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-filter.tc
@@ -0,0 +1,61 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event trigger - test trigger filter
+# flags: instance
+
+do_reset() {
+ reset_trigger
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event -o ! -d events/sched ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f events/sched/sched_process_fork/trigger ]; then
+ echo "event trigger is not supported"
+ exit_unsupported
+fi
+
+reset_tracer
+do_reset
+
+echo "Test trigger filter"
+echo 1 > tracing_on
+echo 'traceoff if child_pid == 0' > events/sched/sched_process_fork/trigger
+( echo "forked")
+if [ `cat tracing_on` -ne 1 ]; then
+ fail "traceoff trigger on sched_process_fork did not work"
+fi
+
+reset_trigger
+
+echo "Test semantic error for trigger filter"
+! echo 'traceoff if a' > events/sched/sched_process_fork/trigger
+! echo 'traceoff if common_pid=0' > events/sched/sched_process_fork/trigger
+! echo 'traceoff if common_pid==b' > events/sched/sched_process_fork/trigger
+echo 'traceoff if common_pid == 0' > events/sched/sched_process_fork/trigger
+echo '!traceoff' > events/sched/sched_process_fork/trigger
+! echo 'traceoff if common_pid == child_pid' > events/sched/sched_process_fork/trigger
+echo 'traceoff if common_pid <= 0' > events/sched/sched_process_fork/trigger
+echo '!traceoff' > events/sched/sched_process_fork/trigger
+echo 'traceoff if common_pid >= 0' > events/sched/sched_process_fork/trigger
+echo '!traceoff' > events/sched/sched_process_fork/trigger
+echo 'traceoff if parent_pid >= 0 && child_pid >= 0' > events/sched/sched_process_fork/trigger
+echo '!traceoff' > events/sched/sched_process_fork/trigger
+echo 'traceoff if parent_pid >= 0 || child_pid >= 0' > events/sched/sched_process_fork/trigger
+echo '!traceoff' > events/sched/sched_process_fork/trigger
+
+
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc
new file mode 100644
index 000000000..8da80efc4
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc
@@ -0,0 +1,76 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event trigger - test histogram modifiers
+# flags: instance
+
+do_reset() {
+ reset_trigger
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event -o ! -d events/sched ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f events/sched/sched_process_fork/trigger ]; then
+ echo "event trigger is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f events/sched/sched_process_fork/hist ]; then
+ echo "hist trigger is not supported"
+ exit_unsupported
+fi
+
+reset_tracer
+do_reset
+
+echo "Test histogram with execname modifier"
+
+echo 'hist:keys=common_pid.execname' > events/sched/sched_process_fork/trigger
+for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done
+COMM=`cat /proc/$$/comm`
+grep "common_pid: $COMM" events/sched/sched_process_fork/hist > /dev/null || \
+ fail "execname modifier on sched_process_fork did not work"
+
+reset_trigger
+
+echo "Test histogram with hex modifier"
+
+echo 'hist:keys=parent_pid.hex' > events/sched/sched_process_fork/trigger
+for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done
+# Note that $$ is the parent pid. $PID is current PID.
+HEX=`printf %x $PID`
+grep "parent_pid: $HEX" events/sched/sched_process_fork/hist > /dev/null || \
+ fail "hex modifier on sched_process_fork did not work"
+
+reset_trigger
+
+echo "Test histogram with syscall modifier"
+
+echo 'hist:keys=id.syscall' > events/raw_syscalls/sys_exit/trigger
+for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done
+grep "id: \(unknown_\|sys_\)" events/raw_syscalls/sys_exit/hist > /dev/null || \
+ fail "syscall modifier on raw_syscalls/sys_exit did not work"
+
+
+reset_trigger
+
+echo "Test histgram with log2 modifier"
+
+echo 'hist:keys=bytes_req.log2' > events/kmem/kmalloc/trigger
+for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done
+grep 'bytes_req: ~ 2^[0-9]*' events/kmem/kmalloc/hist > /dev/null || \
+ fail "log2 modifier on kmem/kmalloc did not work"
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc
new file mode 100644
index 000000000..449fe9ff9
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc
@@ -0,0 +1,84 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event trigger - test histogram trigger
+# flags: instance
+
+do_reset() {
+ reset_trigger
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event -o ! -d events/sched ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f events/sched/sched_process_fork/trigger ]; then
+ echo "event trigger is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f events/sched/sched_process_fork/hist ]; then
+ echo "hist trigger is not supported"
+ exit_unsupported
+fi
+
+reset_tracer
+do_reset
+
+echo "Test histogram basic tigger"
+
+echo 'hist:keys=parent_pid:vals=child_pid' > events/sched/sched_process_fork/trigger
+for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done
+grep parent_pid events/sched/sched_process_fork/hist > /dev/null || \
+ fail "hist trigger on sched_process_fork did not work"
+grep child events/sched/sched_process_fork/hist > /dev/null || \
+ fail "hist trigger on sched_process_fork did not work"
+
+reset_trigger
+
+echo "Test histogram with compound keys"
+
+echo 'hist:keys=parent_pid,child_pid' > events/sched/sched_process_fork/trigger
+for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done
+grep '^{ parent_pid:.*, child_pid:.*}' events/sched/sched_process_fork/hist > /dev/null || \
+ fail "compound keys on sched_process_fork did not work"
+
+reset_trigger
+
+echo "Test histogram with string key"
+
+echo 'hist:keys=parent_comm' > events/sched/sched_process_fork/trigger
+for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done
+COMM=`cat /proc/$$/comm`
+grep "parent_comm: $COMM" events/sched/sched_process_fork/hist > /dev/null || \
+ fail "string key on sched_process_fork did not work"
+
+reset_trigger
+
+echo "Test histogram with sort key"
+
+echo 'hist:keys=parent_pid,child_pid:sort=child_pid.ascending' > events/sched/sched_process_fork/trigger
+for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done
+
+check_inc() {
+ while [ $# -gt 1 ]; do
+ [ $1 -gt $2 ] && return 1
+ shift 1
+ done
+ return 0
+}
+check_inc `grep -o "child_pid:[[:space:]]*[[:digit:]]*" \
+ events/sched/sched_process_fork/hist | cut -d: -f2 ` ||
+ fail "sort param on sched_process_fork did not work"
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc
new file mode 100644
index 000000000..c5ef8b9d0
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc
@@ -0,0 +1,74 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event trigger - test multiple histogram triggers
+# flags: instance
+
+do_reset() {
+ reset_trigger
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event -o ! -d events/sched ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f events/sched/sched_process_fork/trigger ]; then
+ echo "event trigger is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f events/sched/sched_process_fork/hist ]; then
+ echo "hist trigger is not supported"
+ exit_unsupported
+fi
+
+reset_tracer
+do_reset
+
+reset_trigger
+
+echo "Test histogram multiple tiggers"
+
+echo 'hist:keys=parent_pid:vals=child_pid' > events/sched/sched_process_fork/trigger
+echo 'hist:keys=parent_comm:vals=child_pid' >> events/sched/sched_process_fork/trigger
+for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done
+grep parent_pid events/sched/sched_process_fork/hist > /dev/null || \
+ fail "hist trigger on sched_process_fork did not work"
+grep child events/sched/sched_process_fork/hist > /dev/null || \
+ fail "hist trigger on sched_process_fork did not work"
+COMM=`cat /proc/$$/comm`
+grep "parent_comm: $COMM" events/sched/sched_process_fork/hist > /dev/null || \
+ fail "string key on sched_process_fork did not work"
+
+reset_trigger
+
+echo "Test histogram with its name"
+
+echo 'hist:name=test_hist:keys=common_pid' > events/sched/sched_process_fork/trigger
+for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done
+grep test_hist events/sched/sched_process_fork/hist > /dev/null || \
+ fail "named event on sched_process_fork did not work"
+
+echo "Test same named histogram on different events"
+
+echo 'hist:name=test_hist:keys=common_pid' > events/sched/sched_process_exit/trigger
+for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done
+grep test_hist events/sched/sched_process_exit/hist > /dev/null || \
+ fail "named event on sched_process_fork did not work"
+
+diffs=`diff events/sched/sched_process_exit/hist events/sched/sched_process_fork/hist | wc -l`
+test $diffs -eq 0 || fail "Same name histograms are not same"
+
+reset_trigger
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc
new file mode 100644
index 000000000..ed38f0050
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc
@@ -0,0 +1,62 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event trigger - test snapshot-trigger
+
+do_reset() {
+ reset_trigger
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event -o ! -d events/sched ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f events/sched/sched_process_fork/trigger ]; then
+ echo "event trigger is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f snapshot ]; then
+ echo "snapshot is not supported"
+ exit_unsupported
+fi
+
+reset_tracer
+do_reset
+
+FEATURE=`grep snapshot events/sched/sched_process_fork/trigger`
+if [ -z "$FEATURE" ]; then
+ echo "snapshot trigger is not supported"
+ exit_unsupported
+fi
+
+echo "Test snapshot tigger"
+echo 0 > snapshot
+echo 1 > events/sched/sched_process_fork/enable
+( echo "forked")
+echo 'snapshot:1' > events/sched/sched_process_fork/trigger
+( echo "forked")
+grep sched_process_fork snapshot > /dev/null || \
+ fail "snapshot trigger on sched_process_fork did not work"
+
+reset_trigger
+echo 0 > snapshot
+echo 0 > events/sched/sched_process_fork/enable
+
+echo "Test snapshot semantic errors"
+
+! echo "snapshot+1" > events/sched/sched_process_fork/trigger
+echo "snapshot" > events/sched/sched_process_fork/trigger
+! echo "snapshot" > events/sched/sched_process_fork/trigger
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-stacktrace.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-stacktrace.tc
new file mode 100644
index 000000000..3121d795a
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-stacktrace.tc
@@ -0,0 +1,54 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event trigger - test stacktrace-trigger
+
+do_reset() {
+ reset_trigger
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event -o ! -d events/sched ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f events/sched/sched_process_fork/trigger ]; then
+ echo "event trigger is not supported"
+ exit_unsupported
+fi
+
+reset_tracer
+do_reset
+
+FEATURE=`grep stacktrace events/sched/sched_process_fork/trigger`
+if [ -z "$FEATURE" ]; then
+ echo "stacktrace trigger is not supported"
+ exit_unsupported
+fi
+
+echo "Test stacktrace tigger"
+echo 0 > trace
+echo 0 > options/stacktrace
+echo 'stacktrace' > events/sched/sched_process_fork/trigger
+( echo "forked")
+grep "<stack trace>" trace > /dev/null || \
+ fail "stacktrace trigger on sched_process_fork did not work"
+
+reset_trigger
+
+echo "Test stacktrace semantic errors"
+
+! echo "stacktrace:foo" > events/sched/sched_process_fork/trigger
+echo "stacktrace" > events/sched/sched_process_fork/trigger
+! echo "stacktrace" > events/sched/sched_process_fork/trigger
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-hist.tc
new file mode 100644
index 000000000..2acbfe2c0
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-hist.tc
@@ -0,0 +1,49 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: trace_marker trigger - test histogram trigger
+# flags: instance
+
+do_reset() {
+ reset_trigger
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -d events/ftrace/print ]; then
+ echo "event trace_marker is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f events/ftrace/print/trigger ]; then
+ echo "event trigger is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f events/ftrace/print/hist ]; then
+ echo "hist trigger is not supported"
+ exit_unsupported
+fi
+
+do_reset
+
+echo "Test histogram trace_marker tigger"
+
+echo 'hist:keys=common_pid' > events/ftrace/print/trigger
+for i in `seq 1 10` ; do echo "hello" > trace_marker; done
+grep 'hitcount: *10$' events/ftrace/print/hist > /dev/null || \
+ fail "hist trigger did not trigger correct times on trace_marker"
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-snapshot.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-snapshot.tc
new file mode 100644
index 000000000..6748e8cb4
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-snapshot.tc
@@ -0,0 +1,74 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: trace_marker trigger - test snapshot trigger
+# flags: instance
+
+do_reset() {
+ reset_trigger
+ echo > set_event
+ echo 0 > snapshot
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f snapshot ]; then
+ echo "snapshot is not supported"
+ exit_unsupported
+fi
+
+if [ ! -d events/ftrace/print ]; then
+ echo "event trace_marker is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f events/ftrace/print/trigger ]; then
+ echo "event trigger is not supported"
+ exit_unsupported
+fi
+
+test_trace() {
+ file=$1
+ x=$2
+
+ cat $file | while read line; do
+ comment=`echo $line | sed -e 's/^#//'`
+ if [ "$line" != "$comment" ]; then
+ continue
+ fi
+ echo "testing $line for >$x<"
+ match=`echo $line | sed -e "s/>$x<//"`
+ if [ "$line" == "$match" ]; then
+ fail "$line does not have >$x< in it"
+ fi
+ let x=$x+2
+ done
+}
+
+do_reset
+
+echo "Test snapshot trace_marker tigger"
+
+echo 'snapshot' > events/ftrace/print/trigger
+
+# make sure the snapshot is allocated
+
+grep -q 'Snapshot is allocated' snapshot
+
+for i in `seq 1 10` ; do echo "hello >$i<" > trace_marker; done
+
+test_trace trace 1
+test_trace snapshot 2
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic-kernel.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic-kernel.tc
new file mode 100644
index 000000000..0a69c5d1c
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic-kernel.tc
@@ -0,0 +1,68 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: trace_marker trigger - test histogram with synthetic event against kernel event
+# flags:
+
+do_reset() {
+ reset_trigger
+ echo > set_event
+ echo > synthetic_events
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+ echo "synthetic events not supported"
+ exit_unsupported
+fi
+
+if [ ! -d events/ftrace/print ]; then
+ echo "event trace_marker is not supported"
+ exit_unsupported
+fi
+
+if [ ! -d events/sched/sched_waking ]; then
+ echo "event sched_waking is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f events/ftrace/print/trigger ]; then
+ echo "event trigger is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f events/ftrace/print/hist ]; then
+ echo "hist trigger is not supported"
+ exit_unsupported
+fi
+
+do_reset
+
+echo "Test histogram kernel event to trace_marker latency histogram trigger"
+
+echo 'latency u64 lat' > synthetic_events
+echo 'hist:keys=pid:ts0=common_timestamp.usecs' > events/sched/sched_waking/trigger
+echo 'hist:keys=common_pid:lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).latency($lat)' > events/ftrace/print/trigger
+echo 'hist:keys=common_pid,lat:sort=lat' > events/synthetic/latency/trigger
+sleep 1
+echo "hello" > trace_marker
+
+grep 'hitcount: *1$' events/ftrace/print/hist > /dev/null || \
+ fail "hist trigger did not trigger correct times on trace_marker"
+
+grep 'hitcount: *1$' events/synthetic/latency/hist > /dev/null || \
+ fail "hist trigger did not trigger "
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic.tc
new file mode 100644
index 000000000..3666dd6ab
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic.tc
@@ -0,0 +1,66 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: trace_marker trigger - test histogram with synthetic event
+# flags:
+
+do_reset() {
+ reset_trigger
+ echo > set_event
+ echo > synthetic_events
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+ echo "synthetic events not supported"
+ exit_unsupported
+fi
+
+if [ ! -d events/ftrace/print ]; then
+ echo "event trace_marker is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f events/ftrace/print/trigger ]; then
+ echo "event trigger is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f events/ftrace/print/hist ]; then
+ echo "hist trigger is not supported"
+ exit_unsupported
+fi
+
+do_reset
+
+echo "Test histogram trace_marker to trace_marker latency histogram trigger"
+
+echo 'latency u64 lat' > synthetic_events
+echo 'hist:keys=common_pid:ts0=common_timestamp.usecs if buf == "start"' > events/ftrace/print/trigger
+echo 'hist:keys=common_pid:lat=common_timestamp.usecs-$ts0:onmatch(ftrace.print).latency($lat) if buf == "end"' >> events/ftrace/print/trigger
+echo 'hist:keys=common_pid,lat:sort=lat' > events/synthetic/latency/trigger
+echo -n "start" > trace_marker
+echo -n "end" > trace_marker
+
+cnt=`grep 'hitcount: *1$' events/ftrace/print/hist | wc -l`
+
+if [ $cnt -ne 2 ]; then
+ fail "hist trace_marker trigger did not trigger correctly"
+fi
+
+grep 'hitcount: *1$' events/synthetic/latency/hist > /dev/null || \
+ fail "hist trigger did not trigger "
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-traceonoff.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-traceonoff.tc
new file mode 100644
index 000000000..c59d9eb54
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-traceonoff.tc
@@ -0,0 +1,59 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event trigger - test traceon/off trigger
+
+do_reset() {
+ reset_trigger
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event -o ! -d events/sched ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f events/sched/sched_process_fork/trigger ]; then
+ echo "event trigger is not supported"
+ exit_unsupported
+fi
+
+reset_tracer
+do_reset
+
+echo "Test traceoff trigger"
+echo 1 > tracing_on
+echo 'traceoff' > events/sched/sched_process_fork/trigger
+( echo "forked")
+if [ `cat tracing_on` -ne 0 ]; then
+ fail "traceoff trigger on sched_process_fork did not work"
+fi
+
+reset_trigger
+
+echo "Test traceon trigger"
+echo 0 > tracing_on
+echo 'traceon' > events/sched/sched_process_fork/trigger
+( echo "forked")
+if [ `cat tracing_on` -ne 1 ]; then
+ fail "traceoff trigger on sched_process_fork did not work"
+fi
+
+reset_trigger
+
+echo "Test semantic error for traceoff/on trigger"
+! echo 'traceoff:badparam' > events/sched/sched_process_fork/trigger
+! echo 'traceoff+0' > events/sched/sched_process_fork/trigger
+echo 'traceon' > events/sched/sched_process_fork/trigger
+! echo 'traceon' > events/sched/sched_process_fork/trigger
+! echo 'traceoff' > events/sched/sched_process_fork/trigger
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/futex/Makefile b/tools/testing/selftests/futex/Makefile
new file mode 100644
index 000000000..11e157d75
--- /dev/null
+++ b/tools/testing/selftests/futex/Makefile
@@ -0,0 +1,37 @@
+# SPDX-License-Identifier: GPL-2.0
+SUBDIRS := functional
+
+TEST_PROGS := run.sh
+
+.PHONY: all clean
+
+include ../lib.mk
+
+all:
+ @for DIR in $(SUBDIRS); do \
+ BUILD_TARGET=$(OUTPUT)/$$DIR; \
+ mkdir $$BUILD_TARGET -p; \
+ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
+ if [ -e $$DIR/$(TEST_PROGS) ]; then \
+ rsync -a $$DIR/$(TEST_PROGS) $$BUILD_TARGET/; \
+ fi \
+ done
+
+override define INSTALL_RULE
+ mkdir -p $(INSTALL_PATH)
+ install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES)
+
+ @for SUBDIR in $(SUBDIRS); do \
+ BUILD_TARGET=$(OUTPUT)/$$SUBDIR; \
+ mkdir $$BUILD_TARGET -p; \
+ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$SUBDIR INSTALL_PATH=$(INSTALL_PATH)/$$SUBDIR install; \
+ done;
+endef
+
+override define CLEAN
+ @for DIR in $(SUBDIRS); do \
+ BUILD_TARGET=$(OUTPUT)/$$DIR; \
+ mkdir $$BUILD_TARGET -p; \
+ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
+ done
+endef
diff --git a/tools/testing/selftests/futex/README b/tools/testing/selftests/futex/README
new file mode 100644
index 000000000..f3926c33e
--- /dev/null
+++ b/tools/testing/selftests/futex/README
@@ -0,0 +1,62 @@
+Futex Test
+==========
+Futex Test is intended to thoroughly test the Linux kernel futex system call
+API.
+
+Functional tests shall test the documented behavior of the futex operation
+code under test. This includes checking for proper behavior under normal use,
+odd corner cases, regression tests, and abject abuse and misuse.
+
+Futextest will also provide example implementation of mutual exclusion
+primitives. These can be used as is in user applications or can serve as
+examples for system libraries. These will likely be added to either a new lib/
+directory or purely as header files under include/, I'm leaning toward the
+latter.
+
+Quick Start
+-----------
+# make
+# ./run.sh
+
+Design and Implementation Goals
+-------------------------------
+o Tests should be as self contained as is practical so as to facilitate sharing
+ the individual tests on mailing list discussions and bug reports.
+o The build system shall remain as simple as possible, avoiding any archive or
+ shared object building and linking.
+o Where possible, any helper functions or other package-wide code shall be
+ implemented in header files, avoiding the need to compile intermediate object
+ files.
+o External dependencies shall remain as minimal as possible. Currently gcc
+ and glibc are the only dependencies.
+o Tests return 0 for success and < 0 for failure.
+
+Output Formatting
+-----------------
+Test output shall be easily parsable by both human and machine. Title and
+results are printed to stdout, while intermediate ERROR or FAIL messages are
+sent to stderr. Tests shall support the -c option to print PASS, FAIL, and
+ERROR strings in color for easy visual parsing. Output shall conform to the
+following format:
+
+test_name: Description of the test
+ Arguments: arg1=val1 #units specified for clarity where appropriate
+ ERROR: Description of unexpected error
+ FAIL: Reason for test failure
+ # FIXME: Perhaps an " INFO: informational message" option would be
+ # useful here. Using -v to toggle it them on and off, as with -c.
+ # there may be multiple ERROR or FAIL messages
+Result: (PASS|FAIL|ERROR)
+
+Naming
+------
+o FIXME: decide on a sane test naming scheme. Currently the tests are named
+ based on the primary futex operation they test. Eventually this will become a
+ problem as we intend to write multiple tests which collide in this namespace.
+ Perhaps something like "wait-wake-1" "wait-wake-2" is adequate, leaving the
+ detailed description in the test source and the output.
+
+Coding Style
+------------
+o The Futex Test project adheres to the coding standards set forth by Linux
+ kernel as defined in the Linux source Documentation/process/coding-style.rst.
diff --git a/tools/testing/selftests/futex/functional/.gitignore b/tools/testing/selftests/futex/functional/.gitignore
new file mode 100644
index 000000000..a09f57061
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/.gitignore
@@ -0,0 +1,7 @@
+futex_requeue_pi
+futex_requeue_pi_mismatched_ops
+futex_requeue_pi_signal_restart
+futex_wait_private_mapped_file
+futex_wait_timeout
+futex_wait_uninitialized_heap
+futex_wait_wouldblock
diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile
new file mode 100644
index 000000000..30996306c
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/Makefile
@@ -0,0 +1,25 @@
+# SPDX-License-Identifier: GPL-2.0
+INCLUDES := -I../include -I../../
+CFLAGS := $(CFLAGS) -g -O2 -Wall -D_GNU_SOURCE -pthread $(INCLUDES)
+LDFLAGS := $(LDFLAGS) -pthread -lrt
+
+HEADERS := \
+ ../include/futextest.h \
+ ../include/atomic.h \
+ ../include/logging.h
+TEST_GEN_FILES := \
+ futex_wait_timeout \
+ futex_wait_wouldblock \
+ futex_requeue_pi \
+ futex_requeue_pi_signal_restart \
+ futex_requeue_pi_mismatched_ops \
+ futex_wait_uninitialized_heap \
+ futex_wait_private_mapped_file
+
+TEST_PROGS := run.sh
+
+top_srcdir = ../../../../..
+KSFT_KHDR_INSTALL := 1
+include ../../lib.mk
+
+$(TEST_GEN_FILES): $(HEADERS)
diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi.c b/tools/testing/selftests/futex/functional/futex_requeue_pi.c
new file mode 100644
index 000000000..54cd5c414
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_requeue_pi.c
@@ -0,0 +1,412 @@
+/******************************************************************************
+ *
+ * Copyright © International Business Machines Corp., 2006-2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * DESCRIPTION
+ * This test excercises the futex syscall op codes needed for requeuing
+ * priority inheritance aware POSIX condition variables and mutexes.
+ *
+ * AUTHORS
+ * Sripathi Kodi <sripathik@in.ibm.com>
+ * Darren Hart <dvhart@linux.intel.com>
+ *
+ * HISTORY
+ * 2008-Jan-13: Initial version by Sripathi Kodi <sripathik@in.ibm.com>
+ * 2009-Nov-6: futex test adaptation by Darren Hart <dvhart@linux.intel.com>
+ *
+ *****************************************************************************/
+
+#include <errno.h>
+#include <limits.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <string.h>
+#include "atomic.h"
+#include "futextest.h"
+#include "logging.h"
+
+#define TEST_NAME "futex-requeue-pi"
+#define MAX_WAKE_ITERS 1000
+#define THREAD_MAX 10
+#define SIGNAL_PERIOD_US 100
+
+atomic_t waiters_blocked = ATOMIC_INITIALIZER;
+atomic_t waiters_woken = ATOMIC_INITIALIZER;
+
+futex_t f1 = FUTEX_INITIALIZER;
+futex_t f2 = FUTEX_INITIALIZER;
+futex_t wake_complete = FUTEX_INITIALIZER;
+
+/* Test option defaults */
+static long timeout_ns;
+static int broadcast;
+static int owner;
+static int locked;
+
+struct thread_arg {
+ long id;
+ struct timespec *timeout;
+ int lock;
+ int ret;
+};
+#define THREAD_ARG_INITIALIZER { 0, NULL, 0, 0 }
+
+void usage(char *prog)
+{
+ printf("Usage: %s\n", prog);
+ printf(" -b Broadcast wakeup (all waiters)\n");
+ printf(" -c Use color\n");
+ printf(" -h Display this help message\n");
+ printf(" -l Lock the pi futex across requeue\n");
+ printf(" -o Use a third party pi futex owner during requeue (cancels -l)\n");
+ printf(" -t N Timeout in nanoseconds (default: 0)\n");
+ printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
+ VQUIET, VCRITICAL, VINFO);
+}
+
+int create_rt_thread(pthread_t *pth, void*(*func)(void *), void *arg,
+ int policy, int prio)
+{
+ int ret;
+ struct sched_param schedp;
+ pthread_attr_t attr;
+
+ pthread_attr_init(&attr);
+ memset(&schedp, 0, sizeof(schedp));
+
+ ret = pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED);
+ if (ret) {
+ error("pthread_attr_setinheritsched\n", ret);
+ return -1;
+ }
+
+ ret = pthread_attr_setschedpolicy(&attr, policy);
+ if (ret) {
+ error("pthread_attr_setschedpolicy\n", ret);
+ return -1;
+ }
+
+ schedp.sched_priority = prio;
+ ret = pthread_attr_setschedparam(&attr, &schedp);
+ if (ret) {
+ error("pthread_attr_setschedparam\n", ret);
+ return -1;
+ }
+
+ ret = pthread_create(pth, &attr, func, arg);
+ if (ret) {
+ error("pthread_create\n", ret);
+ return -1;
+ }
+ return 0;
+}
+
+
+void *waiterfn(void *arg)
+{
+ struct thread_arg *args = (struct thread_arg *)arg;
+ futex_t old_val;
+
+ info("Waiter %ld: running\n", args->id);
+ /* Each thread sleeps for a different amount of time
+ * This is to avoid races, because we don't lock the
+ * external mutex here */
+ usleep(1000 * (long)args->id);
+
+ old_val = f1;
+ atomic_inc(&waiters_blocked);
+ info("Calling futex_wait_requeue_pi: %p (%u) -> %p\n",
+ &f1, f1, &f2);
+ args->ret = futex_wait_requeue_pi(&f1, old_val, &f2, args->timeout,
+ FUTEX_PRIVATE_FLAG);
+
+ info("waiter %ld woke with %d %s\n", args->id, args->ret,
+ args->ret < 0 ? strerror(errno) : "");
+ atomic_inc(&waiters_woken);
+ if (args->ret < 0) {
+ if (args->timeout && errno == ETIMEDOUT)
+ args->ret = 0;
+ else {
+ args->ret = RET_ERROR;
+ error("futex_wait_requeue_pi\n", errno);
+ }
+ futex_lock_pi(&f2, NULL, 0, FUTEX_PRIVATE_FLAG);
+ }
+ futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG);
+
+ info("Waiter %ld: exiting with %d\n", args->id, args->ret);
+ pthread_exit((void *)&args->ret);
+}
+
+void *broadcast_wakerfn(void *arg)
+{
+ struct thread_arg *args = (struct thread_arg *)arg;
+ int nr_requeue = INT_MAX;
+ int task_count = 0;
+ futex_t old_val;
+ int nr_wake = 1;
+ int i = 0;
+
+ info("Waker: waiting for waiters to block\n");
+ while (waiters_blocked.val < THREAD_MAX)
+ usleep(1000);
+ usleep(1000);
+
+ info("Waker: Calling broadcast\n");
+ if (args->lock) {
+ info("Calling FUTEX_LOCK_PI on mutex=%x @ %p\n", f2, &f2);
+ futex_lock_pi(&f2, NULL, 0, FUTEX_PRIVATE_FLAG);
+ }
+ continue_requeue:
+ old_val = f1;
+ args->ret = futex_cmp_requeue_pi(&f1, old_val, &f2, nr_wake, nr_requeue,
+ FUTEX_PRIVATE_FLAG);
+ if (args->ret < 0) {
+ args->ret = RET_ERROR;
+ error("FUTEX_CMP_REQUEUE_PI failed\n", errno);
+ } else if (++i < MAX_WAKE_ITERS) {
+ task_count += args->ret;
+ if (task_count < THREAD_MAX - waiters_woken.val)
+ goto continue_requeue;
+ } else {
+ error("max broadcast iterations (%d) reached with %d/%d tasks woken or requeued\n",
+ 0, MAX_WAKE_ITERS, task_count, THREAD_MAX);
+ args->ret = RET_ERROR;
+ }
+
+ futex_wake(&wake_complete, 1, FUTEX_PRIVATE_FLAG);
+
+ if (args->lock)
+ futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG);
+
+ if (args->ret > 0)
+ args->ret = task_count;
+
+ info("Waker: exiting with %d\n", args->ret);
+ pthread_exit((void *)&args->ret);
+}
+
+void *signal_wakerfn(void *arg)
+{
+ struct thread_arg *args = (struct thread_arg *)arg;
+ unsigned int old_val;
+ int nr_requeue = 0;
+ int task_count = 0;
+ int nr_wake = 1;
+ int i = 0;
+
+ info("Waker: waiting for waiters to block\n");
+ while (waiters_blocked.val < THREAD_MAX)
+ usleep(1000);
+ usleep(1000);
+
+ while (task_count < THREAD_MAX && waiters_woken.val < THREAD_MAX) {
+ info("task_count: %d, waiters_woken: %d\n",
+ task_count, waiters_woken.val);
+ if (args->lock) {
+ info("Calling FUTEX_LOCK_PI on mutex=%x @ %p\n",
+ f2, &f2);
+ futex_lock_pi(&f2, NULL, 0, FUTEX_PRIVATE_FLAG);
+ }
+ info("Waker: Calling signal\n");
+ /* cond_signal */
+ old_val = f1;
+ args->ret = futex_cmp_requeue_pi(&f1, old_val, &f2,
+ nr_wake, nr_requeue,
+ FUTEX_PRIVATE_FLAG);
+ if (args->ret < 0)
+ args->ret = -errno;
+ info("futex: %x\n", f2);
+ if (args->lock) {
+ info("Calling FUTEX_UNLOCK_PI on mutex=%x @ %p\n",
+ f2, &f2);
+ futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG);
+ }
+ info("futex: %x\n", f2);
+ if (args->ret < 0) {
+ error("FUTEX_CMP_REQUEUE_PI failed\n", errno);
+ args->ret = RET_ERROR;
+ break;
+ }
+
+ task_count += args->ret;
+ usleep(SIGNAL_PERIOD_US);
+ i++;
+ /* we have to loop at least THREAD_MAX times */
+ if (i > MAX_WAKE_ITERS + THREAD_MAX) {
+ error("max signaling iterations (%d) reached, giving up on pending waiters.\n",
+ 0, MAX_WAKE_ITERS + THREAD_MAX);
+ args->ret = RET_ERROR;
+ break;
+ }
+ }
+
+ futex_wake(&wake_complete, 1, FUTEX_PRIVATE_FLAG);
+
+ if (args->ret >= 0)
+ args->ret = task_count;
+
+ info("Waker: exiting with %d\n", args->ret);
+ info("Waker: waiters_woken: %d\n", waiters_woken.val);
+ pthread_exit((void *)&args->ret);
+}
+
+void *third_party_blocker(void *arg)
+{
+ struct thread_arg *args = (struct thread_arg *)arg;
+ int ret2 = 0;
+
+ args->ret = futex_lock_pi(&f2, NULL, 0, FUTEX_PRIVATE_FLAG);
+ if (args->ret)
+ goto out;
+ args->ret = futex_wait(&wake_complete, wake_complete, NULL,
+ FUTEX_PRIVATE_FLAG);
+ ret2 = futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG);
+
+ out:
+ if (args->ret || ret2) {
+ error("third_party_blocker() futex error", 0);
+ args->ret = RET_ERROR;
+ }
+
+ pthread_exit((void *)&args->ret);
+}
+
+int unit_test(int broadcast, long lock, int third_party_owner, long timeout_ns)
+{
+ void *(*wakerfn)(void *) = signal_wakerfn;
+ struct thread_arg blocker_arg = THREAD_ARG_INITIALIZER;
+ struct thread_arg waker_arg = THREAD_ARG_INITIALIZER;
+ pthread_t waiter[THREAD_MAX], waker, blocker;
+ struct timespec ts, *tsp = NULL;
+ struct thread_arg args[THREAD_MAX];
+ int *waiter_ret;
+ int i, ret = RET_PASS;
+
+ if (timeout_ns) {
+ time_t secs;
+
+ info("timeout_ns = %ld\n", timeout_ns);
+ ret = clock_gettime(CLOCK_MONOTONIC, &ts);
+ secs = (ts.tv_nsec + timeout_ns) / 1000000000;
+ ts.tv_nsec = ((int64_t)ts.tv_nsec + timeout_ns) % 1000000000;
+ ts.tv_sec += secs;
+ info("ts.tv_sec = %ld\n", ts.tv_sec);
+ info("ts.tv_nsec = %ld\n", ts.tv_nsec);
+ tsp = &ts;
+ }
+
+ if (broadcast)
+ wakerfn = broadcast_wakerfn;
+
+ if (third_party_owner) {
+ if (create_rt_thread(&blocker, third_party_blocker,
+ (void *)&blocker_arg, SCHED_FIFO, 1)) {
+ error("Creating third party blocker thread failed\n",
+ errno);
+ ret = RET_ERROR;
+ goto out;
+ }
+ }
+
+ atomic_set(&waiters_woken, 0);
+ for (i = 0; i < THREAD_MAX; i++) {
+ args[i].id = i;
+ args[i].timeout = tsp;
+ info("Starting thread %d\n", i);
+ if (create_rt_thread(&waiter[i], waiterfn, (void *)&args[i],
+ SCHED_FIFO, 1)) {
+ error("Creating waiting thread failed\n", errno);
+ ret = RET_ERROR;
+ goto out;
+ }
+ }
+ waker_arg.lock = lock;
+ if (create_rt_thread(&waker, wakerfn, (void *)&waker_arg,
+ SCHED_FIFO, 1)) {
+ error("Creating waker thread failed\n", errno);
+ ret = RET_ERROR;
+ goto out;
+ }
+
+ /* Wait for threads to finish */
+ /* Store the first error or failure encountered in waiter_ret */
+ waiter_ret = &args[0].ret;
+ for (i = 0; i < THREAD_MAX; i++)
+ pthread_join(waiter[i],
+ *waiter_ret ? NULL : (void **)&waiter_ret);
+
+ if (third_party_owner)
+ pthread_join(blocker, NULL);
+ pthread_join(waker, NULL);
+
+out:
+ if (!ret) {
+ if (*waiter_ret)
+ ret = *waiter_ret;
+ else if (waker_arg.ret < 0)
+ ret = waker_arg.ret;
+ else if (blocker_arg.ret)
+ ret = blocker_arg.ret;
+ }
+
+ return ret;
+}
+
+int main(int argc, char *argv[])
+{
+ int c, ret;
+
+ while ((c = getopt(argc, argv, "bchlot:v:")) != -1) {
+ switch (c) {
+ case 'b':
+ broadcast = 1;
+ break;
+ case 'c':
+ log_color(1);
+ break;
+ case 'h':
+ usage(basename(argv[0]));
+ exit(0);
+ case 'l':
+ locked = 1;
+ break;
+ case 'o':
+ owner = 1;
+ locked = 0;
+ break;
+ case 't':
+ timeout_ns = atoi(optarg);
+ break;
+ case 'v':
+ log_verbosity(atoi(optarg));
+ break;
+ default:
+ usage(basename(argv[0]));
+ exit(1);
+ }
+ }
+
+ ksft_print_header();
+ ksft_print_msg("%s: Test requeue functionality\n", basename(argv[0]));
+ ksft_print_msg(
+ "\tArguments: broadcast=%d locked=%d owner=%d timeout=%ldns\n",
+ broadcast, locked, owner, timeout_ns);
+
+ /*
+ * FIXME: unit_test is obsolete now that we parse options and the
+ * various style of runs are done by run.sh - simplify the code and move
+ * unit_test into main()
+ */
+ ret = unit_test(broadcast, locked, owner, timeout_ns);
+
+ print_result(TEST_NAME, ret);
+ return ret;
+}
diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c b/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c
new file mode 100644
index 000000000..08187a165
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c
@@ -0,0 +1,138 @@
+/******************************************************************************
+ *
+ * Copyright © International Business Machines Corp., 2009
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * DESCRIPTION
+ * 1. Block a thread using FUTEX_WAIT
+ * 2. Attempt to use FUTEX_CMP_REQUEUE_PI on the futex from 1.
+ * 3. The kernel must detect the mismatch and return -EINVAL.
+ *
+ * AUTHOR
+ * Darren Hart <dvhart@linux.intel.com>
+ *
+ * HISTORY
+ * 2009-Nov-9: Initial version by Darren Hart <dvhart@linux.intel.com>
+ *
+ *****************************************************************************/
+
+#include <errno.h>
+#include <getopt.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include "futextest.h"
+#include "logging.h"
+
+#define TEST_NAME "futex-requeue-pi-mismatched-ops"
+
+futex_t f1 = FUTEX_INITIALIZER;
+futex_t f2 = FUTEX_INITIALIZER;
+int child_ret = 0;
+
+void usage(char *prog)
+{
+ printf("Usage: %s\n", prog);
+ printf(" -c Use color\n");
+ printf(" -h Display this help message\n");
+ printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
+ VQUIET, VCRITICAL, VINFO);
+}
+
+void *blocking_child(void *arg)
+{
+ child_ret = futex_wait(&f1, f1, NULL, FUTEX_PRIVATE_FLAG);
+ if (child_ret < 0) {
+ child_ret = -errno;
+ error("futex_wait\n", errno);
+ }
+ return (void *)&child_ret;
+}
+
+int main(int argc, char *argv[])
+{
+ int ret = RET_PASS;
+ pthread_t child;
+ int c;
+
+ while ((c = getopt(argc, argv, "chv:")) != -1) {
+ switch (c) {
+ case 'c':
+ log_color(1);
+ break;
+ case 'h':
+ usage(basename(argv[0]));
+ exit(0);
+ case 'v':
+ log_verbosity(atoi(optarg));
+ break;
+ default:
+ usage(basename(argv[0]));
+ exit(1);
+ }
+ }
+
+ ksft_print_header();
+ ksft_print_msg("%s: Detect mismatched requeue_pi operations\n",
+ basename(argv[0]));
+
+ if (pthread_create(&child, NULL, blocking_child, NULL)) {
+ error("pthread_create\n", errno);
+ ret = RET_ERROR;
+ goto out;
+ }
+ /* Allow the child to block in the kernel. */
+ sleep(1);
+
+ /*
+ * The kernel should detect the waiter did not setup the
+ * q->requeue_pi_key and return -EINVAL. If it does not,
+ * it likely gave the lock to the child, which is now hung
+ * in the kernel.
+ */
+ ret = futex_cmp_requeue_pi(&f1, f1, &f2, 1, 0, FUTEX_PRIVATE_FLAG);
+ if (ret < 0) {
+ if (errno == EINVAL) {
+ /*
+ * The kernel correctly detected the mismatched
+ * requeue_pi target and aborted. Wake the child with
+ * FUTEX_WAKE.
+ */
+ ret = futex_wake(&f1, 1, FUTEX_PRIVATE_FLAG);
+ if (ret == 1) {
+ ret = RET_PASS;
+ } else if (ret < 0) {
+ error("futex_wake\n", errno);
+ ret = RET_ERROR;
+ } else {
+ error("futex_wake did not wake the child\n", 0);
+ ret = RET_ERROR;
+ }
+ } else {
+ error("futex_cmp_requeue_pi\n", errno);
+ ret = RET_ERROR;
+ }
+ } else if (ret > 0) {
+ fail("futex_cmp_requeue_pi failed to detect the mismatch\n");
+ ret = RET_FAIL;
+ } else {
+ error("futex_cmp_requeue_pi found no waiters\n", 0);
+ ret = RET_ERROR;
+ }
+
+ pthread_join(child, NULL);
+
+ if (!ret)
+ ret = child_ret;
+
+ out:
+ /* If the kernel crashes, we shouldn't return at all. */
+ print_result(TEST_NAME, ret);
+ return ret;
+}
diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c b/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c
new file mode 100644
index 000000000..f0542a344
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c
@@ -0,0 +1,225 @@
+/******************************************************************************
+ *
+ * Copyright © International Business Machines Corp., 2006-2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * DESCRIPTION
+ * This test exercises the futex_wait_requeue_pi() signal handling both
+ * before and after the requeue. The first should be restarted by the
+ * kernel. The latter should return EWOULDBLOCK to the waiter.
+ *
+ * AUTHORS
+ * Darren Hart <dvhart@linux.intel.com>
+ *
+ * HISTORY
+ * 2008-May-5: Initial version by Darren Hart <dvhart@linux.intel.com>
+ *
+ *****************************************************************************/
+
+#include <errno.h>
+#include <getopt.h>
+#include <limits.h>
+#include <pthread.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "atomic.h"
+#include "futextest.h"
+#include "logging.h"
+
+#define TEST_NAME "futex-requeue-pi-signal-restart"
+#define DELAY_US 100
+
+futex_t f1 = FUTEX_INITIALIZER;
+futex_t f2 = FUTEX_INITIALIZER;
+atomic_t requeued = ATOMIC_INITIALIZER;
+
+int waiter_ret = 0;
+
+void usage(char *prog)
+{
+ printf("Usage: %s\n", prog);
+ printf(" -c Use color\n");
+ printf(" -h Display this help message\n");
+ printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
+ VQUIET, VCRITICAL, VINFO);
+}
+
+int create_rt_thread(pthread_t *pth, void*(*func)(void *), void *arg,
+ int policy, int prio)
+{
+ struct sched_param schedp;
+ pthread_attr_t attr;
+ int ret;
+
+ pthread_attr_init(&attr);
+ memset(&schedp, 0, sizeof(schedp));
+
+ ret = pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED);
+ if (ret) {
+ error("pthread_attr_setinheritsched\n", ret);
+ return -1;
+ }
+
+ ret = pthread_attr_setschedpolicy(&attr, policy);
+ if (ret) {
+ error("pthread_attr_setschedpolicy\n", ret);
+ return -1;
+ }
+
+ schedp.sched_priority = prio;
+ ret = pthread_attr_setschedparam(&attr, &schedp);
+ if (ret) {
+ error("pthread_attr_setschedparam\n", ret);
+ return -1;
+ }
+
+ ret = pthread_create(pth, &attr, func, arg);
+ if (ret) {
+ error("pthread_create\n", ret);
+ return -1;
+ }
+ return 0;
+}
+
+void handle_signal(int signo)
+{
+ info("signal received %s requeue\n",
+ requeued.val ? "after" : "prior to");
+}
+
+void *waiterfn(void *arg)
+{
+ unsigned int old_val;
+ int res;
+
+ waiter_ret = RET_PASS;
+
+ info("Waiter running\n");
+ info("Calling FUTEX_LOCK_PI on f2=%x @ %p\n", f2, &f2);
+ old_val = f1;
+ res = futex_wait_requeue_pi(&f1, old_val, &(f2), NULL,
+ FUTEX_PRIVATE_FLAG);
+ if (!requeued.val || errno != EWOULDBLOCK) {
+ fail("unexpected return from futex_wait_requeue_pi: %d (%s)\n",
+ res, strerror(errno));
+ info("w2:futex: %x\n", f2);
+ if (!res)
+ futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG);
+ waiter_ret = RET_FAIL;
+ }
+
+ info("Waiter exiting with %d\n", waiter_ret);
+ pthread_exit(NULL);
+}
+
+
+int main(int argc, char *argv[])
+{
+ unsigned int old_val;
+ struct sigaction sa;
+ pthread_t waiter;
+ int c, res, ret = RET_PASS;
+
+ while ((c = getopt(argc, argv, "chv:")) != -1) {
+ switch (c) {
+ case 'c':
+ log_color(1);
+ break;
+ case 'h':
+ usage(basename(argv[0]));
+ exit(0);
+ case 'v':
+ log_verbosity(atoi(optarg));
+ break;
+ default:
+ usage(basename(argv[0]));
+ exit(1);
+ }
+ }
+
+ ksft_print_header();
+ ksft_print_msg("%s: Test signal handling during requeue_pi\n",
+ basename(argv[0]));
+ ksft_print_msg("\tArguments: <none>\n");
+
+ sa.sa_handler = handle_signal;
+ sigemptyset(&sa.sa_mask);
+ sa.sa_flags = 0;
+ if (sigaction(SIGUSR1, &sa, NULL)) {
+ error("sigaction\n", errno);
+ exit(1);
+ }
+
+ info("m1:f2: %x\n", f2);
+ info("Creating waiter\n");
+ res = create_rt_thread(&waiter, waiterfn, NULL, SCHED_FIFO, 1);
+ if (res) {
+ error("Creating waiting thread failed", res);
+ ret = RET_ERROR;
+ goto out;
+ }
+
+ info("Calling FUTEX_LOCK_PI on f2=%x @ %p\n", f2, &f2);
+ info("m2:f2: %x\n", f2);
+ futex_lock_pi(&f2, 0, 0, FUTEX_PRIVATE_FLAG);
+ info("m3:f2: %x\n", f2);
+
+ while (1) {
+ /*
+ * signal the waiter before requeue, waiter should automatically
+ * restart futex_wait_requeue_pi() in the kernel. Wait for the
+ * waiter to block on f1 again.
+ */
+ info("Issuing SIGUSR1 to waiter\n");
+ pthread_kill(waiter, SIGUSR1);
+ usleep(DELAY_US);
+
+ info("Requeueing waiter via FUTEX_CMP_REQUEUE_PI\n");
+ old_val = f1;
+ res = futex_cmp_requeue_pi(&f1, old_val, &(f2), 1, 0,
+ FUTEX_PRIVATE_FLAG);
+ /*
+ * If res is non-zero, we either requeued the waiter or hit an
+ * error, break out and handle it. If it is zero, then the
+ * signal may have hit before the the waiter was blocked on f1.
+ * Try again.
+ */
+ if (res > 0) {
+ atomic_set(&requeued, 1);
+ break;
+ } else if (res < 0) {
+ error("FUTEX_CMP_REQUEUE_PI failed\n", errno);
+ ret = RET_ERROR;
+ break;
+ }
+ }
+ info("m4:f2: %x\n", f2);
+
+ /*
+ * Signal the waiter after requeue, waiter should return from
+ * futex_wait_requeue_pi() with EWOULDBLOCK. Join the thread here so the
+ * futex_unlock_pi() can't happen before the signal wakeup is detected
+ * in the kernel.
+ */
+ info("Issuing SIGUSR1 to waiter\n");
+ pthread_kill(waiter, SIGUSR1);
+ info("Waiting for waiter to return\n");
+ pthread_join(waiter, NULL);
+
+ info("Calling FUTEX_UNLOCK_PI on mutex=%x @ %p\n", f2, &f2);
+ futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG);
+ info("m5:f2: %x\n", f2);
+
+ out:
+ if (ret == RET_PASS && waiter_ret)
+ ret = waiter_ret;
+
+ print_result(TEST_NAME, ret);
+ return ret;
+}
diff --git a/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c b/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c
new file mode 100644
index 000000000..6216de828
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c
@@ -0,0 +1,128 @@
+/******************************************************************************
+ *
+ * Copyright FUJITSU LIMITED 2010
+ * Copyright KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * DESCRIPTION
+ * Internally, Futex has two handling mode, anon and file. The private file
+ * mapping is special. At first it behave as file, but after write anything
+ * it behave as anon. This test is intent to test such case.
+ *
+ * AUTHOR
+ * KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+ *
+ * HISTORY
+ * 2010-Jan-6: Initial version by KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+ *
+ *****************************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <syscall.h>
+#include <unistd.h>
+#include <errno.h>
+#include <linux/futex.h>
+#include <pthread.h>
+#include <libgen.h>
+#include <signal.h>
+
+#include "logging.h"
+#include "futextest.h"
+
+#define TEST_NAME "futex-wait-private-mapped-file"
+#define PAGE_SZ 4096
+
+char pad[PAGE_SZ] = {1};
+futex_t val = 1;
+char pad2[PAGE_SZ] = {1};
+
+#define WAKE_WAIT_US 3000000
+struct timespec wait_timeout = { .tv_sec = 5, .tv_nsec = 0};
+
+void usage(char *prog)
+{
+ printf("Usage: %s\n", prog);
+ printf(" -c Use color\n");
+ printf(" -h Display this help message\n");
+ printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
+ VQUIET, VCRITICAL, VINFO);
+}
+
+void *thr_futex_wait(void *arg)
+{
+ int ret;
+
+ info("futex wait\n");
+ ret = futex_wait(&val, 1, &wait_timeout, 0);
+ if (ret && errno != EWOULDBLOCK && errno != ETIMEDOUT) {
+ error("futex error.\n", errno);
+ print_result(TEST_NAME, RET_ERROR);
+ exit(RET_ERROR);
+ }
+
+ if (ret && errno == ETIMEDOUT)
+ fail("waiter timedout\n");
+
+ info("futex_wait: ret = %d, errno = %d\n", ret, errno);
+
+ return NULL;
+}
+
+int main(int argc, char **argv)
+{
+ pthread_t thr;
+ int ret = RET_PASS;
+ int res;
+ int c;
+
+ while ((c = getopt(argc, argv, "chv:")) != -1) {
+ switch (c) {
+ case 'c':
+ log_color(1);
+ break;
+ case 'h':
+ usage(basename(argv[0]));
+ exit(0);
+ case 'v':
+ log_verbosity(atoi(optarg));
+ break;
+ default:
+ usage(basename(argv[0]));
+ exit(1);
+ }
+ }
+
+ ksft_print_header();
+ ksft_print_msg(
+ "%s: Test the futex value of private file mappings in FUTEX_WAIT\n",
+ basename(argv[0]));
+
+ ret = pthread_create(&thr, NULL, thr_futex_wait, NULL);
+ if (ret < 0) {
+ fprintf(stderr, "pthread_create error\n");
+ ret = RET_ERROR;
+ goto out;
+ }
+
+ info("wait a while\n");
+ usleep(WAKE_WAIT_US);
+ val = 2;
+ res = futex_wake(&val, 1, 0);
+ info("futex_wake %d\n", res);
+ if (res != 1) {
+ fail("FUTEX_WAKE didn't find the waiting thread.\n");
+ ret = RET_FAIL;
+ }
+
+ info("join\n");
+ pthread_join(thr, NULL);
+
+ out:
+ print_result(TEST_NAME, ret);
+ return ret;
+}
diff --git a/tools/testing/selftests/futex/functional/futex_wait_timeout.c b/tools/testing/selftests/futex/functional/futex_wait_timeout.c
new file mode 100644
index 000000000..bab3dfe17
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_wait_timeout.c
@@ -0,0 +1,89 @@
+/******************************************************************************
+ *
+ * Copyright © International Business Machines Corp., 2009
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * DESCRIPTION
+ * Block on a futex and wait for timeout.
+ *
+ * AUTHOR
+ * Darren Hart <dvhart@linux.intel.com>
+ *
+ * HISTORY
+ * 2009-Nov-6: Initial version by Darren Hart <dvhart@linux.intel.com>
+ *
+ *****************************************************************************/
+
+#include <errno.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include "futextest.h"
+#include "logging.h"
+
+#define TEST_NAME "futex-wait-timeout"
+
+static long timeout_ns = 100000; /* 100us default timeout */
+
+void usage(char *prog)
+{
+ printf("Usage: %s\n", prog);
+ printf(" -c Use color\n");
+ printf(" -h Display this help message\n");
+ printf(" -t N Timeout in nanoseconds (default: 100,000)\n");
+ printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
+ VQUIET, VCRITICAL, VINFO);
+}
+
+int main(int argc, char *argv[])
+{
+ futex_t f1 = FUTEX_INITIALIZER;
+ struct timespec to;
+ int res, ret = RET_PASS;
+ int c;
+
+ while ((c = getopt(argc, argv, "cht:v:")) != -1) {
+ switch (c) {
+ case 'c':
+ log_color(1);
+ break;
+ case 'h':
+ usage(basename(argv[0]));
+ exit(0);
+ case 't':
+ timeout_ns = atoi(optarg);
+ break;
+ case 'v':
+ log_verbosity(atoi(optarg));
+ break;
+ default:
+ usage(basename(argv[0]));
+ exit(1);
+ }
+ }
+
+ ksft_print_header();
+ ksft_print_msg("%s: Block on a futex and wait for timeout\n",
+ basename(argv[0]));
+ ksft_print_msg("\tArguments: timeout=%ldns\n", timeout_ns);
+
+ /* initialize timeout */
+ to.tv_sec = 0;
+ to.tv_nsec = timeout_ns;
+
+ info("Calling futex_wait on f1: %u @ %p\n", f1, &f1);
+ res = futex_wait(&f1, f1, &to, FUTEX_PRIVATE_FLAG);
+ if (!res || errno != ETIMEDOUT) {
+ fail("futex_wait returned %d\n", ret < 0 ? errno : ret);
+ ret = RET_FAIL;
+ }
+
+ print_result(TEST_NAME, ret);
+ return ret;
+}
diff --git a/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c b/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c
new file mode 100644
index 000000000..269753225
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c
@@ -0,0 +1,126 @@
+/******************************************************************************
+ *
+ * Copyright FUJITSU LIMITED 2010
+ * Copyright KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * DESCRIPTION
+ * Wait on uninitialized heap. It shold be zero and FUTEX_WAIT should
+ * return immediately. This test is intent to test zero page handling in
+ * futex.
+ *
+ * AUTHOR
+ * KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+ *
+ * HISTORY
+ * 2010-Jan-6: Initial version by KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+ *
+ *****************************************************************************/
+
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <syscall.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <errno.h>
+#include <linux/futex.h>
+#include <libgen.h>
+
+#include "logging.h"
+#include "futextest.h"
+
+#define TEST_NAME "futex-wait-uninitialized-heap"
+#define WAIT_US 5000000
+
+static int child_blocked = 1;
+static int child_ret;
+void *buf;
+
+void usage(char *prog)
+{
+ printf("Usage: %s\n", prog);
+ printf(" -c Use color\n");
+ printf(" -h Display this help message\n");
+ printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
+ VQUIET, VCRITICAL, VINFO);
+}
+
+void *wait_thread(void *arg)
+{
+ int res;
+
+ child_ret = RET_PASS;
+ res = futex_wait(buf, 1, NULL, 0);
+ child_blocked = 0;
+
+ if (res != 0 && errno != EWOULDBLOCK) {
+ error("futex failure\n", errno);
+ child_ret = RET_ERROR;
+ }
+ pthread_exit(NULL);
+}
+
+int main(int argc, char **argv)
+{
+ int c, ret = RET_PASS;
+ long page_size;
+ pthread_t thr;
+
+ while ((c = getopt(argc, argv, "chv:")) != -1) {
+ switch (c) {
+ case 'c':
+ log_color(1);
+ break;
+ case 'h':
+ usage(basename(argv[0]));
+ exit(0);
+ case 'v':
+ log_verbosity(atoi(optarg));
+ break;
+ default:
+ usage(basename(argv[0]));
+ exit(1);
+ }
+ }
+
+ page_size = sysconf(_SC_PAGESIZE);
+
+ buf = mmap(NULL, page_size, PROT_READ|PROT_WRITE,
+ MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
+ if (buf == (void *)-1) {
+ error("mmap\n", errno);
+ exit(1);
+ }
+
+ ksft_print_header();
+ ksft_print_msg("%s: Test the uninitialized futex value in FUTEX_WAIT\n",
+ basename(argv[0]));
+
+
+ ret = pthread_create(&thr, NULL, wait_thread, NULL);
+ if (ret) {
+ error("pthread_create\n", errno);
+ ret = RET_ERROR;
+ goto out;
+ }
+
+ info("waiting %dus for child to return\n", WAIT_US);
+ usleep(WAIT_US);
+
+ ret = child_ret;
+ if (child_blocked) {
+ fail("child blocked in kernel\n");
+ ret = RET_FAIL;
+ }
+
+ out:
+ print_result(TEST_NAME, ret);
+ return ret;
+}
diff --git a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c
new file mode 100644
index 000000000..da15a6326
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c
@@ -0,0 +1,81 @@
+/******************************************************************************
+ *
+ * Copyright © International Business Machines Corp., 2009
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * DESCRIPTION
+ * Test if FUTEX_WAIT op returns -EWOULDBLOCK if the futex value differs
+ * from the expected one.
+ *
+ * AUTHOR
+ * Gowrishankar <gowrishankar.m@in.ibm.com>
+ *
+ * HISTORY
+ * 2009-Nov-14: Initial version by Gowrishankar <gowrishankar.m@in.ibm.com>
+ *
+ *****************************************************************************/
+
+#include <errno.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include "futextest.h"
+#include "logging.h"
+
+#define TEST_NAME "futex-wait-wouldblock"
+#define timeout_ns 100000
+
+void usage(char *prog)
+{
+ printf("Usage: %s\n", prog);
+ printf(" -c Use color\n");
+ printf(" -h Display this help message\n");
+ printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
+ VQUIET, VCRITICAL, VINFO);
+}
+
+int main(int argc, char *argv[])
+{
+ struct timespec to = {.tv_sec = 0, .tv_nsec = timeout_ns};
+ futex_t f1 = FUTEX_INITIALIZER;
+ int res, ret = RET_PASS;
+ int c;
+
+ while ((c = getopt(argc, argv, "cht:v:")) != -1) {
+ switch (c) {
+ case 'c':
+ log_color(1);
+ break;
+ case 'h':
+ usage(basename(argv[0]));
+ exit(0);
+ case 'v':
+ log_verbosity(atoi(optarg));
+ break;
+ default:
+ usage(basename(argv[0]));
+ exit(1);
+ }
+ }
+
+ ksft_print_header();
+ ksft_print_msg("%s: Test the unexpected futex value in FUTEX_WAIT\n",
+ basename(argv[0]));
+
+ info("Calling futex_wait on f1: %u @ %p with val=%u\n", f1, &f1, f1+1);
+ res = futex_wait(&f1, f1+1, &to, FUTEX_PRIVATE_FLAG);
+ if (!res || errno != EWOULDBLOCK) {
+ fail("futex_wait returned: %d %s\n",
+ res ? errno : res, res ? strerror(errno) : "");
+ ret = RET_FAIL;
+ }
+
+ print_result(TEST_NAME, ret);
+ return ret;
+}
diff --git a/tools/testing/selftests/futex/functional/run.sh b/tools/testing/selftests/futex/functional/run.sh
new file mode 100755
index 000000000..7ff002eed
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/run.sh
@@ -0,0 +1,79 @@
+#!/bin/sh
+
+###############################################################################
+#
+# Copyright © International Business Machines Corp., 2009
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# DESCRIPTION
+# Run tests in the current directory.
+#
+# AUTHOR
+# Darren Hart <dvhart@linux.intel.com>
+#
+# HISTORY
+# 2009-Nov-9: Initial version by Darren Hart <dvhart@linux.intel.com>
+# 2010-Jan-6: Add futex_wait_uninitialized_heap and futex_wait_private_mapped_file
+# by KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+#
+###############################################################################
+
+# Test for a color capable console
+if [ -z "$USE_COLOR" ]; then
+ tput setf 7 || tput setaf 7
+ if [ $? -eq 0 ]; then
+ USE_COLOR=1
+ tput sgr0
+ fi
+fi
+if [ "$USE_COLOR" -eq 1 ]; then
+ COLOR="-c"
+fi
+
+
+echo
+# requeue pi testing
+# without timeouts
+./futex_requeue_pi $COLOR
+./futex_requeue_pi $COLOR -b
+./futex_requeue_pi $COLOR -b -l
+./futex_requeue_pi $COLOR -b -o
+./futex_requeue_pi $COLOR -l
+./futex_requeue_pi $COLOR -o
+# with timeouts
+./futex_requeue_pi $COLOR -b -l -t 5000
+./futex_requeue_pi $COLOR -l -t 5000
+./futex_requeue_pi $COLOR -b -l -t 500000
+./futex_requeue_pi $COLOR -l -t 500000
+./futex_requeue_pi $COLOR -b -t 5000
+./futex_requeue_pi $COLOR -t 5000
+./futex_requeue_pi $COLOR -b -t 500000
+./futex_requeue_pi $COLOR -t 500000
+./futex_requeue_pi $COLOR -b -o -t 5000
+./futex_requeue_pi $COLOR -l -t 5000
+./futex_requeue_pi $COLOR -b -o -t 500000
+./futex_requeue_pi $COLOR -l -t 500000
+# with long timeout
+./futex_requeue_pi $COLOR -b -l -t 2000000000
+./futex_requeue_pi $COLOR -l -t 2000000000
+
+
+echo
+./futex_requeue_pi_mismatched_ops $COLOR
+
+echo
+./futex_requeue_pi_signal_restart $COLOR
+
+echo
+./futex_wait_timeout $COLOR
+
+echo
+./futex_wait_wouldblock $COLOR
+
+echo
+./futex_wait_uninitialized_heap $COLOR
+./futex_wait_private_mapped_file $COLOR
diff --git a/tools/testing/selftests/futex/include/atomic.h b/tools/testing/selftests/futex/include/atomic.h
new file mode 100644
index 000000000..f861da3e3
--- /dev/null
+++ b/tools/testing/selftests/futex/include/atomic.h
@@ -0,0 +1,83 @@
+/******************************************************************************
+ *
+ * Copyright © International Business Machines Corp., 2009
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * DESCRIPTION
+ * GCC atomic builtin wrappers
+ * http://gcc.gnu.org/onlinedocs/gcc-4.1.0/gcc/Atomic-Builtins.html
+ *
+ * AUTHOR
+ * Darren Hart <dvhart@linux.intel.com>
+ *
+ * HISTORY
+ * 2009-Nov-17: Initial version by Darren Hart <dvhart@linux.intel.com>
+ *
+ *****************************************************************************/
+
+#ifndef _ATOMIC_H
+#define _ATOMIC_H
+
+typedef struct {
+ volatile int val;
+} atomic_t;
+
+#define ATOMIC_INITIALIZER { 0 }
+
+/**
+ * atomic_cmpxchg() - Atomic compare and exchange
+ * @uaddr: The address of the futex to be modified
+ * @oldval: The expected value of the futex
+ * @newval: The new value to try and assign the futex
+ *
+ * Return the old value of addr->val.
+ */
+static inline int
+atomic_cmpxchg(atomic_t *addr, int oldval, int newval)
+{
+ return __sync_val_compare_and_swap(&addr->val, oldval, newval);
+}
+
+/**
+ * atomic_inc() - Atomic incrememnt
+ * @addr: Address of the variable to increment
+ *
+ * Return the new value of addr->val.
+ */
+static inline int
+atomic_inc(atomic_t *addr)
+{
+ return __sync_add_and_fetch(&addr->val, 1);
+}
+
+/**
+ * atomic_dec() - Atomic decrement
+ * @addr: Address of the variable to decrement
+ *
+ * Return the new value of addr-val.
+ */
+static inline int
+atomic_dec(atomic_t *addr)
+{
+ return __sync_sub_and_fetch(&addr->val, 1);
+}
+
+/**
+ * atomic_set() - Atomic set
+ * @addr: Address of the variable to set
+ * @newval: New value for the atomic_t
+ *
+ * Return the new value of addr->val.
+ */
+static inline int
+atomic_set(atomic_t *addr, int newval)
+{
+ addr->val = newval;
+ return newval;
+}
+
+#endif
diff --git a/tools/testing/selftests/futex/include/futextest.h b/tools/testing/selftests/futex/include/futextest.h
new file mode 100644
index 000000000..b98c3aba7
--- /dev/null
+++ b/tools/testing/selftests/futex/include/futextest.h
@@ -0,0 +1,266 @@
+/******************************************************************************
+ *
+ * Copyright © International Business Machines Corp., 2009
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * DESCRIPTION
+ * Glibc independent futex library for testing kernel functionality.
+ *
+ * AUTHOR
+ * Darren Hart <dvhart@linux.intel.com>
+ *
+ * HISTORY
+ * 2009-Nov-6: Initial version by Darren Hart <dvhart@linux.intel.com>
+ *
+ *****************************************************************************/
+
+#ifndef _FUTEXTEST_H
+#define _FUTEXTEST_H
+
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <linux/futex.h>
+
+typedef volatile u_int32_t futex_t;
+#define FUTEX_INITIALIZER 0
+
+/* Define the newer op codes if the system header file is not up to date. */
+#ifndef FUTEX_WAIT_BITSET
+#define FUTEX_WAIT_BITSET 9
+#endif
+#ifndef FUTEX_WAKE_BITSET
+#define FUTEX_WAKE_BITSET 10
+#endif
+#ifndef FUTEX_WAIT_REQUEUE_PI
+#define FUTEX_WAIT_REQUEUE_PI 11
+#endif
+#ifndef FUTEX_CMP_REQUEUE_PI
+#define FUTEX_CMP_REQUEUE_PI 12
+#endif
+#ifndef FUTEX_WAIT_REQUEUE_PI_PRIVATE
+#define FUTEX_WAIT_REQUEUE_PI_PRIVATE (FUTEX_WAIT_REQUEUE_PI | \
+ FUTEX_PRIVATE_FLAG)
+#endif
+#ifndef FUTEX_REQUEUE_PI_PRIVATE
+#define FUTEX_CMP_REQUEUE_PI_PRIVATE (FUTEX_CMP_REQUEUE_PI | \
+ FUTEX_PRIVATE_FLAG)
+#endif
+
+/**
+ * futex() - SYS_futex syscall wrapper
+ * @uaddr: address of first futex
+ * @op: futex op code
+ * @val: typically expected value of uaddr, but varies by op
+ * @timeout: typically an absolute struct timespec (except where noted
+ * otherwise). Overloaded by some ops
+ * @uaddr2: address of second futex for some ops\
+ * @val3: varies by op
+ * @opflags: flags to be bitwise OR'd with op, such as FUTEX_PRIVATE_FLAG
+ *
+ * futex() is used by all the following futex op wrappers. It can also be
+ * used for misuse and abuse testing. Generally, the specific op wrappers
+ * should be used instead. It is a macro instead of an static inline function as
+ * some of the types over overloaded (timeout is used for nr_requeue for
+ * example).
+ *
+ * These argument descriptions are the defaults for all
+ * like-named arguments in the following wrappers except where noted below.
+ */
+#define futex(uaddr, op, val, timeout, uaddr2, val3, opflags) \
+ syscall(SYS_futex, uaddr, op | opflags, val, timeout, uaddr2, val3)
+
+/**
+ * futex_wait() - block on uaddr with optional timeout
+ * @timeout: relative timeout
+ */
+static inline int
+futex_wait(futex_t *uaddr, futex_t val, struct timespec *timeout, int opflags)
+{
+ return futex(uaddr, FUTEX_WAIT, val, timeout, NULL, 0, opflags);
+}
+
+/**
+ * futex_wake() - wake one or more tasks blocked on uaddr
+ * @nr_wake: wake up to this many tasks
+ */
+static inline int
+futex_wake(futex_t *uaddr, int nr_wake, int opflags)
+{
+ return futex(uaddr, FUTEX_WAKE, nr_wake, NULL, NULL, 0, opflags);
+}
+
+/**
+ * futex_wait_bitset() - block on uaddr with bitset
+ * @bitset: bitset to be used with futex_wake_bitset
+ */
+static inline int
+futex_wait_bitset(futex_t *uaddr, futex_t val, struct timespec *timeout,
+ u_int32_t bitset, int opflags)
+{
+ return futex(uaddr, FUTEX_WAIT_BITSET, val, timeout, NULL, bitset,
+ opflags);
+}
+
+/**
+ * futex_wake_bitset() - wake one or more tasks blocked on uaddr with bitset
+ * @bitset: bitset to compare with that used in futex_wait_bitset
+ */
+static inline int
+futex_wake_bitset(futex_t *uaddr, int nr_wake, u_int32_t bitset, int opflags)
+{
+ return futex(uaddr, FUTEX_WAKE_BITSET, nr_wake, NULL, NULL, bitset,
+ opflags);
+}
+
+/**
+ * futex_lock_pi() - block on uaddr as a PI mutex
+ * @detect: whether (1) or not (0) to perform deadlock detection
+ */
+static inline int
+futex_lock_pi(futex_t *uaddr, struct timespec *timeout, int detect,
+ int opflags)
+{
+ return futex(uaddr, FUTEX_LOCK_PI, detect, timeout, NULL, 0, opflags);
+}
+
+/**
+ * futex_unlock_pi() - release uaddr as a PI mutex, waking the top waiter
+ */
+static inline int
+futex_unlock_pi(futex_t *uaddr, int opflags)
+{
+ return futex(uaddr, FUTEX_UNLOCK_PI, 0, NULL, NULL, 0, opflags);
+}
+
+/**
+ * futex_wake_op() - FIXME: COME UP WITH A GOOD ONE LINE DESCRIPTION
+ */
+static inline int
+futex_wake_op(futex_t *uaddr, futex_t *uaddr2, int nr_wake, int nr_wake2,
+ int wake_op, int opflags)
+{
+ return futex(uaddr, FUTEX_WAKE_OP, nr_wake, nr_wake2, uaddr2, wake_op,
+ opflags);
+}
+
+/**
+ * futex_requeue() - requeue without expected value comparison, deprecated
+ * @nr_wake: wake up to this many tasks
+ * @nr_requeue: requeue up to this many tasks
+ *
+ * Due to its inherently racy implementation, futex_requeue() is deprecated in
+ * favor of futex_cmp_requeue().
+ */
+static inline int
+futex_requeue(futex_t *uaddr, futex_t *uaddr2, int nr_wake, int nr_requeue,
+ int opflags)
+{
+ return futex(uaddr, FUTEX_REQUEUE, nr_wake, nr_requeue, uaddr2, 0,
+ opflags);
+}
+
+/**
+ * futex_cmp_requeue() - requeue tasks from uaddr to uaddr2
+ * @nr_wake: wake up to this many tasks
+ * @nr_requeue: requeue up to this many tasks
+ */
+static inline int
+futex_cmp_requeue(futex_t *uaddr, futex_t val, futex_t *uaddr2, int nr_wake,
+ int nr_requeue, int opflags)
+{
+ return futex(uaddr, FUTEX_CMP_REQUEUE, nr_wake, nr_requeue, uaddr2,
+ val, opflags);
+}
+
+/**
+ * futex_wait_requeue_pi() - block on uaddr and prepare to requeue to uaddr2
+ * @uaddr: non-PI futex source
+ * @uaddr2: PI futex target
+ *
+ * This is the first half of the requeue_pi mechanism. It shall always be
+ * paired with futex_cmp_requeue_pi().
+ */
+static inline int
+futex_wait_requeue_pi(futex_t *uaddr, futex_t val, futex_t *uaddr2,
+ struct timespec *timeout, int opflags)
+{
+ return futex(uaddr, FUTEX_WAIT_REQUEUE_PI, val, timeout, uaddr2, 0,
+ opflags);
+}
+
+/**
+ * futex_cmp_requeue_pi() - requeue tasks from uaddr to uaddr2 (PI aware)
+ * @uaddr: non-PI futex source
+ * @uaddr2: PI futex target
+ * @nr_wake: wake up to this many tasks
+ * @nr_requeue: requeue up to this many tasks
+ */
+static inline int
+futex_cmp_requeue_pi(futex_t *uaddr, futex_t val, futex_t *uaddr2, int nr_wake,
+ int nr_requeue, int opflags)
+{
+ return futex(uaddr, FUTEX_CMP_REQUEUE_PI, nr_wake, nr_requeue, uaddr2,
+ val, opflags);
+}
+
+/**
+ * futex_cmpxchg() - atomic compare and exchange
+ * @uaddr: The address of the futex to be modified
+ * @oldval: The expected value of the futex
+ * @newval: The new value to try and assign the futex
+ *
+ * Implement cmpxchg using gcc atomic builtins.
+ * http://gcc.gnu.org/onlinedocs/gcc-4.1.0/gcc/Atomic-Builtins.html
+ *
+ * Return the old futex value.
+ */
+static inline u_int32_t
+futex_cmpxchg(futex_t *uaddr, u_int32_t oldval, u_int32_t newval)
+{
+ return __sync_val_compare_and_swap(uaddr, oldval, newval);
+}
+
+/**
+ * futex_dec() - atomic decrement of the futex value
+ * @uaddr: The address of the futex to be modified
+ *
+ * Return the new futex value.
+ */
+static inline u_int32_t
+futex_dec(futex_t *uaddr)
+{
+ return __sync_sub_and_fetch(uaddr, 1);
+}
+
+/**
+ * futex_inc() - atomic increment of the futex value
+ * @uaddr: the address of the futex to be modified
+ *
+ * Return the new futex value.
+ */
+static inline u_int32_t
+futex_inc(futex_t *uaddr)
+{
+ return __sync_add_and_fetch(uaddr, 1);
+}
+
+/**
+ * futex_set() - atomic decrement of the futex value
+ * @uaddr: the address of the futex to be modified
+ * @newval: New value for the atomic_t
+ *
+ * Return the new futex value.
+ */
+static inline u_int32_t
+futex_set(futex_t *uaddr, u_int32_t newval)
+{
+ *uaddr = newval;
+ return newval;
+}
+
+#endif
diff --git a/tools/testing/selftests/futex/include/logging.h b/tools/testing/selftests/futex/include/logging.h
new file mode 100644
index 000000000..01989644e
--- /dev/null
+++ b/tools/testing/selftests/futex/include/logging.h
@@ -0,0 +1,152 @@
+/******************************************************************************
+ *
+ * Copyright © International Business Machines Corp., 2009
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * DESCRIPTION
+ * Glibc independent futex library for testing kernel functionality.
+ *
+ * AUTHOR
+ * Darren Hart <dvhart@linux.intel.com>
+ *
+ * HISTORY
+ * 2009-Nov-6: Initial version by Darren Hart <dvhart@linux.intel.com>
+ *
+ *****************************************************************************/
+
+#ifndef _LOGGING_H
+#define _LOGGING_H
+
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <linux/futex.h>
+#include "kselftest.h"
+
+/*
+ * Define PASS, ERROR, and FAIL strings with and without color escape
+ * sequences, default to no color.
+ */
+#define ESC 0x1B, '['
+#define BRIGHT '1'
+#define GREEN '3', '2'
+#define YELLOW '3', '3'
+#define RED '3', '1'
+#define ESCEND 'm'
+#define BRIGHT_GREEN ESC, BRIGHT, ';', GREEN, ESCEND
+#define BRIGHT_YELLOW ESC, BRIGHT, ';', YELLOW, ESCEND
+#define BRIGHT_RED ESC, BRIGHT, ';', RED, ESCEND
+#define RESET_COLOR ESC, '0', 'm'
+static const char PASS_COLOR[] = {BRIGHT_GREEN, ' ', 'P', 'A', 'S', 'S',
+ RESET_COLOR, 0};
+static const char ERROR_COLOR[] = {BRIGHT_YELLOW, 'E', 'R', 'R', 'O', 'R',
+ RESET_COLOR, 0};
+static const char FAIL_COLOR[] = {BRIGHT_RED, ' ', 'F', 'A', 'I', 'L',
+ RESET_COLOR, 0};
+static const char INFO_NORMAL[] = " INFO";
+static const char PASS_NORMAL[] = " PASS";
+static const char ERROR_NORMAL[] = "ERROR";
+static const char FAIL_NORMAL[] = " FAIL";
+const char *INFO = INFO_NORMAL;
+const char *PASS = PASS_NORMAL;
+const char *ERROR = ERROR_NORMAL;
+const char *FAIL = FAIL_NORMAL;
+
+/* Verbosity setting for INFO messages */
+#define VQUIET 0
+#define VCRITICAL 1
+#define VINFO 2
+#define VMAX VINFO
+int _verbose = VCRITICAL;
+
+/* Functional test return codes */
+#define RET_PASS 0
+#define RET_ERROR -1
+#define RET_FAIL -2
+
+/**
+ * log_color() - Use colored output for PASS, ERROR, and FAIL strings
+ * @use_color: use color (1) or not (0)
+ */
+void log_color(int use_color)
+{
+ if (use_color) {
+ PASS = PASS_COLOR;
+ ERROR = ERROR_COLOR;
+ FAIL = FAIL_COLOR;
+ } else {
+ PASS = PASS_NORMAL;
+ ERROR = ERROR_NORMAL;
+ FAIL = FAIL_NORMAL;
+ }
+}
+
+/**
+ * log_verbosity() - Set verbosity of test output
+ * @verbose: Enable (1) verbose output or not (0)
+ *
+ * Currently setting verbose=1 will enable INFO messages and 0 will disable
+ * them. FAIL and ERROR messages are always displayed.
+ */
+void log_verbosity(int level)
+{
+ if (level > VMAX)
+ level = VMAX;
+ else if (level < 0)
+ level = 0;
+ _verbose = level;
+}
+
+/**
+ * print_result() - Print standard PASS | ERROR | FAIL results
+ * @ret: the return value to be considered: 0 | RET_ERROR | RET_FAIL
+ *
+ * print_result() is primarily intended for functional tests.
+ */
+void print_result(const char *test_name, int ret)
+{
+ switch (ret) {
+ case RET_PASS:
+ ksft_test_result_pass("%s\n", test_name);
+ ksft_print_cnts();
+ return;
+ case RET_ERROR:
+ ksft_test_result_error("%s\n", test_name);
+ ksft_print_cnts();
+ return;
+ case RET_FAIL:
+ ksft_test_result_fail("%s\n", test_name);
+ ksft_print_cnts();
+ return;
+ }
+}
+
+/* log level macros */
+#define info(message, vargs...) \
+do { \
+ if (_verbose >= VINFO) \
+ fprintf(stderr, "\t%s: "message, INFO, ##vargs); \
+} while (0)
+
+#define error(message, err, args...) \
+do { \
+ if (_verbose >= VCRITICAL) {\
+ if (err) \
+ fprintf(stderr, "\t%s: %s: "message, \
+ ERROR, strerror(err), ##args); \
+ else \
+ fprintf(stderr, "\t%s: "message, ERROR, ##args); \
+ } \
+} while (0)
+
+#define fail(message, args...) \
+do { \
+ if (_verbose >= VCRITICAL) \
+ fprintf(stderr, "\t%s: "message, FAIL, ##args); \
+} while (0)
+
+#endif
diff --git a/tools/testing/selftests/futex/run.sh b/tools/testing/selftests/futex/run.sh
new file mode 100755
index 000000000..88bcb1767
--- /dev/null
+++ b/tools/testing/selftests/futex/run.sh
@@ -0,0 +1,33 @@
+#!/bin/sh
+
+###############################################################################
+#
+# Copyright © International Business Machines Corp., 2009
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# DESCRIPTION
+# Run all tests under the functional, performance, and stress directories.
+# Format and summarize the results.
+#
+# AUTHOR
+# Darren Hart <dvhart@linux.intel.com>
+#
+# HISTORY
+# 2009-Nov-9: Initial version by Darren Hart <dvhart@linux.intel.com>
+#
+###############################################################################
+
+# Test for a color capable shell and pass the result to the subdir scripts
+USE_COLOR=0
+tput setf 7 || tput setaf 7
+if [ $? -eq 0 ]; then
+ USE_COLOR=1
+ tput sgr0
+fi
+export USE_COLOR
+
+(cd functional; ./run.sh)
diff --git a/tools/testing/selftests/gen_kselftest_tar.sh b/tools/testing/selftests/gen_kselftest_tar.sh
new file mode 100755
index 000000000..a27e2eec3
--- /dev/null
+++ b/tools/testing/selftests/gen_kselftest_tar.sh
@@ -0,0 +1,53 @@
+#!/bin/bash
+#
+# SPDX-License-Identifier: GPL-2.0
+# gen_kselftest_tar
+# Generate kselftest tarball
+# Author: Shuah Khan <shuahkh@osg.samsung.com>
+# Copyright (C) 2015 Samsung Electronics Co., Ltd.
+
+# main
+main()
+{
+ if [ "$#" -eq 0 ]; then
+ echo "$0: Generating default compression gzip"
+ copts="cvzf"
+ ext=".tar.gz"
+ else
+ case "$1" in
+ tar)
+ copts="cvf"
+ ext=".tar"
+ ;;
+ targz)
+ copts="cvzf"
+ ext=".tar.gz"
+ ;;
+ tarbz2)
+ copts="cvjf"
+ ext=".tar.bz2"
+ ;;
+ tarxz)
+ copts="cvJf"
+ ext=".tar.xz"
+ ;;
+ *)
+ echo "Unknown tarball format $1"
+ exit 1
+ ;;
+ esac
+ fi
+
+ install_dir=./kselftest
+
+# Run install using INSTALL_KSFT_PATH override to generate install
+# directory
+./kselftest_install.sh
+tar $copts kselftest${ext} $install_dir
+echo "Kselftest archive kselftest${ext} created!"
+
+# clean up install directory
+rm -rf kselftest
+}
+
+main "$@"
diff --git a/tools/testing/selftests/gpio/.gitignore b/tools/testing/selftests/gpio/.gitignore
new file mode 100644
index 000000000..7d14f743d
--- /dev/null
+++ b/tools/testing/selftests/gpio/.gitignore
@@ -0,0 +1 @@
+gpio-mockup-chardev
diff --git a/tools/testing/selftests/gpio/Makefile b/tools/testing/selftests/gpio/Makefile
new file mode 100644
index 000000000..59ea4c461
--- /dev/null
+++ b/tools/testing/selftests/gpio/Makefile
@@ -0,0 +1,29 @@
+# SPDX-License-Identifier: GPL-2.0
+
+TEST_PROGS := gpio-mockup.sh
+TEST_FILES := gpio-mockup-sysfs.sh $(BINARIES)
+BINARIES := gpio-mockup-chardev
+EXTRA_PROGS := ../gpiogpio-event-mon ../gpiogpio-hammer ../gpiolsgpio
+EXTRA_DIRS := ../gpioinclude/
+EXTRA_OBJS := ../gpiogpio-event-mon-in.o ../gpiogpio-event-mon.o
+EXTRA_OBJS += ../gpiogpio-hammer-in.o ../gpiogpio-utils.o ../gpiolsgpio-in.o
+EXTRA_OBJS += ../gpiolsgpio.o
+
+KSFT_KHDR_INSTALL := 1
+include ../lib.mk
+
+all: $(BINARIES)
+
+override define CLEAN
+ $(RM) $(BINARIES) $(EXTRA_PROGS) $(EXTRA_OBJS)
+ $(RM) -r $(EXTRA_DIRS)
+endef
+
+CFLAGS += -O2 -g -std=gnu99 -Wall -I../../../../usr/include/
+LDLIBS += -lmount -I/usr/include/libmount
+
+$(BINARIES):| khdr
+$(BINARIES): ../../../gpio/gpio-utils.o
+
+../../../gpio/gpio-utils.o:
+ make ARCH=$(ARCH) CROSS_COMPILE=$(CROSS_COMPILE) -C ../../../gpio
diff --git a/tools/testing/selftests/gpio/config b/tools/testing/selftests/gpio/config
new file mode 100644
index 000000000..abaa6902b
--- /dev/null
+++ b/tools/testing/selftests/gpio/config
@@ -0,0 +1,2 @@
+CONFIG_GPIOLIB=y
+CONFIG_GPIO_MOCKUP=m
diff --git a/tools/testing/selftests/gpio/gpio-mockup-chardev.c b/tools/testing/selftests/gpio/gpio-mockup-chardev.c
new file mode 100644
index 000000000..aaa1e9f08
--- /dev/null
+++ b/tools/testing/selftests/gpio/gpio-mockup-chardev.c
@@ -0,0 +1,327 @@
+/*
+ * GPIO chardev test helper
+ *
+ * Copyright (C) 2016 Bamvor Jian Zhang
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <sys/ioctl.h>
+#include <libmount.h>
+#include <err.h>
+#include <dirent.h>
+#include <linux/gpio.h>
+#include "../../../gpio/gpio-utils.h"
+
+#define CONSUMER "gpio-selftest"
+#define GC_NUM 10
+enum direction {
+ OUT,
+ IN
+};
+
+static int get_debugfs(char **path)
+{
+ struct libmnt_context *cxt;
+ struct libmnt_table *tb;
+ struct libmnt_iter *itr = NULL;
+ struct libmnt_fs *fs;
+ int found = 0, ret;
+
+ cxt = mnt_new_context();
+ if (!cxt)
+ err(EXIT_FAILURE, "libmount context allocation failed");
+
+ itr = mnt_new_iter(MNT_ITER_FORWARD);
+ if (!itr)
+ err(EXIT_FAILURE, "failed to initialize libmount iterator");
+
+ if (mnt_context_get_mtab(cxt, &tb))
+ err(EXIT_FAILURE, "failed to read mtab");
+
+ while (mnt_table_next_fs(tb, itr, &fs) == 0) {
+ const char *type = mnt_fs_get_fstype(fs);
+
+ if (!strcmp(type, "debugfs")) {
+ found = 1;
+ break;
+ }
+ }
+ if (found) {
+ ret = asprintf(path, "%s/gpio", mnt_fs_get_target(fs));
+ if (ret < 0)
+ err(EXIT_FAILURE, "failed to format string");
+ }
+
+ mnt_free_iter(itr);
+ mnt_free_context(cxt);
+
+ if (!found)
+ return -1;
+
+ return 0;
+}
+
+static int gpio_debugfs_get(const char *consumer, int *dir, int *value)
+{
+ char *debugfs;
+ FILE *f;
+ char *line = NULL;
+ size_t len = 0;
+ char *cur;
+ int found = 0;
+
+ if (get_debugfs(&debugfs) != 0)
+ err(EXIT_FAILURE, "debugfs is not mounted");
+
+ f = fopen(debugfs, "r");
+ if (!f)
+ err(EXIT_FAILURE, "read from gpio debugfs failed");
+
+ /*
+ * gpio-2 ( |gpio-selftest ) in lo
+ */
+ while (getline(&line, &len, f) != -1) {
+ cur = strstr(line, consumer);
+ if (cur == NULL)
+ continue;
+
+ cur = strchr(line, ')');
+ if (!cur)
+ continue;
+
+ cur += 2;
+ if (!strncmp(cur, "out", 3)) {
+ *dir = OUT;
+ cur += 4;
+ } else if (!strncmp(cur, "in", 2)) {
+ *dir = IN;
+ cur += 4;
+ }
+
+ if (!strncmp(cur, "hi", 2))
+ *value = 1;
+ else if (!strncmp(cur, "lo", 2))
+ *value = 0;
+
+ found = 1;
+ break;
+ }
+ free(debugfs);
+ fclose(f);
+ free(line);
+
+ if (!found)
+ return -1;
+
+ return 0;
+}
+
+static struct gpiochip_info *list_gpiochip(const char *gpiochip_name, int *ret)
+{
+ struct gpiochip_info *cinfo;
+ struct gpiochip_info *current;
+ const struct dirent *ent;
+ DIR *dp;
+ char *chrdev_name;
+ int fd;
+ int i = 0;
+
+ cinfo = calloc(sizeof(struct gpiochip_info) * 4, GC_NUM + 1);
+ if (!cinfo)
+ err(EXIT_FAILURE, "gpiochip_info allocation failed");
+
+ current = cinfo;
+ dp = opendir("/dev");
+ if (!dp) {
+ *ret = -errno;
+ goto error_out;
+ } else {
+ *ret = 0;
+ }
+
+ while (ent = readdir(dp), ent) {
+ if (check_prefix(ent->d_name, "gpiochip")) {
+ *ret = asprintf(&chrdev_name, "/dev/%s", ent->d_name);
+ if (*ret < 0)
+ goto error_out;
+
+ fd = open(chrdev_name, 0);
+ if (fd == -1) {
+ *ret = -errno;
+ fprintf(stderr, "Failed to open %s\n",
+ chrdev_name);
+ goto error_close_dir;
+ }
+ *ret = ioctl(fd, GPIO_GET_CHIPINFO_IOCTL, current);
+ if (*ret == -1) {
+ perror("Failed to issue CHIPINFO IOCTL\n");
+ goto error_close_dir;
+ }
+ close(fd);
+ if (strcmp(current->label, gpiochip_name) == 0
+ || check_prefix(current->label, gpiochip_name)) {
+ *ret = 0;
+ current++;
+ i++;
+ }
+ }
+ }
+
+ if ((!*ret && i == 0) || *ret < 0) {
+ free(cinfo);
+ cinfo = NULL;
+ }
+ if (!*ret && i > 0) {
+ cinfo = realloc(cinfo, sizeof(struct gpiochip_info) * 4 * i);
+ *ret = i;
+ }
+
+error_close_dir:
+ closedir(dp);
+error_out:
+ if (*ret < 0)
+ err(EXIT_FAILURE, "list gpiochip failed: %s", strerror(*ret));
+
+ return cinfo;
+}
+
+int gpio_pin_test(struct gpiochip_info *cinfo, int line, int flag, int value)
+{
+ struct gpiohandle_data data;
+ unsigned int lines[] = {line};
+ int fd;
+ int debugfs_dir = IN;
+ int debugfs_value = 0;
+ int ret;
+
+ data.values[0] = value;
+ ret = gpiotools_request_linehandle(cinfo->name, lines, 1, flag, &data,
+ CONSUMER);
+ if (ret < 0)
+ goto fail_out;
+ else
+ fd = ret;
+
+ ret = gpio_debugfs_get(CONSUMER, &debugfs_dir, &debugfs_value);
+ if (ret) {
+ ret = -EINVAL;
+ goto fail_out;
+ }
+ if (flag & GPIOHANDLE_REQUEST_INPUT) {
+ if (debugfs_dir != IN) {
+ errno = -EINVAL;
+ ret = -errno;
+ }
+ } else if (flag & GPIOHANDLE_REQUEST_OUTPUT) {
+ if (flag & GPIOHANDLE_REQUEST_ACTIVE_LOW)
+ debugfs_value = !debugfs_value;
+
+ if (!(debugfs_dir == OUT && value == debugfs_value)) {
+ errno = -EINVAL;
+ ret = -errno;
+ }
+ }
+ gpiotools_release_linehandle(fd);
+
+fail_out:
+ if (ret)
+ err(EXIT_FAILURE, "gpio<%s> line<%d> test flag<0x%x> value<%d>",
+ cinfo->name, line, flag, value);
+
+ return ret;
+}
+
+void gpio_pin_tests(struct gpiochip_info *cinfo, unsigned int line)
+{
+ printf("line<%d>", line);
+ gpio_pin_test(cinfo, line, GPIOHANDLE_REQUEST_OUTPUT, 0);
+ printf(".");
+ gpio_pin_test(cinfo, line, GPIOHANDLE_REQUEST_OUTPUT, 1);
+ printf(".");
+ gpio_pin_test(cinfo, line,
+ GPIOHANDLE_REQUEST_OUTPUT | GPIOHANDLE_REQUEST_ACTIVE_LOW,
+ 0);
+ printf(".");
+ gpio_pin_test(cinfo, line,
+ GPIOHANDLE_REQUEST_OUTPUT | GPIOHANDLE_REQUEST_ACTIVE_LOW,
+ 1);
+ printf(".");
+ gpio_pin_test(cinfo, line, GPIOHANDLE_REQUEST_INPUT, 0);
+ printf(".");
+}
+
+/*
+ * ./gpio-mockup-chardev gpio_chip_name_prefix is_valid_gpio_chip
+ * Return 0 if successful or exit with EXIT_FAILURE if test failed.
+ * gpio_chip_name_prefix: The prefix of gpiochip you want to test. E.g.
+ * gpio-mockup
+ * is_valid_gpio_chip: Whether the gpio_chip is valid. 1 means valid,
+ * 0 means invalid which could not be found by
+ * list_gpiochip.
+ */
+int main(int argc, char *argv[])
+{
+ char *prefix;
+ int valid;
+ struct gpiochip_info *cinfo;
+ struct gpiochip_info *current;
+ int i;
+ int ret;
+
+ if (argc < 3) {
+ printf("Usage: %s prefix is_valid", argv[0]);
+ exit(EXIT_FAILURE);
+ }
+
+ prefix = argv[1];
+ valid = strcmp(argv[2], "true") == 0 ? 1 : 0;
+
+ printf("Test gpiochip %s: ", prefix);
+ cinfo = list_gpiochip(prefix, &ret);
+ if (!cinfo) {
+ if (!valid && ret == 0) {
+ printf("Invalid test successful\n");
+ ret = 0;
+ goto out;
+ } else {
+ ret = -EINVAL;
+ goto out;
+ }
+ } else if (cinfo && !valid) {
+ ret = -EINVAL;
+ goto out;
+ }
+ current = cinfo;
+ for (i = 0; i < ret; i++) {
+ gpio_pin_tests(current, 0);
+ gpio_pin_tests(current, current->lines - 1);
+ gpio_pin_tests(current, random() % current->lines);
+ current++;
+ }
+ ret = 0;
+ printf("successful\n");
+
+out:
+ if (ret)
+ fprintf(stderr, "gpio<%s> test failed\n", prefix);
+
+ if (cinfo)
+ free(cinfo);
+
+ if (ret)
+ exit(EXIT_FAILURE);
+
+ return ret;
+}
diff --git a/tools/testing/selftests/gpio/gpio-mockup-sysfs.sh b/tools/testing/selftests/gpio/gpio-mockup-sysfs.sh
new file mode 100755
index 000000000..dd269d877
--- /dev/null
+++ b/tools/testing/selftests/gpio/gpio-mockup-sysfs.sh
@@ -0,0 +1,135 @@
+
+# SPDX-License-Identifier: GPL-2.0
+is_consistent()
+{
+ val=
+
+ active_low_sysfs=`cat $GPIO_SYSFS/gpio$nr/active_low`
+ val_sysfs=`cat $GPIO_SYSFS/gpio$nr/value`
+ dir_sysfs=`cat $GPIO_SYSFS/gpio$nr/direction`
+
+ gpio_this_debugfs=`cat $GPIO_DEBUGFS |grep "gpio-$nr" | sed "s/(.*)//g"`
+ dir_debugfs=`echo $gpio_this_debugfs | awk '{print $2}'`
+ val_debugfs=`echo $gpio_this_debugfs | awk '{print $3}'`
+ if [ $val_debugfs = "lo" ]; then
+ val=0
+ elif [ $val_debugfs = "hi" ]; then
+ val=1
+ fi
+
+ if [ $active_low_sysfs = "1" ]; then
+ if [ $val = "0" ]; then
+ val="1"
+ else
+ val="0"
+ fi
+ fi
+
+ if [ $val_sysfs = $val ] && [ $dir_sysfs = $dir_debugfs ]; then
+ echo -n "."
+ else
+ echo "test fail, exit"
+ die
+ fi
+}
+
+test_pin_logic()
+{
+ nr=$1
+ direction=$2
+ active_low=$3
+ value=$4
+
+ echo $direction > $GPIO_SYSFS/gpio$nr/direction
+ echo $active_low > $GPIO_SYSFS/gpio$nr/active_low
+ if [ $direction = "out" ]; then
+ echo $value > $GPIO_SYSFS/gpio$nr/value
+ fi
+ is_consistent $nr
+}
+
+test_one_pin()
+{
+ nr=$1
+
+ echo -n "test pin<$nr>"
+
+ echo $nr > $GPIO_SYSFS/export 2>/dev/null
+
+ if [ X$? != X0 ]; then
+ echo "test GPIO pin $nr failed"
+ die
+ fi
+
+ #"Checking if the sysfs is consistent with debugfs: "
+ is_consistent $nr
+
+ #"Checking the logic of active_low: "
+ test_pin_logic $nr out 1 1
+ test_pin_logic $nr out 1 0
+ test_pin_logic $nr out 0 1
+ test_pin_logic $nr out 0 0
+
+ #"Checking the logic of direction: "
+ test_pin_logic $nr in 1 1
+ test_pin_logic $nr out 1 0
+ test_pin_logic $nr low 0 1
+ test_pin_logic $nr high 0 0
+
+ echo $nr > $GPIO_SYSFS/unexport
+
+ echo "successful"
+}
+
+test_one_pin_fail()
+{
+ nr=$1
+
+ echo $nr > $GPIO_SYSFS/export 2>/dev/null
+
+ if [ X$? != X0 ]; then
+ echo "test invalid pin $nr successful"
+ else
+ echo "test invalid pin $nr failed"
+ echo $nr > $GPIO_SYSFS/unexport 2>/dev/null
+ die
+ fi
+}
+
+list_chip()
+{
+ echo `ls -d $GPIO_DRV_SYSFS/gpiochip* 2>/dev/null`
+}
+
+test_chip()
+{
+ chip=$1
+ name=`basename $chip`
+ base=`cat $chip/base`
+ ngpio=`cat $chip/ngpio`
+ printf "%-10s %-5s %-5s\n" $name $base $ngpio
+ if [ $ngpio = "0" ]; then
+ echo "number of gpio is zero is not allowed".
+ fi
+ test_one_pin $base
+ test_one_pin $(($base + $ngpio - 1))
+ test_one_pin $((( RANDOM % $ngpio ) + $base ))
+}
+
+test_chips_sysfs()
+{
+ gpiochip=`list_chip $module`
+ if [ X"$gpiochip" = X ]; then
+ if [ X"$valid" = Xfalse ]; then
+ echo "successful"
+ else
+ echo "fail"
+ die
+ fi
+ else
+ for chip in $gpiochip; do
+ test_chip $chip
+ done
+ fi
+}
+
diff --git a/tools/testing/selftests/gpio/gpio-mockup.sh b/tools/testing/selftests/gpio/gpio-mockup.sh
new file mode 100755
index 000000000..7f35b9880
--- /dev/null
+++ b/tools/testing/selftests/gpio/gpio-mockup.sh
@@ -0,0 +1,206 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+#exit status
+#1: Internal error
+#2: sysfs/debugfs not mount
+#3: insert module fail when gpio-mockup is a module.
+#4: Skip test including run as non-root user.
+#5: other reason.
+
+SYSFS=
+GPIO_SYSFS=
+GPIO_DRV_SYSFS=
+DEBUGFS=
+GPIO_DEBUGFS=
+dev_type=
+module=
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+usage()
+{
+ echo "Usage:"
+ echo "$0 [-f] [-m name] [-t type]"
+ echo "-f: full test. It maybe conflict with existence gpio device."
+ echo "-m: module name, default name is gpio-mockup. It could also test"
+ echo " other gpio device."
+ echo "-t: interface type: chardev(char device) and sysfs(being"
+ echo " deprecated). The first one is default"
+ echo ""
+ echo "$0 -h"
+ echo "This usage"
+}
+
+prerequisite()
+{
+ msg="skip all tests:"
+ if [ $UID != 0 ]; then
+ echo $msg must be run as root >&2
+ exit $ksft_skip
+ fi
+ SYSFS=`mount -t sysfs | head -1 | awk '{ print $3 }'`
+ if [ ! -d "$SYSFS" ]; then
+ echo $msg sysfs is not mounted >&2
+ exit 2
+ fi
+ GPIO_SYSFS=`echo $SYSFS/class/gpio`
+ GPIO_DRV_SYSFS=`echo $SYSFS/devices/platform/$module/gpio`
+ DEBUGFS=`mount -t debugfs | head -1 | awk '{ print $3 }'`
+ if [ ! -d "$DEBUGFS" ]; then
+ echo $msg debugfs is not mounted >&2
+ exit 2
+ fi
+ GPIO_DEBUGFS=`echo $DEBUGFS/gpio`
+ source gpio-mockup-sysfs.sh
+}
+
+try_insert_module()
+{
+ if [ -d "$GPIO_DRV_SYSFS" ]; then
+ echo "$GPIO_DRV_SYSFS exist. Skip insert module"
+ else
+ modprobe -q $module $1
+ if [ X$? != X0 ]; then
+ echo $msg insmod $module failed >&2
+ exit 3
+ fi
+ fi
+}
+
+remove_module()
+{
+ modprobe -r -q $module
+}
+
+die()
+{
+ remove_module
+ exit 5
+}
+
+test_chips()
+{
+ if [ X$dev_type = Xsysfs ]; then
+ echo "WARNING: sysfs ABI of gpio is going to deprecated."
+ test_chips_sysfs $*
+ else
+ $BASE/gpio-mockup-chardev $*
+ fi
+}
+
+gpio_test()
+{
+ param=$1
+ valid=$2
+
+ if [ X"$param" = X ]; then
+ die
+ fi
+ try_insert_module "gpio_mockup_ranges=$param"
+ echo -n "GPIO $module test with ranges: <"
+ echo "$param>: "
+ printf "%-10s %s\n" $param
+ test_chips $module $valid
+ remove_module
+}
+
+BASE=`dirname $0`
+
+dev_type=
+TEMP=`getopt -o fhm:t: -n '$0' -- "$@"`
+
+if [ "$?" != "0" ]; then
+ echo "Parameter process failed, Terminating..." >&2
+ exit 1
+fi
+
+# Note the quotes around `$TEMP': they are essential!
+eval set -- "$TEMP"
+
+while true; do
+ case $1 in
+ -f)
+ full_test=true
+ shift
+ ;;
+ -h)
+ usage
+ exit
+ ;;
+ -m)
+ module=$2
+ shift 2
+ ;;
+ -t)
+ dev_type=$2
+ shift 2
+ ;;
+ --)
+ shift
+ break
+ ;;
+ *)
+ echo "Internal error!"
+ exit 1
+ ;;
+ esac
+done
+
+if [ X"$module" = X ]; then
+ module="gpio-mockup"
+fi
+
+if [ X$dev_type != Xsysfs ]; then
+ dev_type="chardev"
+fi
+
+prerequisite
+
+echo "1. Test dynamic allocation of gpio successful means insert gpiochip and"
+echo " manipulate gpio pin successful"
+gpio_test "-1,32" true
+gpio_test "-1,32,-1,32" true
+gpio_test "-1,32,-1,32,-1,32" true
+if [ X$full_test = Xtrue ]; then
+ gpio_test "-1,32,32,64" true
+ gpio_test "-1,32,40,64,-1,5" true
+ gpio_test "-1,32,32,64,-1,32" true
+ gpio_test "0,32,32,64,-1,32,-1,32" true
+ gpio_test "-1,32,-1,32,0,32,32,64" true
+ echo "2. Do basic test: successful means insert gpiochip and"
+ echo " manipulate gpio pin successful"
+ gpio_test "0,32" true
+ gpio_test "0,32,32,64" true
+ gpio_test "0,32,40,64,64,96" true
+fi
+echo "3. Error test: successful means insert gpiochip failed"
+echo "3.1 Test number of gpio overflow"
+#Currently: The max number of gpio(1024) is defined in arm architecture.
+gpio_test "-1,32,-1,1024" false
+if [ X$full_test = Xtrue ]; then
+ echo "3.2 Test zero line of gpio"
+ gpio_test "0,0" false
+ echo "3.3 Test range overlap"
+ echo "3.3.1 Test corner case"
+ gpio_test "0,32,0,1" false
+ gpio_test "0,32,32,64,32,40" false
+ gpio_test "0,32,35,64,35,45" false
+ gpio_test "0,32,31,32" false
+ gpio_test "0,32,32,64,36,37" false
+ gpio_test "0,32,35,64,34,36" false
+ echo "3.3.2 Test inserting invalid second gpiochip"
+ gpio_test "0,32,30,35" false
+ gpio_test "0,32,1,5" false
+ gpio_test "10,32,9,14" false
+ gpio_test "10,32,30,35" false
+ echo "3.3.3 Test others"
+ gpio_test "0,32,40,56,39,45" false
+ gpio_test "0,32,40,56,30,33" false
+ gpio_test "0,32,40,56,30,41" false
+ gpio_test "0,32,40,56,20,21" false
+fi
+
+echo GPIO test PASS
+
diff --git a/tools/testing/selftests/ia64/.gitignore b/tools/testing/selftests/ia64/.gitignore
new file mode 100644
index 000000000..ab806edc8
--- /dev/null
+++ b/tools/testing/selftests/ia64/.gitignore
@@ -0,0 +1 @@
+aliasing-test
diff --git a/tools/testing/selftests/ia64/Makefile b/tools/testing/selftests/ia64/Makefile
new file mode 100644
index 000000000..4bce1a84b
--- /dev/null
+++ b/tools/testing/selftests/ia64/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+TEST_PROGS := aliasing-test
+
+all: $(TEST_PROGS)
+
+include ../lib.mk
+
+clean:
+ rm -fr $(TEST_PROGS)
diff --git a/tools/testing/selftests/ia64/aliasing-test.c b/tools/testing/selftests/ia64/aliasing-test.c
new file mode 100644
index 000000000..62a190d45
--- /dev/null
+++ b/tools/testing/selftests/ia64/aliasing-test.c
@@ -0,0 +1,263 @@
+/*
+ * Exercise /dev/mem mmap cases that have been troublesome in the past
+ *
+ * (c) Copyright 2007 Hewlett-Packard Development Company, L.P.
+ * Bjorn Helgaas <bjorn.helgaas@hp.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <fnmatch.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <linux/pci.h>
+
+int sum;
+
+static int map_mem(char *path, off_t offset, size_t length, int touch)
+{
+ int fd, rc;
+ void *addr;
+ int *c;
+
+ fd = open(path, O_RDWR);
+ if (fd == -1) {
+ perror(path);
+ return -1;
+ }
+
+ if (fnmatch("/proc/bus/pci/*", path, 0) == 0) {
+ rc = ioctl(fd, PCIIOC_MMAP_IS_MEM);
+ if (rc == -1)
+ perror("PCIIOC_MMAP_IS_MEM ioctl");
+ }
+
+ addr = mmap(NULL, length, PROT_READ|PROT_WRITE, MAP_SHARED, fd, offset);
+ if (addr == MAP_FAILED)
+ return 1;
+
+ if (touch) {
+ c = (int *) addr;
+ while (c < (int *) (addr + length))
+ sum += *c++;
+ }
+
+ rc = munmap(addr, length);
+ if (rc == -1) {
+ perror("munmap");
+ return -1;
+ }
+
+ close(fd);
+ return 0;
+}
+
+static int scan_tree(char *path, char *file, off_t offset, size_t length, int touch)
+{
+ struct dirent **namelist;
+ char *name, *path2;
+ int i, n, r, rc = 0, result = 0;
+ struct stat buf;
+
+ n = scandir(path, &namelist, 0, alphasort);
+ if (n < 0) {
+ perror("scandir");
+ return -1;
+ }
+
+ for (i = 0; i < n; i++) {
+ name = namelist[i]->d_name;
+
+ if (fnmatch(".", name, 0) == 0)
+ goto skip;
+ if (fnmatch("..", name, 0) == 0)
+ goto skip;
+
+ path2 = malloc(strlen(path) + strlen(name) + 3);
+ strcpy(path2, path);
+ strcat(path2, "/");
+ strcat(path2, name);
+
+ if (fnmatch(file, name, 0) == 0) {
+ rc = map_mem(path2, offset, length, touch);
+ if (rc == 0)
+ fprintf(stderr, "PASS: %s 0x%lx-0x%lx is %s\n", path2, offset, offset + length, touch ? "readable" : "mappable");
+ else if (rc > 0)
+ fprintf(stderr, "PASS: %s 0x%lx-0x%lx not mappable\n", path2, offset, offset + length);
+ else {
+ fprintf(stderr, "FAIL: %s 0x%lx-0x%lx not accessible\n", path2, offset, offset + length);
+ return rc;
+ }
+ } else {
+ r = lstat(path2, &buf);
+ if (r == 0 && S_ISDIR(buf.st_mode)) {
+ rc = scan_tree(path2, file, offset, length, touch);
+ if (rc < 0)
+ return rc;
+ }
+ }
+
+ result |= rc;
+ free(path2);
+
+skip:
+ free(namelist[i]);
+ }
+ free(namelist);
+ return result;
+}
+
+char buf[1024];
+
+static int read_rom(char *path)
+{
+ int fd, rc;
+ size_t size = 0;
+
+ fd = open(path, O_RDWR);
+ if (fd == -1) {
+ perror(path);
+ return -1;
+ }
+
+ rc = write(fd, "1", 2);
+ if (rc <= 0) {
+ close(fd);
+ perror("write");
+ return -1;
+ }
+
+ do {
+ rc = read(fd, buf, sizeof(buf));
+ if (rc > 0)
+ size += rc;
+ } while (rc > 0);
+
+ close(fd);
+ return size;
+}
+
+static int scan_rom(char *path, char *file)
+{
+ struct dirent **namelist;
+ char *name, *path2;
+ int i, n, r, rc = 0, result = 0;
+ struct stat buf;
+
+ n = scandir(path, &namelist, 0, alphasort);
+ if (n < 0) {
+ perror("scandir");
+ return -1;
+ }
+
+ for (i = 0; i < n; i++) {
+ name = namelist[i]->d_name;
+
+ if (fnmatch(".", name, 0) == 0)
+ goto skip;
+ if (fnmatch("..", name, 0) == 0)
+ goto skip;
+
+ path2 = malloc(strlen(path) + strlen(name) + 3);
+ strcpy(path2, path);
+ strcat(path2, "/");
+ strcat(path2, name);
+
+ if (fnmatch(file, name, 0) == 0) {
+ rc = read_rom(path2);
+
+ /*
+ * It's OK if the ROM is unreadable. Maybe there
+ * is no ROM, or some other error occurred. The
+ * important thing is that no MCA happened.
+ */
+ if (rc > 0)
+ fprintf(stderr, "PASS: %s read %d bytes\n", path2, rc);
+ else {
+ fprintf(stderr, "PASS: %s not readable\n", path2);
+ return rc;
+ }
+ } else {
+ r = lstat(path2, &buf);
+ if (r == 0 && S_ISDIR(buf.st_mode)) {
+ rc = scan_rom(path2, file);
+ if (rc < 0)
+ return rc;
+ }
+ }
+
+ result |= rc;
+ free(path2);
+
+skip:
+ free(namelist[i]);
+ }
+ free(namelist);
+ return result;
+}
+
+int main(void)
+{
+ int rc;
+
+ if (map_mem("/dev/mem", 0, 0xA0000, 1) == 0)
+ fprintf(stderr, "PASS: /dev/mem 0x0-0xa0000 is readable\n");
+ else
+ fprintf(stderr, "FAIL: /dev/mem 0x0-0xa0000 not accessible\n");
+
+ /*
+ * It's not safe to blindly read the VGA frame buffer. If you know
+ * how to poke the card the right way, it should respond, but it's
+ * not safe in general. Many machines, e.g., Intel chipsets, cover
+ * up a non-responding card by just returning -1, but others will
+ * report the failure as a machine check.
+ */
+ if (map_mem("/dev/mem", 0xA0000, 0x20000, 0) == 0)
+ fprintf(stderr, "PASS: /dev/mem 0xa0000-0xc0000 is mappable\n");
+ else
+ fprintf(stderr, "FAIL: /dev/mem 0xa0000-0xc0000 not accessible\n");
+
+ if (map_mem("/dev/mem", 0xC0000, 0x40000, 1) == 0)
+ fprintf(stderr, "PASS: /dev/mem 0xc0000-0x100000 is readable\n");
+ else
+ fprintf(stderr, "FAIL: /dev/mem 0xc0000-0x100000 not accessible\n");
+
+ /*
+ * Often you can map all the individual pieces above (0-0xA0000,
+ * 0xA0000-0xC0000, and 0xC0000-0x100000), but can't map the whole
+ * thing at once. This is because the individual pieces use different
+ * attributes, and there's no single attribute supported over the
+ * whole region.
+ */
+ rc = map_mem("/dev/mem", 0, 1024*1024, 0);
+ if (rc == 0)
+ fprintf(stderr, "PASS: /dev/mem 0x0-0x100000 is mappable\n");
+ else if (rc > 0)
+ fprintf(stderr, "PASS: /dev/mem 0x0-0x100000 not mappable\n");
+ else
+ fprintf(stderr, "FAIL: /dev/mem 0x0-0x100000 not accessible\n");
+
+ scan_tree("/sys/class/pci_bus", "legacy_mem", 0, 0xA0000, 1);
+ scan_tree("/sys/class/pci_bus", "legacy_mem", 0xA0000, 0x20000, 0);
+ scan_tree("/sys/class/pci_bus", "legacy_mem", 0xC0000, 0x40000, 1);
+ scan_tree("/sys/class/pci_bus", "legacy_mem", 0, 1024*1024, 0);
+
+ scan_rom("/sys/devices", "rom");
+
+ scan_tree("/proc/bus/pci", "??.?", 0, 0xA0000, 1);
+ scan_tree("/proc/bus/pci", "??.?", 0xA0000, 0x20000, 0);
+ scan_tree("/proc/bus/pci", "??.?", 0xC0000, 0x40000, 1);
+ scan_tree("/proc/bus/pci", "??.?", 0, 1024*1024, 0);
+
+ return rc;
+}
diff --git a/tools/testing/selftests/intel_pstate/.gitignore b/tools/testing/selftests/intel_pstate/.gitignore
new file mode 100644
index 000000000..3bfcbae5f
--- /dev/null
+++ b/tools/testing/selftests/intel_pstate/.gitignore
@@ -0,0 +1,2 @@
+aperf
+msr
diff --git a/tools/testing/selftests/intel_pstate/Makefile b/tools/testing/selftests/intel_pstate/Makefile
new file mode 100644
index 000000000..7340fd6a9
--- /dev/null
+++ b/tools/testing/selftests/intel_pstate/Makefile
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS := $(CFLAGS) -Wall -D_GNU_SOURCE
+LDLIBS := $(LDLIBS) -lm
+
+uname_M := $(shell uname -m 2>/dev/null || echo not)
+ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
+
+ifeq (x86,$(ARCH))
+TEST_GEN_FILES := msr aperf
+endif
+
+TEST_PROGS := run.sh
+
+include ../lib.mk
+
+$(TEST_GEN_FILES): $(HEADERS)
diff --git a/tools/testing/selftests/intel_pstate/aperf.c b/tools/testing/selftests/intel_pstate/aperf.c
new file mode 100644
index 000000000..f6cd03a87
--- /dev/null
+++ b/tools/testing/selftests/intel_pstate/aperf.c
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <math.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/timeb.h>
+#include <sched.h>
+#include <errno.h>
+#include <string.h>
+#include "../kselftest.h"
+
+void usage(char *name) {
+ printf ("Usage: %s cpunum\n", name);
+}
+
+int main(int argc, char **argv) {
+ unsigned int i, cpu, fd;
+ char msr_file_name[64];
+ long long tsc, old_tsc, new_tsc;
+ long long aperf, old_aperf, new_aperf;
+ long long mperf, old_mperf, new_mperf;
+ struct timeb before, after;
+ long long int start, finish, total;
+ cpu_set_t cpuset;
+
+ if (argc != 2) {
+ usage(argv[0]);
+ return 1;
+ }
+
+ errno = 0;
+ cpu = strtol(argv[1], (char **) NULL, 10);
+
+ if (errno) {
+ usage(argv[0]);
+ return 1;
+ }
+
+ sprintf(msr_file_name, "/dev/cpu/%d/msr", cpu);
+ fd = open(msr_file_name, O_RDONLY);
+
+ if (fd == -1) {
+ printf("/dev/cpu/%d/msr: %s\n", cpu, strerror(errno));
+ return KSFT_SKIP;
+ }
+
+ CPU_ZERO(&cpuset);
+ CPU_SET(cpu, &cpuset);
+
+ if (sched_setaffinity(0, sizeof(cpu_set_t), &cpuset)) {
+ perror("Failed to set cpu affinity");
+ return 1;
+ }
+
+ ftime(&before);
+ pread(fd, &old_tsc, sizeof(old_tsc), 0x10);
+ pread(fd, &old_aperf, sizeof(old_mperf), 0xe7);
+ pread(fd, &old_mperf, sizeof(old_aperf), 0xe8);
+
+ for (i=0; i<0x8fffffff; i++) {
+ sqrt(i);
+ }
+
+ ftime(&after);
+ pread(fd, &new_tsc, sizeof(new_tsc), 0x10);
+ pread(fd, &new_aperf, sizeof(new_mperf), 0xe7);
+ pread(fd, &new_mperf, sizeof(new_aperf), 0xe8);
+
+ tsc = new_tsc-old_tsc;
+ aperf = new_aperf-old_aperf;
+ mperf = new_mperf-old_mperf;
+
+ start = before.time*1000 + before.millitm;
+ finish = after.time*1000 + after.millitm;
+ total = finish - start;
+
+ printf("runTime: %4.2f\n", 1.0*total/1000);
+ printf("freq: %7.0f\n", tsc / (1.0*aperf / (1.0 * mperf)) / total);
+ return 0;
+}
diff --git a/tools/testing/selftests/intel_pstate/msr.c b/tools/testing/selftests/intel_pstate/msr.c
new file mode 100644
index 000000000..88fdd2a4b
--- /dev/null
+++ b/tools/testing/selftests/intel_pstate/msr.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <math.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/timeb.h>
+#include <sched.h>
+#include <errno.h>
+
+
+int main(int argc, char **argv) {
+ int cpu, fd;
+ long long msr;
+ char msr_file_name[64];
+
+ if (argc != 2)
+ return 1;
+
+ errno = 0;
+ cpu = strtol(argv[1], (char **) NULL, 10);
+
+ if (errno)
+ return 1;
+
+ sprintf(msr_file_name, "/dev/cpu/%d/msr", cpu);
+ fd = open(msr_file_name, O_RDONLY);
+
+ if (fd == -1) {
+ perror("Failed to open");
+ return 1;
+ }
+
+ pread(fd, &msr, sizeof(msr), 0x199);
+
+ printf("msr 0x199: 0x%llx\n", msr);
+ return 0;
+}
diff --git a/tools/testing/selftests/intel_pstate/run.sh b/tools/testing/selftests/intel_pstate/run.sh
new file mode 100755
index 000000000..e7008f614
--- /dev/null
+++ b/tools/testing/selftests/intel_pstate/run.sh
@@ -0,0 +1,128 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test runs on Intel x86 based hardware which support the intel_pstate
+# driver. The test checks the frequency settings from the maximum turbo
+# state to the minimum supported frequency, in decrements of 100MHz. The
+# test runs the aperf.c program to put load on each processor.
+#
+# The results are displayed in a table which indicate the "Target" state,
+# or the requested frequency in MHz, the Actual frequency, as read from
+# /proc/cpuinfo, the difference between the Target and Actual frequencies,
+# and the value of MSR 0x199 (MSR_IA32_PERF_CTL) which indicates what
+# pstate the cpu is in, and the value of
+# /sys/devices/system/cpu/intel_pstate/max_perf_pct X maximum turbo state
+#
+# Notes: In some cases several frequency values may be placed in the
+# /tmp/result.X files. This is done on purpose in order to catch cases
+# where the pstate driver may not be working at all. There is the case
+# where, for example, several "similar" frequencies are in the file:
+#
+#
+#/tmp/result.3100:1:cpu MHz : 2899.980
+#/tmp/result.3100:2:cpu MHz : 2900.000
+#/tmp/result.3100:3:msr 0x199: 0x1e00
+#/tmp/result.3100:4:max_perf_pct 94
+#
+# and the test will error out in those cases. The result.X file can be checked
+# for consistency and modified to remove the extra MHz values. The result.X
+# files can be re-evaluated by setting EVALUATE_ONLY to 1 below.
+
+EVALUATE_ONLY=0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+if ! uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/ | grep -q x86; then
+ echo "$0 # Skipped: Test can only run on x86 architectures."
+ exit $ksft_skip
+fi
+
+msg="skip all tests:"
+if [ $UID != 0 ] && [ $EVALUATE_ONLY == 0 ]; then
+ echo $msg please run this as root >&2
+ exit $ksft_skip
+fi
+
+max_cpus=$(($(nproc)-1))
+
+function run_test () {
+
+ file_ext=$1
+ for cpu in `seq 0 $max_cpus`
+ do
+ echo "launching aperf load on $cpu"
+ ./aperf $cpu &
+ done
+
+ echo "sleeping for 5 seconds"
+ sleep 5
+ grep MHz /proc/cpuinfo | sort -u > /tmp/result.freqs
+ num_freqs=$(wc -l /tmp/result.freqs | awk ' { print $1 } ')
+ if [ $num_freqs -ge 2 ]; then
+ tail -n 1 /tmp/result.freqs > /tmp/result.$1
+ else
+ cp /tmp/result.freqs /tmp/result.$1
+ fi
+ ./msr 0 >> /tmp/result.$1
+
+ max_perf_pct=$(cat /sys/devices/system/cpu/intel_pstate/max_perf_pct)
+ echo "max_perf_pct $max_perf_pct" >> /tmp/result.$1
+
+ for job in `jobs -p`
+ do
+ echo "waiting for job id $job"
+ wait $job
+ done
+}
+
+#
+# MAIN (ALL UNITS IN MHZ)
+#
+
+# Get the marketing frequency
+_mkt_freq=$(cat /proc/cpuinfo | grep -m 1 "model name" | awk '{print $NF}')
+_mkt_freq=$(echo $_mkt_freq | tr -d [:alpha:][:punct:])
+mkt_freq=${_mkt_freq}0
+
+# Get the ranges from cpupower
+_min_freq=$(cpupower frequency-info -l | tail -1 | awk ' { print $1 } ')
+min_freq=$(($_min_freq / 1000))
+_max_freq=$(cpupower frequency-info -l | tail -1 | awk ' { print $2 } ')
+max_freq=$(($_max_freq / 1000))
+
+
+[ $EVALUATE_ONLY -eq 0 ] && for freq in `seq $max_freq -100 $min_freq`
+do
+ echo "Setting maximum frequency to $freq"
+ cpupower frequency-set -g powersave --max=${freq}MHz >& /dev/null
+ run_test $freq
+done
+
+[ $EVALUATE_ONLY -eq 0 ] && cpupower frequency-set -g powersave --max=${max_freq}MHz >& /dev/null
+
+echo "========================================================================"
+echo "The marketing frequency of the cpu is $mkt_freq MHz"
+echo "The maximum frequency of the cpu is $max_freq MHz"
+echo "The minimum frequency of the cpu is $min_freq MHz"
+
+# make a pretty table
+echo "Target Actual Difference MSR(0x199) max_perf_pct" | tr " " "\n" > /tmp/result.tab
+for freq in `seq $max_freq -100 $min_freq`
+do
+ result_freq=$(cat /tmp/result.${freq} | grep "cpu MHz" | awk ' { print $4 } ' | awk -F "." ' { print $1 } ')
+ msr=$(cat /tmp/result.${freq} | grep "msr" | awk ' { print $3 } ')
+ max_perf_pct=$(cat /tmp/result.${freq} | grep "max_perf_pct" | awk ' { print $2 } ' )
+ cat >> /tmp/result.tab << EOF
+$freq
+$result_freq
+$((result_freq - freq))
+$msr
+$((max_perf_pct * max_freq))
+EOF
+done
+
+# print the table
+pr -aTt -5 < /tmp/result.tab
+
+exit 0
diff --git a/tools/testing/selftests/ipc/.gitignore b/tools/testing/selftests/ipc/.gitignore
new file mode 100644
index 000000000..9af04c935
--- /dev/null
+++ b/tools/testing/selftests/ipc/.gitignore
@@ -0,0 +1,2 @@
+msgque_test
+msgque
diff --git a/tools/testing/selftests/ipc/Makefile b/tools/testing/selftests/ipc/Makefile
new file mode 100644
index 000000000..1c4448a84
--- /dev/null
+++ b/tools/testing/selftests/ipc/Makefile
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0
+uname_M := $(shell uname -m 2>/dev/null || echo not)
+ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/)
+ifeq ($(ARCH),i386)
+ ARCH := x86
+ CFLAGS := -DCONFIG_X86_32 -D__i386__
+endif
+ifeq ($(ARCH),x86_64)
+ ARCH := x86
+ CFLAGS := -DCONFIG_X86_64 -D__x86_64__
+endif
+
+CFLAGS += -I../../../../usr/include/
+
+TEST_GEN_PROGS := msgque
+
+include ../lib.mk
+
diff --git a/tools/testing/selftests/ipc/config b/tools/testing/selftests/ipc/config
new file mode 100644
index 000000000..070244710
--- /dev/null
+++ b/tools/testing/selftests/ipc/config
@@ -0,0 +1,2 @@
+CONFIG_EXPERT=y
+CONFIG_CHECKPOINT_RESTORE=y
diff --git a/tools/testing/selftests/ipc/msgque.c b/tools/testing/selftests/ipc/msgque.c
new file mode 100644
index 000000000..5ec4d9e18
--- /dev/null
+++ b/tools/testing/selftests/ipc/msgque.c
@@ -0,0 +1,255 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/msg.h>
+#include <fcntl.h>
+
+#include "../kselftest.h"
+
+#define MAX_MSG_SIZE 32
+
+struct msg1 {
+ int msize;
+ long mtype;
+ char mtext[MAX_MSG_SIZE];
+};
+
+#define TEST_STRING "Test sysv5 msg"
+#define MSG_TYPE 1
+
+#define ANOTHER_TEST_STRING "Yet another test sysv5 msg"
+#define ANOTHER_MSG_TYPE 26538
+
+struct msgque_data {
+ key_t key;
+ int msq_id;
+ int qbytes;
+ int qnum;
+ int mode;
+ struct msg1 *messages;
+};
+
+int restore_queue(struct msgque_data *msgque)
+{
+ int fd, ret, id, i;
+ char buf[32];
+
+ fd = open("/proc/sys/kernel/msg_next_id", O_WRONLY);
+ if (fd == -1) {
+ printf("Failed to open /proc/sys/kernel/msg_next_id\n");
+ return -errno;
+ }
+ sprintf(buf, "%d", msgque->msq_id);
+
+ ret = write(fd, buf, strlen(buf));
+ if (ret != strlen(buf)) {
+ printf("Failed to write to /proc/sys/kernel/msg_next_id\n");
+ return -errno;
+ }
+
+ id = msgget(msgque->key, msgque->mode | IPC_CREAT | IPC_EXCL);
+ if (id == -1) {
+ printf("Failed to create queue\n");
+ return -errno;
+ }
+
+ if (id != msgque->msq_id) {
+ printf("Restored queue has wrong id (%d instead of %d)\n",
+ id, msgque->msq_id);
+ ret = -EFAULT;
+ goto destroy;
+ }
+
+ for (i = 0; i < msgque->qnum; i++) {
+ if (msgsnd(msgque->msq_id, &msgque->messages[i].mtype,
+ msgque->messages[i].msize, IPC_NOWAIT) != 0) {
+ printf("msgsnd failed (%m)\n");
+ ret = -errno;
+ goto destroy;
+ };
+ }
+ return 0;
+
+destroy:
+ if (msgctl(id, IPC_RMID, NULL))
+ printf("Failed to destroy queue: %d\n", -errno);
+ return ret;
+}
+
+int check_and_destroy_queue(struct msgque_data *msgque)
+{
+ struct msg1 message;
+ int cnt = 0, ret;
+
+ while (1) {
+ ret = msgrcv(msgque->msq_id, &message.mtype, MAX_MSG_SIZE,
+ 0, IPC_NOWAIT);
+ if (ret < 0) {
+ if (errno == ENOMSG)
+ break;
+ printf("Failed to read IPC message: %m\n");
+ ret = -errno;
+ goto err;
+ }
+ if (ret != msgque->messages[cnt].msize) {
+ printf("Wrong message size: %d (expected %d)\n", ret,
+ msgque->messages[cnt].msize);
+ ret = -EINVAL;
+ goto err;
+ }
+ if (message.mtype != msgque->messages[cnt].mtype) {
+ printf("Wrong message type\n");
+ ret = -EINVAL;
+ goto err;
+ }
+ if (memcmp(message.mtext, msgque->messages[cnt].mtext, ret)) {
+ printf("Wrong message content\n");
+ ret = -EINVAL;
+ goto err;
+ }
+ cnt++;
+ }
+
+ if (cnt != msgque->qnum) {
+ printf("Wrong message number\n");
+ ret = -EINVAL;
+ goto err;
+ }
+
+ ret = 0;
+err:
+ if (msgctl(msgque->msq_id, IPC_RMID, NULL)) {
+ printf("Failed to destroy queue: %d\n", -errno);
+ return -errno;
+ }
+ return ret;
+}
+
+int dump_queue(struct msgque_data *msgque)
+{
+ struct msqid_ds ds;
+ int kern_id;
+ int i, ret;
+
+ for (kern_id = 0; kern_id < 256; kern_id++) {
+ ret = msgctl(kern_id, MSG_STAT, &ds);
+ if (ret < 0) {
+ if (errno == EINVAL)
+ continue;
+ printf("Failed to get stats for IPC queue with id %d\n",
+ kern_id);
+ return -errno;
+ }
+
+ if (ret == msgque->msq_id)
+ break;
+ }
+
+ msgque->messages = malloc(sizeof(struct msg1) * ds.msg_qnum);
+ if (msgque->messages == NULL) {
+ printf("Failed to get stats for IPC queue\n");
+ return -ENOMEM;
+ }
+
+ msgque->qnum = ds.msg_qnum;
+ msgque->mode = ds.msg_perm.mode;
+ msgque->qbytes = ds.msg_qbytes;
+
+ for (i = 0; i < msgque->qnum; i++) {
+ ret = msgrcv(msgque->msq_id, &msgque->messages[i].mtype,
+ MAX_MSG_SIZE, i, IPC_NOWAIT | MSG_COPY);
+ if (ret < 0) {
+ printf("Failed to copy IPC message: %m (%d)\n", errno);
+ return -errno;
+ }
+ msgque->messages[i].msize = ret;
+ }
+ return 0;
+}
+
+int fill_msgque(struct msgque_data *msgque)
+{
+ struct msg1 msgbuf;
+
+ msgbuf.mtype = MSG_TYPE;
+ memcpy(msgbuf.mtext, TEST_STRING, sizeof(TEST_STRING));
+ if (msgsnd(msgque->msq_id, &msgbuf.mtype, sizeof(TEST_STRING),
+ IPC_NOWAIT) != 0) {
+ printf("First message send failed (%m)\n");
+ return -errno;
+ };
+
+ msgbuf.mtype = ANOTHER_MSG_TYPE;
+ memcpy(msgbuf.mtext, ANOTHER_TEST_STRING, sizeof(ANOTHER_TEST_STRING));
+ if (msgsnd(msgque->msq_id, &msgbuf.mtype, sizeof(ANOTHER_TEST_STRING),
+ IPC_NOWAIT) != 0) {
+ printf("Second message send failed (%m)\n");
+ return -errno;
+ };
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ int msg, pid, err;
+ struct msgque_data msgque;
+
+ if (getuid() != 0)
+ return ksft_exit_skip(
+ "Please run the test as root - Exiting.\n");
+
+ msgque.key = ftok(argv[0], 822155650);
+ if (msgque.key == -1) {
+ printf("Can't make key: %d\n", -errno);
+ return ksft_exit_fail();
+ }
+
+ msgque.msq_id = msgget(msgque.key, IPC_CREAT | IPC_EXCL | 0666);
+ if (msgque.msq_id == -1) {
+ err = -errno;
+ printf("Can't create queue: %d\n", err);
+ goto err_out;
+ }
+
+ err = fill_msgque(&msgque);
+ if (err) {
+ printf("Failed to fill queue: %d\n", err);
+ goto err_destroy;
+ }
+
+ err = dump_queue(&msgque);
+ if (err) {
+ printf("Failed to dump queue: %d\n", err);
+ goto err_destroy;
+ }
+
+ err = check_and_destroy_queue(&msgque);
+ if (err) {
+ printf("Failed to check and destroy queue: %d\n", err);
+ goto err_out;
+ }
+
+ err = restore_queue(&msgque);
+ if (err) {
+ printf("Failed to restore queue: %d\n", err);
+ goto err_destroy;
+ }
+
+ err = check_and_destroy_queue(&msgque);
+ if (err) {
+ printf("Failed to test queue: %d\n", err);
+ goto err_out;
+ }
+ return ksft_exit_pass();
+
+err_destroy:
+ if (msgctl(msgque.msq_id, IPC_RMID, NULL)) {
+ printf("Failed to destroy queue: %d\n", -errno);
+ return ksft_exit_fail();
+ }
+err_out:
+ return ksft_exit_fail();
+}
diff --git a/tools/testing/selftests/kcmp/.gitignore b/tools/testing/selftests/kcmp/.gitignore
new file mode 100644
index 000000000..5a9b3732b
--- /dev/null
+++ b/tools/testing/selftests/kcmp/.gitignore
@@ -0,0 +1,2 @@
+kcmp_test
+kcmp-test-file
diff --git a/tools/testing/selftests/kcmp/Makefile b/tools/testing/selftests/kcmp/Makefile
new file mode 100644
index 000000000..47aa9887f
--- /dev/null
+++ b/tools/testing/selftests/kcmp/Makefile
@@ -0,0 +1,8 @@
+CFLAGS += -I../../../../usr/include/
+
+TEST_GEN_PROGS := kcmp_test
+
+EXTRA_CLEAN := $(OUTPUT)/kcmp-test-file
+
+include ../lib.mk
+
diff --git a/tools/testing/selftests/kcmp/kcmp_test.c b/tools/testing/selftests/kcmp/kcmp_test.c
new file mode 100644
index 000000000..6ea7b9f37
--- /dev/null
+++ b/tools/testing/selftests/kcmp/kcmp_test.c
@@ -0,0 +1,166 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <limits.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+#include <fcntl.h>
+#include <linux/unistd.h>
+#include <linux/kcmp.h>
+
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <sys/epoll.h>
+
+#include "../kselftest.h"
+
+static long sys_kcmp(int pid1, int pid2, int type, unsigned long fd1, unsigned long fd2)
+{
+ return syscall(__NR_kcmp, pid1, pid2, type, fd1, fd2);
+}
+
+static const unsigned int duped_num = 64;
+
+int main(int argc, char **argv)
+{
+ const char kpath[] = "kcmp-test-file";
+ struct kcmp_epoll_slot epoll_slot;
+ struct epoll_event ev;
+ int pid1, pid2;
+ int pipefd[2];
+ int fd1, fd2;
+ int epollfd;
+ int status;
+ int fddup;
+
+ fd1 = open(kpath, O_RDWR | O_CREAT | O_TRUNC, 0644);
+ pid1 = getpid();
+
+ if (fd1 < 0) {
+ perror("Can't create file");
+ ksft_exit_fail();
+ }
+
+ if (pipe(pipefd)) {
+ perror("Can't create pipe");
+ ksft_exit_fail();
+ }
+
+ epollfd = epoll_create1(0);
+ if (epollfd < 0) {
+ perror("epoll_create1 failed");
+ ksft_exit_fail();
+ }
+
+ memset(&ev, 0xff, sizeof(ev));
+ ev.events = EPOLLIN | EPOLLOUT;
+
+ if (epoll_ctl(epollfd, EPOLL_CTL_ADD, pipefd[0], &ev)) {
+ perror("epoll_ctl failed");
+ ksft_exit_fail();
+ }
+
+ fddup = dup2(pipefd[1], duped_num);
+ if (fddup < 0) {
+ perror("dup2 failed");
+ ksft_exit_fail();
+ }
+
+ if (epoll_ctl(epollfd, EPOLL_CTL_ADD, fddup, &ev)) {
+ perror("epoll_ctl failed");
+ ksft_exit_fail();
+ }
+ close(fddup);
+
+ pid2 = fork();
+ if (pid2 < 0) {
+ perror("fork failed");
+ ksft_exit_fail();
+ }
+
+ if (!pid2) {
+ int pid2 = getpid();
+ int ret;
+
+ fd2 = open(kpath, O_RDWR, 0644);
+ if (fd2 < 0) {
+ perror("Can't open file");
+ ksft_exit_fail();
+ }
+
+ /* An example of output and arguments */
+ printf("pid1: %6d pid2: %6d FD: %2ld FILES: %2ld VM: %2ld "
+ "FS: %2ld SIGHAND: %2ld IO: %2ld SYSVSEM: %2ld "
+ "INV: %2ld\n",
+ pid1, pid2,
+ sys_kcmp(pid1, pid2, KCMP_FILE, fd1, fd2),
+ sys_kcmp(pid1, pid2, KCMP_FILES, 0, 0),
+ sys_kcmp(pid1, pid2, KCMP_VM, 0, 0),
+ sys_kcmp(pid1, pid2, KCMP_FS, 0, 0),
+ sys_kcmp(pid1, pid2, KCMP_SIGHAND, 0, 0),
+ sys_kcmp(pid1, pid2, KCMP_IO, 0, 0),
+ sys_kcmp(pid1, pid2, KCMP_SYSVSEM, 0, 0),
+
+ /* This one should fail */
+ sys_kcmp(pid1, pid2, KCMP_TYPES + 1, 0, 0));
+
+ /* This one should return same fd */
+ ret = sys_kcmp(pid1, pid2, KCMP_FILE, fd1, fd1);
+ if (ret) {
+ printf("FAIL: 0 expected but %d returned (%s)\n",
+ ret, strerror(errno));
+ ksft_inc_fail_cnt();
+ ret = -1;
+ } else {
+ printf("PASS: 0 returned as expected\n");
+ ksft_inc_pass_cnt();
+ }
+
+ /* Compare with self */
+ ret = sys_kcmp(pid1, pid1, KCMP_VM, 0, 0);
+ if (ret) {
+ printf("FAIL: 0 expected but %d returned (%s)\n",
+ ret, strerror(errno));
+ ksft_inc_fail_cnt();
+ ret = -1;
+ } else {
+ printf("PASS: 0 returned as expected\n");
+ ksft_inc_pass_cnt();
+ }
+
+ /* Compare epoll target */
+ epoll_slot = (struct kcmp_epoll_slot) {
+ .efd = epollfd,
+ .tfd = duped_num,
+ .toff = 0,
+ };
+ ret = sys_kcmp(pid1, pid1, KCMP_EPOLL_TFD, pipefd[1],
+ (unsigned long)(void *)&epoll_slot);
+ if (ret) {
+ printf("FAIL: 0 expected but %d returned (%s)\n",
+ ret, strerror(errno));
+ ksft_inc_fail_cnt();
+ ret = -1;
+ } else {
+ printf("PASS: 0 returned as expected\n");
+ ksft_inc_pass_cnt();
+ }
+
+ ksft_print_cnts();
+
+ if (ret)
+ ksft_exit_fail();
+ else
+ ksft_exit_pass();
+ }
+
+ waitpid(pid2, &status, P_ALL);
+
+ return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/kmod/Makefile b/tools/testing/selftests/kmod/Makefile
new file mode 100644
index 000000000..fa2ccc5fb
--- /dev/null
+++ b/tools/testing/selftests/kmod/Makefile
@@ -0,0 +1,11 @@
+# Makefile for kmod loading selftests
+
+# No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
+all:
+
+TEST_PROGS := kmod.sh
+
+include ../lib.mk
+
+# Nothing to clean up.
+clean:
diff --git a/tools/testing/selftests/kmod/config b/tools/testing/selftests/kmod/config
new file mode 100644
index 000000000..259f4fd6b
--- /dev/null
+++ b/tools/testing/selftests/kmod/config
@@ -0,0 +1,7 @@
+CONFIG_TEST_KMOD=m
+CONFIG_TEST_LKM=m
+CONFIG_XFS_FS=m
+
+# For the module parameter force_init_test is used
+CONFIG_TUN=m
+CONFIG_BTRFS_FS=m
diff --git a/tools/testing/selftests/kmod/kmod.sh b/tools/testing/selftests/kmod/kmod.sh
new file mode 100755
index 000000000..1f118916a
--- /dev/null
+++ b/tools/testing/selftests/kmod/kmod.sh
@@ -0,0 +1,623 @@
+#!/bin/bash
+#
+# Copyright (C) 2017 Luis R. Rodriguez <mcgrof@kernel.org>
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or at your option any
+# later version; or, when distributed separately from the Linux kernel or
+# when incorporated into other software packages, subject to the following
+# license:
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of copyleft-next (version 0.3.1 or later) as published
+# at http://copyleft-next.org/.
+
+# This is a stress test script for kmod, the kernel module loader. It uses
+# test_kmod which exposes a series of knobs for the API for us so we can
+# tweak each test in userspace rather than in kernelspace.
+#
+# The way kmod works is it uses the kernel's usermode helper API to eventually
+# call /sbin/modprobe. It has a limit of the number of concurrent calls
+# possible. The kernel interface to load modules is request_module(), however
+# mount uses get_fs_type(). Both behave slightly differently, but the
+# differences are important enough to test each call separately. For this
+# reason test_kmod starts by providing tests for both calls.
+#
+# The test driver test_kmod assumes a series of defaults which you can
+# override by exporting to your environment prior running this script.
+# For instance this script assumes you do not have xfs loaded upon boot.
+# If this is false, export DEFAULT_KMOD_FS="ext4" prior to running this
+# script if the filesyste module you don't have loaded upon bootup
+# is ext4 instead. Refer to allow_user_defaults() for a list of user
+# override variables possible.
+#
+# You'll want at least 4 GiB of RAM to expect to run these tests
+# without running out of memory on them. For other requirements refer
+# to test_reqs()
+
+set -e
+
+TEST_NAME="kmod"
+TEST_DRIVER="test_${TEST_NAME}"
+TEST_DIR=$(dirname $0)
+
+# This represents
+#
+# TEST_ID:TEST_COUNT:ENABLED
+#
+# TEST_ID: is the test id number
+# TEST_COUNT: number of times we should run the test
+# ENABLED: 1 if enabled, 0 otherwise
+#
+# Once these are enabled please leave them as-is. Write your own test,
+# we have tons of space.
+ALL_TESTS="0001:3:1"
+ALL_TESTS="$ALL_TESTS 0002:3:1"
+ALL_TESTS="$ALL_TESTS 0003:1:1"
+ALL_TESTS="$ALL_TESTS 0004:1:1"
+ALL_TESTS="$ALL_TESTS 0005:10:1"
+ALL_TESTS="$ALL_TESTS 0006:10:1"
+ALL_TESTS="$ALL_TESTS 0007:5:1"
+ALL_TESTS="$ALL_TESTS 0008:150:1"
+ALL_TESTS="$ALL_TESTS 0009:150:1"
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+test_modprobe()
+{
+ if [ ! -d $DIR ]; then
+ echo "$0: $DIR not present" >&2
+ echo "You must have the following enabled in your kernel:" >&2
+ cat $TEST_DIR/config >&2
+ exit $ksft_skip
+ fi
+}
+
+function allow_user_defaults()
+{
+ if [ -z $DEFAULT_KMOD_DRIVER ]; then
+ DEFAULT_KMOD_DRIVER="test_module"
+ fi
+
+ if [ -z $DEFAULT_KMOD_FS ]; then
+ DEFAULT_KMOD_FS="xfs"
+ fi
+
+ if [ -z $PROC_DIR ]; then
+ PROC_DIR="/proc/sys/kernel/"
+ fi
+
+ if [ -z $MODPROBE_LIMIT ]; then
+ MODPROBE_LIMIT=50
+ fi
+
+ if [ -z $DIR ]; then
+ DIR="/sys/devices/virtual/misc/${TEST_DRIVER}0/"
+ fi
+
+ if [ -z $DEFAULT_NUM_TESTS ]; then
+ DEFAULT_NUM_TESTS=150
+ fi
+
+ MODPROBE_LIMIT_FILE="${PROC_DIR}/kmod-limit"
+}
+
+test_reqs()
+{
+ if ! which modprobe 2> /dev/null > /dev/null; then
+ echo "$0: You need modprobe installed" >&2
+ exit $ksft_skip
+ fi
+
+ if ! which kmod 2> /dev/null > /dev/null; then
+ echo "$0: You need kmod installed" >&2
+ exit $ksft_skip
+ fi
+
+ # kmod 19 has a bad bug where it returns 0 when modprobe
+ # gets called *even* if the module was not loaded due to
+ # some bad heuristics. For details see:
+ #
+ # A work around is possible in-kernel but its rather
+ # complex.
+ KMOD_VERSION=$(kmod --version | awk '{print $3}')
+ if [[ $KMOD_VERSION -le 19 ]]; then
+ echo "$0: You need at least kmod 20" >&2
+ echo "kmod <= 19 is buggy, for details see:" >&2
+ echo "http://git.kernel.org/cgit/utils/kernel/kmod/kmod.git/commit/libkmod/libkmod-module.c?id=fd44a98ae2eb5eb32161088954ab21e58e19dfc4" >&2
+ exit $ksft_skip
+ fi
+
+ uid=$(id -u)
+ if [ $uid -ne 0 ]; then
+ echo $msg must be run as root >&2
+ exit $ksft_skip
+ fi
+}
+
+function load_req_mod()
+{
+ trap "test_modprobe" EXIT
+
+ if [ ! -d $DIR ]; then
+ # Alanis: "Oh isn't it ironic?"
+ modprobe $TEST_DRIVER
+ fi
+}
+
+test_finish()
+{
+ echo "Test completed"
+}
+
+errno_name_to_val()
+{
+ case "$1" in
+ # kmod calls modprobe and upon of a module not found
+ # modprobe returns just 1... However in the kernel we
+ # *sometimes* see 256...
+ MODULE_NOT_FOUND)
+ echo 256;;
+ SUCCESS)
+ echo 0;;
+ -EPERM)
+ echo -1;;
+ -ENOENT)
+ echo -2;;
+ -EINVAL)
+ echo -22;;
+ -ERR_ANY)
+ echo -123456;;
+ *)
+ echo invalid;;
+ esac
+}
+
+errno_val_to_name()
+ case "$1" in
+ 256)
+ echo MODULE_NOT_FOUND;;
+ 0)
+ echo SUCCESS;;
+ -1)
+ echo -EPERM;;
+ -2)
+ echo -ENOENT;;
+ -22)
+ echo -EINVAL;;
+ -123456)
+ echo -ERR_ANY;;
+ *)
+ echo invalid;;
+ esac
+
+config_set_test_case_driver()
+{
+ if ! echo -n 1 >$DIR/config_test_case; then
+ echo "$0: Unable to set to test case to driver" >&2
+ exit 1
+ fi
+}
+
+config_set_test_case_fs()
+{
+ if ! echo -n 2 >$DIR/config_test_case; then
+ echo "$0: Unable to set to test case to fs" >&2
+ exit 1
+ fi
+}
+
+config_num_threads()
+{
+ if ! echo -n $1 >$DIR/config_num_threads; then
+ echo "$0: Unable to set to number of threads" >&2
+ exit 1
+ fi
+}
+
+config_get_modprobe_limit()
+{
+ if [[ -f ${MODPROBE_LIMIT_FILE} ]] ; then
+ MODPROBE_LIMIT=$(cat $MODPROBE_LIMIT_FILE)
+ fi
+ echo $MODPROBE_LIMIT
+}
+
+config_num_thread_limit_extra()
+{
+ MODPROBE_LIMIT=$(config_get_modprobe_limit)
+ let EXTRA_LIMIT=$MODPROBE_LIMIT+$1
+ config_num_threads $EXTRA_LIMIT
+}
+
+# For special characters use printf directly,
+# refer to kmod_test_0001
+config_set_driver()
+{
+ if ! echo -n $1 >$DIR/config_test_driver; then
+ echo "$0: Unable to set driver" >&2
+ exit 1
+ fi
+}
+
+config_set_fs()
+{
+ if ! echo -n $1 >$DIR/config_test_fs; then
+ echo "$0: Unable to set driver" >&2
+ exit 1
+ fi
+}
+
+config_get_driver()
+{
+ cat $DIR/config_test_driver
+}
+
+config_get_test_result()
+{
+ cat $DIR/test_result
+}
+
+config_reset()
+{
+ if ! echo -n "1" >"$DIR"/reset; then
+ echo "$0: reset shuld have worked" >&2
+ exit 1
+ fi
+}
+
+config_show_config()
+{
+ echo "----------------------------------------------------"
+ cat "$DIR"/config
+ echo "----------------------------------------------------"
+}
+
+config_trigger()
+{
+ if ! echo -n "1" >"$DIR"/trigger_config 2>/dev/null; then
+ echo "$1: FAIL - loading should have worked"
+ config_show_config
+ exit 1
+ fi
+ echo "$1: OK! - loading kmod test"
+}
+
+config_trigger_want_fail()
+{
+ if echo "1" > $DIR/trigger_config 2>/dev/null; then
+ echo "$1: FAIL - test case was expected to fail"
+ config_show_config
+ exit 1
+ fi
+ echo "$1: OK! - kmod test case failed as expected"
+}
+
+config_expect_result()
+{
+ RC=$(config_get_test_result)
+ RC_NAME=$(errno_val_to_name $RC)
+
+ ERRNO_NAME=$2
+ ERRNO=$(errno_name_to_val $ERRNO_NAME)
+
+ if [[ $ERRNO_NAME = "-ERR_ANY" ]]; then
+ if [[ $RC -ge 0 ]]; then
+ echo "$1: FAIL, test expects $ERRNO_NAME - got $RC_NAME ($RC)" >&2
+ config_show_config
+ exit 1
+ fi
+ elif [[ $RC != $ERRNO ]]; then
+ echo "$1: FAIL, test expects $ERRNO_NAME ($ERRNO) - got $RC_NAME ($RC)" >&2
+ config_show_config
+ exit 1
+ fi
+ echo "$1: OK! - Return value: $RC ($RC_NAME), expected $ERRNO_NAME"
+}
+
+kmod_defaults_driver()
+{
+ config_reset
+ modprobe -r $DEFAULT_KMOD_DRIVER
+ config_set_driver $DEFAULT_KMOD_DRIVER
+}
+
+kmod_defaults_fs()
+{
+ config_reset
+ modprobe -r $DEFAULT_KMOD_FS
+ config_set_fs $DEFAULT_KMOD_FS
+ config_set_test_case_fs
+}
+
+kmod_test_0001_driver()
+{
+ NAME='\000'
+
+ kmod_defaults_driver
+ config_num_threads 1
+ printf '\000' >"$DIR"/config_test_driver
+ config_trigger ${FUNCNAME[0]}
+ config_expect_result ${FUNCNAME[0]} MODULE_NOT_FOUND
+}
+
+kmod_test_0001_fs()
+{
+ NAME='\000'
+
+ kmod_defaults_fs
+ config_num_threads 1
+ printf '\000' >"$DIR"/config_test_fs
+ config_trigger ${FUNCNAME[0]}
+ config_expect_result ${FUNCNAME[0]} -EINVAL
+}
+
+kmod_test_0001()
+{
+ kmod_test_0001_driver
+ kmod_test_0001_fs
+}
+
+kmod_test_0002_driver()
+{
+ NAME="nope-$DEFAULT_KMOD_DRIVER"
+
+ kmod_defaults_driver
+ config_set_driver $NAME
+ config_num_threads 1
+ config_trigger ${FUNCNAME[0]}
+ config_expect_result ${FUNCNAME[0]} MODULE_NOT_FOUND
+}
+
+kmod_test_0002_fs()
+{
+ NAME="nope-$DEFAULT_KMOD_FS"
+
+ kmod_defaults_fs
+ config_set_fs $NAME
+ config_trigger ${FUNCNAME[0]}
+ config_expect_result ${FUNCNAME[0]} -EINVAL
+}
+
+kmod_test_0002()
+{
+ kmod_test_0002_driver
+ kmod_test_0002_fs
+}
+
+kmod_test_0003()
+{
+ kmod_defaults_fs
+ config_num_threads 1
+ config_trigger ${FUNCNAME[0]}
+ config_expect_result ${FUNCNAME[0]} SUCCESS
+}
+
+kmod_test_0004()
+{
+ kmod_defaults_fs
+ config_num_threads 2
+ config_trigger ${FUNCNAME[0]}
+ config_expect_result ${FUNCNAME[0]} SUCCESS
+}
+
+kmod_test_0005()
+{
+ kmod_defaults_driver
+ config_trigger ${FUNCNAME[0]}
+ config_expect_result ${FUNCNAME[0]} SUCCESS
+}
+
+kmod_test_0006()
+{
+ kmod_defaults_fs
+ config_trigger ${FUNCNAME[0]}
+ config_expect_result ${FUNCNAME[0]} SUCCESS
+}
+
+kmod_test_0007()
+{
+ kmod_test_0005
+ kmod_test_0006
+}
+
+kmod_test_0008()
+{
+ kmod_defaults_driver
+ MODPROBE_LIMIT=$(config_get_modprobe_limit)
+ let EXTRA=$MODPROBE_LIMIT/6
+ config_num_thread_limit_extra $EXTRA
+ config_trigger ${FUNCNAME[0]}
+ config_expect_result ${FUNCNAME[0]} SUCCESS
+}
+
+kmod_test_0009()
+{
+ kmod_defaults_fs
+ MODPROBE_LIMIT=$(config_get_modprobe_limit)
+ let EXTRA=$MODPROBE_LIMIT/4
+ config_num_thread_limit_extra $EXTRA
+ config_trigger ${FUNCNAME[0]}
+ config_expect_result ${FUNCNAME[0]} SUCCESS
+}
+
+list_tests()
+{
+ echo "Test ID list:"
+ echo
+ echo "TEST_ID x NUM_TEST"
+ echo "TEST_ID: Test ID"
+ echo "NUM_TESTS: Number of recommended times to run the test"
+ echo
+ echo "0001 x $(get_test_count 0001) - Simple test - 1 thread for empty string"
+ echo "0002 x $(get_test_count 0002) - Simple test - 1 thread for modules/filesystems that do not exist"
+ echo "0003 x $(get_test_count 0003) - Simple test - 1 thread for get_fs_type() only"
+ echo "0004 x $(get_test_count 0004) - Simple test - 2 threads for get_fs_type() only"
+ echo "0005 x $(get_test_count 0005) - multithreaded tests with default setup - request_module() only"
+ echo "0006 x $(get_test_count 0006) - multithreaded tests with default setup - get_fs_type() only"
+ echo "0007 x $(get_test_count 0007) - multithreaded tests with default setup test request_module() and get_fs_type()"
+ echo "0008 x $(get_test_count 0008) - multithreaded - push kmod_concurrent over max_modprobes for request_module()"
+ echo "0009 x $(get_test_count 0009) - multithreaded - push kmod_concurrent over max_modprobes for get_fs_type()"
+}
+
+usage()
+{
+ NUM_TESTS=$(grep -o ' ' <<<"$ALL_TESTS" | grep -c .)
+ let NUM_TESTS=$NUM_TESTS+1
+ MAX_TEST=$(printf "%04d\n" $NUM_TESTS)
+ echo "Usage: $0 [ -t <4-number-digit> ] | [ -w <4-number-digit> ] |"
+ echo " [ -s <4-number-digit> ] | [ -c <4-number-digit> <test- count>"
+ echo " [ all ] [ -h | --help ] [ -l ]"
+ echo ""
+ echo "Valid tests: 0001-$MAX_TEST"
+ echo ""
+ echo " all Runs all tests (default)"
+ echo " -t Run test ID the number amount of times is recommended"
+ echo " -w Watch test ID run until it runs into an error"
+ echo " -s Run test ID once"
+ echo " -c Run test ID x test-count number of times"
+ echo " -l List all test ID list"
+ echo " -h|--help Help"
+ echo
+ echo "If an error every occurs execution will immediately terminate."
+ echo "If you are adding a new test try using -w <test-ID> first to"
+ echo "make sure the test passes a series of tests."
+ echo
+ echo Example uses:
+ echo
+ echo "${TEST_NAME}.sh -- executes all tests"
+ echo "${TEST_NAME}.sh -t 0008 -- Executes test ID 0008 number of times is recomended"
+ echo "${TEST_NAME}.sh -w 0008 -- Watch test ID 0008 run until an error occurs"
+ echo "${TEST_NAME}.sh -s 0008 -- Run test ID 0008 once"
+ echo "${TEST_NAME}.sh -c 0008 3 -- Run test ID 0008 three times"
+ echo
+ list_tests
+ exit 1
+}
+
+function test_num()
+{
+ re='^[0-9]+$'
+ if ! [[ $1 =~ $re ]]; then
+ usage
+ fi
+}
+
+function get_test_data()
+{
+ test_num $1
+ local field_num=$(echo $1 | sed 's/^0*//')
+ echo $ALL_TESTS | awk '{print $'$field_num'}'
+}
+
+function get_test_count()
+{
+ TEST_DATA=$(get_test_data $1)
+ LAST_TWO=${TEST_DATA#*:*}
+ echo ${LAST_TWO%:*}
+}
+
+function get_test_enabled()
+{
+ TEST_DATA=$(get_test_data $1)
+ echo ${TEST_DATA#*:*:}
+}
+
+function run_all_tests()
+{
+ for i in $ALL_TESTS ; do
+ TEST_ID=${i%:*:*}
+ ENABLED=$(get_test_enabled $TEST_ID)
+ TEST_COUNT=$(get_test_count $TEST_ID)
+ if [[ $ENABLED -eq "1" ]]; then
+ test_case $TEST_ID $TEST_COUNT
+ fi
+ done
+}
+
+function watch_log()
+{
+ if [ $# -ne 3 ]; then
+ clear
+ fi
+ date
+ echo "Running test: $2 - run #$1"
+}
+
+function watch_case()
+{
+ i=0
+ while [ 1 ]; do
+
+ if [ $# -eq 1 ]; then
+ test_num $1
+ watch_log $i ${TEST_NAME}_test_$1
+ ${TEST_NAME}_test_$1
+ else
+ watch_log $i all
+ run_all_tests
+ fi
+ let i=$i+1
+ done
+}
+
+function test_case()
+{
+ NUM_TESTS=$DEFAULT_NUM_TESTS
+ if [ $# -eq 2 ]; then
+ NUM_TESTS=$2
+ fi
+
+ i=0
+ while [ $i -lt $NUM_TESTS ]; do
+ test_num $1
+ watch_log $i ${TEST_NAME}_test_$1 noclear
+ RUN_TEST=${TEST_NAME}_test_$1
+ $RUN_TEST
+ let i=$i+1
+ done
+}
+
+function parse_args()
+{
+ if [ $# -eq 0 ]; then
+ run_all_tests
+ else
+ if [[ "$1" = "all" ]]; then
+ run_all_tests
+ elif [[ "$1" = "-w" ]]; then
+ shift
+ watch_case $@
+ elif [[ "$1" = "-t" ]]; then
+ shift
+ test_num $1
+ test_case $1 $(get_test_count $1)
+ elif [[ "$1" = "-c" ]]; then
+ shift
+ test_num $1
+ test_num $2
+ test_case $1 $2
+ elif [[ "$1" = "-s" ]]; then
+ shift
+ test_case $1 1
+ elif [[ "$1" = "-l" ]]; then
+ list_tests
+ elif [[ "$1" = "-h" || "$1" = "--help" ]]; then
+ usage
+ else
+ usage
+ fi
+ fi
+}
+
+test_reqs
+allow_user_defaults
+load_req_mod
+
+trap "test_finish" EXIT
+
+parse_args $@
+
+exit 0
diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h
new file mode 100644
index 000000000..a3edb2c8e
--- /dev/null
+++ b/tools/testing/selftests/kselftest.h
@@ -0,0 +1,183 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * kselftest.h: kselftest framework return codes to include from
+ * selftests.
+ *
+ * Copyright (c) 2014 Shuah Khan <shuahkh@osg.samsung.com>
+ * Copyright (c) 2014 Samsung Electronics Co., Ltd.
+ *
+ */
+#ifndef __KSELFTEST_H
+#define __KSELFTEST_H
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdarg.h>
+
+/* define kselftest exit codes */
+#define KSFT_PASS 0
+#define KSFT_FAIL 1
+#define KSFT_XFAIL 2
+#define KSFT_XPASS 3
+#define KSFT_SKIP 4
+
+/* counters */
+struct ksft_count {
+ unsigned int ksft_pass;
+ unsigned int ksft_fail;
+ unsigned int ksft_xfail;
+ unsigned int ksft_xpass;
+ unsigned int ksft_xskip;
+ unsigned int ksft_error;
+};
+
+static struct ksft_count ksft_cnt;
+
+static inline int ksft_test_num(void)
+{
+ return ksft_cnt.ksft_pass + ksft_cnt.ksft_fail +
+ ksft_cnt.ksft_xfail + ksft_cnt.ksft_xpass +
+ ksft_cnt.ksft_xskip + ksft_cnt.ksft_error;
+}
+
+static inline void ksft_inc_pass_cnt(void) { ksft_cnt.ksft_pass++; }
+static inline void ksft_inc_fail_cnt(void) { ksft_cnt.ksft_fail++; }
+static inline void ksft_inc_xfail_cnt(void) { ksft_cnt.ksft_xfail++; }
+static inline void ksft_inc_xpass_cnt(void) { ksft_cnt.ksft_xpass++; }
+static inline void ksft_inc_xskip_cnt(void) { ksft_cnt.ksft_xskip++; }
+static inline void ksft_inc_error_cnt(void) { ksft_cnt.ksft_error++; }
+
+static inline int ksft_get_pass_cnt(void) { return ksft_cnt.ksft_pass; }
+static inline int ksft_get_fail_cnt(void) { return ksft_cnt.ksft_fail; }
+static inline int ksft_get_xfail_cnt(void) { return ksft_cnt.ksft_xfail; }
+static inline int ksft_get_xpass_cnt(void) { return ksft_cnt.ksft_xpass; }
+static inline int ksft_get_xskip_cnt(void) { return ksft_cnt.ksft_xskip; }
+static inline int ksft_get_error_cnt(void) { return ksft_cnt.ksft_error; }
+
+static inline void ksft_print_header(void)
+{
+ if (!(getenv("KSFT_TAP_LEVEL")))
+ printf("TAP version 13\n");
+}
+
+static inline void ksft_print_cnts(void)
+{
+ printf("Pass %d Fail %d Xfail %d Xpass %d Skip %d Error %d\n",
+ ksft_cnt.ksft_pass, ksft_cnt.ksft_fail,
+ ksft_cnt.ksft_xfail, ksft_cnt.ksft_xpass,
+ ksft_cnt.ksft_xskip, ksft_cnt.ksft_error);
+ printf("1..%d\n", ksft_test_num());
+}
+
+static inline void ksft_print_msg(const char *msg, ...)
+{
+ va_list args;
+
+ va_start(args, msg);
+ printf("# ");
+ vprintf(msg, args);
+ va_end(args);
+}
+
+static inline void ksft_test_result_pass(const char *msg, ...)
+{
+ va_list args;
+
+ ksft_cnt.ksft_pass++;
+
+ va_start(args, msg);
+ printf("ok %d ", ksft_test_num());
+ vprintf(msg, args);
+ va_end(args);
+}
+
+static inline void ksft_test_result_fail(const char *msg, ...)
+{
+ va_list args;
+
+ ksft_cnt.ksft_fail++;
+
+ va_start(args, msg);
+ printf("not ok %d ", ksft_test_num());
+ vprintf(msg, args);
+ va_end(args);
+}
+
+static inline void ksft_test_result_skip(const char *msg, ...)
+{
+ va_list args;
+
+ ksft_cnt.ksft_xskip++;
+
+ va_start(args, msg);
+ printf("ok %d # skip ", ksft_test_num());
+ vprintf(msg, args);
+ va_end(args);
+}
+
+static inline void ksft_test_result_error(const char *msg, ...)
+{
+ va_list args;
+
+ ksft_cnt.ksft_error++;
+
+ va_start(args, msg);
+ printf("not ok %d # error ", ksft_test_num());
+ vprintf(msg, args);
+ va_end(args);
+}
+
+static inline int ksft_exit_pass(void)
+{
+ ksft_print_cnts();
+ exit(KSFT_PASS);
+}
+
+static inline int ksft_exit_fail(void)
+{
+ printf("Bail out!\n");
+ ksft_print_cnts();
+ exit(KSFT_FAIL);
+}
+
+static inline int ksft_exit_fail_msg(const char *msg, ...)
+{
+ va_list args;
+
+ va_start(args, msg);
+ printf("Bail out! ");
+ vprintf(msg, args);
+ va_end(args);
+
+ ksft_print_cnts();
+ exit(KSFT_FAIL);
+}
+
+static inline int ksft_exit_xfail(void)
+{
+ ksft_print_cnts();
+ exit(KSFT_XFAIL);
+}
+
+static inline int ksft_exit_xpass(void)
+{
+ ksft_print_cnts();
+ exit(KSFT_XPASS);
+}
+
+static inline int ksft_exit_skip(const char *msg, ...)
+{
+ if (msg) {
+ va_list args;
+
+ va_start(args, msg);
+ printf("1..%d # Skipped: ", ksft_test_num());
+ vprintf(msg, args);
+ va_end(args);
+ } else {
+ ksft_print_cnts();
+ }
+ exit(KSFT_SKIP);
+}
+
+#endif /* __KSELFTEST_H */
diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h
new file mode 100644
index 000000000..76d654ef3
--- /dev/null
+++ b/tools/testing/selftests/kselftest_harness.h
@@ -0,0 +1,779 @@
+/*
+ * Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
+ * Use of this source code is governed by the GPLv2 license.
+ *
+ * kselftest_harness.h: simple C unit test helper.
+ *
+ * See documentation in Documentation/dev-tools/kselftest.rst
+ *
+ * API inspired by code.google.com/p/googletest
+ */
+
+/**
+ * DOC: example
+ *
+ * .. code-block:: c
+ *
+ * #include "../kselftest_harness.h"
+ *
+ * TEST(standalone_test) {
+ * do_some_stuff;
+ * EXPECT_GT(10, stuff) {
+ * stuff_state_t state;
+ * enumerate_stuff_state(&state);
+ * TH_LOG("expectation failed with state: %s", state.msg);
+ * }
+ * more_stuff;
+ * ASSERT_NE(some_stuff, NULL) TH_LOG("how did it happen?!");
+ * last_stuff;
+ * EXPECT_EQ(0, last_stuff);
+ * }
+ *
+ * FIXTURE(my_fixture) {
+ * mytype_t *data;
+ * int awesomeness_level;
+ * };
+ * FIXTURE_SETUP(my_fixture) {
+ * self->data = mytype_new();
+ * ASSERT_NE(NULL, self->data);
+ * }
+ * FIXTURE_TEARDOWN(my_fixture) {
+ * mytype_free(self->data);
+ * }
+ * TEST_F(my_fixture, data_is_good) {
+ * EXPECT_EQ(1, is_my_data_good(self->data));
+ * }
+ *
+ * TEST_HARNESS_MAIN
+ */
+
+#ifndef __KSELFTEST_HARNESS_H
+#define __KSELFTEST_HARNESS_H
+
+#define _GNU_SOURCE
+#include <asm/types.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+
+/* Utilities exposed to the test definitions */
+#ifndef TH_LOG_STREAM
+# define TH_LOG_STREAM stderr
+#endif
+
+#ifndef TH_LOG_ENABLED
+# define TH_LOG_ENABLED 1
+#endif
+
+/**
+ * TH_LOG(fmt, ...)
+ *
+ * @fmt: format string
+ * @...: optional arguments
+ *
+ * .. code-block:: c
+ *
+ * TH_LOG(format, ...)
+ *
+ * Optional debug logging function available for use in tests.
+ * Logging may be enabled or disabled by defining TH_LOG_ENABLED.
+ * E.g., #define TH_LOG_ENABLED 1
+ *
+ * If no definition is provided, logging is enabled by default.
+ *
+ * If there is no way to print an error message for the process running the
+ * test (e.g. not allowed to write to stderr), it is still possible to get the
+ * ASSERT_* number for which the test failed. This behavior can be enabled by
+ * writing `_metadata->no_print = true;` before the check sequence that is
+ * unable to print. When an error occur, instead of printing an error message
+ * and calling `abort(3)`, the test process call `_exit(2)` with the assert
+ * number as argument, which is then printed by the parent process.
+ */
+#define TH_LOG(fmt, ...) do { \
+ if (TH_LOG_ENABLED) \
+ __TH_LOG(fmt, ##__VA_ARGS__); \
+} while (0)
+
+/* Unconditional logger for internal use. */
+#define __TH_LOG(fmt, ...) \
+ fprintf(TH_LOG_STREAM, "%s:%d:%s:" fmt "\n", \
+ __FILE__, __LINE__, _metadata->name, ##__VA_ARGS__)
+
+/**
+ * XFAIL(statement, fmt, ...)
+ *
+ * @statement: statement to run after reporting XFAIL
+ * @fmt: format string
+ * @...: optional arguments
+ *
+ * This forces a "pass" after reporting a failure with an XFAIL prefix,
+ * and runs "statement", which is usually "return" or "goto skip".
+ */
+#define XFAIL(statement, fmt, ...) do { \
+ if (TH_LOG_ENABLED) { \
+ fprintf(TH_LOG_STREAM, "[ XFAIL! ] " fmt "\n", \
+ ##__VA_ARGS__); \
+ } \
+ /* TODO: find a way to pass xfail to test runner process. */ \
+ _metadata->passed = 1; \
+ _metadata->trigger = 0; \
+ statement; \
+} while (0)
+
+/**
+ * TEST(test_name) - Defines the test function and creates the registration
+ * stub
+ *
+ * @test_name: test name
+ *
+ * .. code-block:: c
+ *
+ * TEST(name) { implementation }
+ *
+ * Defines a test by name.
+ * Names must be unique and tests must not be run in parallel. The
+ * implementation containing block is a function and scoping should be treated
+ * as such. Returning early may be performed with a bare "return;" statement.
+ *
+ * EXPECT_* and ASSERT_* are valid in a TEST() { } context.
+ */
+#define TEST(test_name) __TEST_IMPL(test_name, -1)
+
+/**
+ * TEST_SIGNAL(test_name, signal)
+ *
+ * @test_name: test name
+ * @signal: signal number
+ *
+ * .. code-block:: c
+ *
+ * TEST_SIGNAL(name, signal) { implementation }
+ *
+ * Defines a test by name and the expected term signal.
+ * Names must be unique and tests must not be run in parallel. The
+ * implementation containing block is a function and scoping should be treated
+ * as such. Returning early may be performed with a bare "return;" statement.
+ *
+ * EXPECT_* and ASSERT_* are valid in a TEST() { } context.
+ */
+#define TEST_SIGNAL(test_name, signal) __TEST_IMPL(test_name, signal)
+
+#define __TEST_IMPL(test_name, _signal) \
+ static void test_name(struct __test_metadata *_metadata); \
+ static struct __test_metadata _##test_name##_object = \
+ { name: "global." #test_name, \
+ fn: &test_name, termsig: _signal }; \
+ static void __attribute__((constructor)) _register_##test_name(void) \
+ { \
+ __register_test(&_##test_name##_object); \
+ } \
+ static void test_name( \
+ struct __test_metadata __attribute__((unused)) *_metadata)
+
+/**
+ * FIXTURE_DATA(datatype_name) - Wraps the struct name so we have one less
+ * argument to pass around
+ *
+ * @datatype_name: datatype name
+ *
+ * .. code-block:: c
+ *
+ * FIXTURE_DATA(datatype name)
+ *
+ * This call may be used when the type of the fixture data
+ * is needed. In general, this should not be needed unless
+ * the *self* is being passed to a helper directly.
+ */
+#define FIXTURE_DATA(datatype_name) struct _test_data_##datatype_name
+
+/**
+ * FIXTURE(fixture_name) - Called once per fixture to setup the data and
+ * register
+ *
+ * @fixture_name: fixture name
+ *
+ * .. code-block:: c
+ *
+ * FIXTURE(datatype name) {
+ * type property1;
+ * ...
+ * };
+ *
+ * Defines the data provided to TEST_F()-defined tests as *self*. It should be
+ * populated and cleaned up using FIXTURE_SETUP() and FIXTURE_TEARDOWN().
+ */
+#define FIXTURE(fixture_name) \
+ static void __attribute__((constructor)) \
+ _register_##fixture_name##_data(void) \
+ { \
+ __fixture_count++; \
+ } \
+ FIXTURE_DATA(fixture_name)
+
+/**
+ * FIXTURE_SETUP(fixture_name) - Prepares the setup function for the fixture.
+ * *_metadata* is included so that EXPECT_* and ASSERT_* work correctly.
+ *
+ * @fixture_name: fixture name
+ *
+ * .. code-block:: c
+ *
+ * FIXTURE_SETUP(fixture name) { implementation }
+ *
+ * Populates the required "setup" function for a fixture. An instance of the
+ * datatype defined with FIXTURE_DATA() will be exposed as *self* for the
+ * implementation.
+ *
+ * ASSERT_* are valid for use in this context and will prempt the execution
+ * of any dependent fixture tests.
+ *
+ * A bare "return;" statement may be used to return early.
+ */
+#define FIXTURE_SETUP(fixture_name) \
+ void fixture_name##_setup( \
+ struct __test_metadata __attribute__((unused)) *_metadata, \
+ FIXTURE_DATA(fixture_name) __attribute__((unused)) *self)
+/**
+ * FIXTURE_TEARDOWN(fixture_name)
+ * *_metadata* is included so that EXPECT_* and ASSERT_* work correctly.
+ *
+ * @fixture_name: fixture name
+ *
+ * .. code-block:: c
+ *
+ * FIXTURE_TEARDOWN(fixture name) { implementation }
+ *
+ * Populates the required "teardown" function for a fixture. An instance of the
+ * datatype defined with FIXTURE_DATA() will be exposed as *self* for the
+ * implementation to clean up.
+ *
+ * A bare "return;" statement may be used to return early.
+ */
+#define FIXTURE_TEARDOWN(fixture_name) \
+ void fixture_name##_teardown( \
+ struct __test_metadata __attribute__((unused)) *_metadata, \
+ FIXTURE_DATA(fixture_name) __attribute__((unused)) *self)
+
+/**
+ * TEST_F(fixture_name, test_name) - Emits test registration and helpers for
+ * fixture-based test cases
+ *
+ * @fixture_name: fixture name
+ * @test_name: test name
+ *
+ * .. code-block:: c
+ *
+ * TEST_F(fixture, name) { implementation }
+ *
+ * Defines a test that depends on a fixture (e.g., is part of a test case).
+ * Very similar to TEST() except that *self* is the setup instance of fixture's
+ * datatype exposed for use by the implementation.
+ *
+ * Warning: use of ASSERT_* here will skip TEARDOWN.
+ */
+/* TODO(wad) register fixtures on dedicated test lists. */
+#define TEST_F(fixture_name, test_name) \
+ __TEST_F_IMPL(fixture_name, test_name, -1)
+
+#define TEST_F_SIGNAL(fixture_name, test_name, signal) \
+ __TEST_F_IMPL(fixture_name, test_name, signal)
+
+#define __TEST_F_IMPL(fixture_name, test_name, signal) \
+ static void fixture_name##_##test_name( \
+ struct __test_metadata *_metadata, \
+ FIXTURE_DATA(fixture_name) *self); \
+ static inline void wrapper_##fixture_name##_##test_name( \
+ struct __test_metadata *_metadata) \
+ { \
+ /* fixture data is alloced, setup, and torn down per call. */ \
+ FIXTURE_DATA(fixture_name) self; \
+ memset(&self, 0, sizeof(FIXTURE_DATA(fixture_name))); \
+ fixture_name##_setup(_metadata, &self); \
+ /* Let setup failure terminate early. */ \
+ if (!_metadata->passed) \
+ return; \
+ fixture_name##_##test_name(_metadata, &self); \
+ fixture_name##_teardown(_metadata, &self); \
+ } \
+ static struct __test_metadata \
+ _##fixture_name##_##test_name##_object = { \
+ name: #fixture_name "." #test_name, \
+ fn: &wrapper_##fixture_name##_##test_name, \
+ termsig: signal, \
+ }; \
+ static void __attribute__((constructor)) \
+ _register_##fixture_name##_##test_name(void) \
+ { \
+ __register_test(&_##fixture_name##_##test_name##_object); \
+ } \
+ static void fixture_name##_##test_name( \
+ struct __test_metadata __attribute__((unused)) *_metadata, \
+ FIXTURE_DATA(fixture_name) __attribute__((unused)) *self)
+
+/**
+ * TEST_HARNESS_MAIN - Simple wrapper to run the test harness
+ *
+ * .. code-block:: c
+ *
+ * TEST_HARNESS_MAIN
+ *
+ * Use once to append a main() to the test file.
+ */
+#define TEST_HARNESS_MAIN \
+ static void __attribute__((constructor)) \
+ __constructor_order_last(void) \
+ { \
+ if (!__constructor_order) \
+ __constructor_order = _CONSTRUCTOR_ORDER_BACKWARD; \
+ } \
+ int main(int argc, char **argv) { \
+ return test_harness_run(argc, argv); \
+ }
+
+/**
+ * DOC: operators
+ *
+ * Operators for use in TEST() and TEST_F().
+ * ASSERT_* calls will stop test execution immediately.
+ * EXPECT_* calls will emit a failure warning, note it, and continue.
+ */
+
+/**
+ * ASSERT_EQ(expected, seen)
+ *
+ * @expected: expected value
+ * @seen: measured value
+ *
+ * ASSERT_EQ(expected, measured): expected == measured
+ */
+#define ASSERT_EQ(expected, seen) \
+ __EXPECT(expected, #expected, seen, #seen, ==, 1)
+
+/**
+ * ASSERT_NE(expected, seen)
+ *
+ * @expected: expected value
+ * @seen: measured value
+ *
+ * ASSERT_NE(expected, measured): expected != measured
+ */
+#define ASSERT_NE(expected, seen) \
+ __EXPECT(expected, #expected, seen, #seen, !=, 1)
+
+/**
+ * ASSERT_LT(expected, seen)
+ *
+ * @expected: expected value
+ * @seen: measured value
+ *
+ * ASSERT_LT(expected, measured): expected < measured
+ */
+#define ASSERT_LT(expected, seen) \
+ __EXPECT(expected, #expected, seen, #seen, <, 1)
+
+/**
+ * ASSERT_LE(expected, seen)
+ *
+ * @expected: expected value
+ * @seen: measured value
+ *
+ * ASSERT_LE(expected, measured): expected <= measured
+ */
+#define ASSERT_LE(expected, seen) \
+ __EXPECT(expected, #expected, seen, #seen, <=, 1)
+
+/**
+ * ASSERT_GT(expected, seen)
+ *
+ * @expected: expected value
+ * @seen: measured value
+ *
+ * ASSERT_GT(expected, measured): expected > measured
+ */
+#define ASSERT_GT(expected, seen) \
+ __EXPECT(expected, #expected, seen, #seen, >, 1)
+
+/**
+ * ASSERT_GE(expected, seen)
+ *
+ * @expected: expected value
+ * @seen: measured value
+ *
+ * ASSERT_GE(expected, measured): expected >= measured
+ */
+#define ASSERT_GE(expected, seen) \
+ __EXPECT(expected, #expected, seen, #seen, >=, 1)
+
+/**
+ * ASSERT_NULL(seen)
+ *
+ * @seen: measured value
+ *
+ * ASSERT_NULL(measured): NULL == measured
+ */
+#define ASSERT_NULL(seen) \
+ __EXPECT(NULL, "NULL", seen, #seen, ==, 1)
+
+/**
+ * ASSERT_TRUE(seen)
+ *
+ * @seen: measured value
+ *
+ * ASSERT_TRUE(measured): measured != 0
+ */
+#define ASSERT_TRUE(seen) \
+ __EXPECT(0, "0", seen, #seen, !=, 1)
+
+/**
+ * ASSERT_FALSE(seen)
+ *
+ * @seen: measured value
+ *
+ * ASSERT_FALSE(measured): measured == 0
+ */
+#define ASSERT_FALSE(seen) \
+ __EXPECT(0, "0", seen, #seen, ==, 1)
+
+/**
+ * ASSERT_STREQ(expected, seen)
+ *
+ * @expected: expected value
+ * @seen: measured value
+ *
+ * ASSERT_STREQ(expected, measured): !strcmp(expected, measured)
+ */
+#define ASSERT_STREQ(expected, seen) \
+ __EXPECT_STR(expected, seen, ==, 1)
+
+/**
+ * ASSERT_STRNE(expected, seen)
+ *
+ * @expected: expected value
+ * @seen: measured value
+ *
+ * ASSERT_STRNE(expected, measured): strcmp(expected, measured)
+ */
+#define ASSERT_STRNE(expected, seen) \
+ __EXPECT_STR(expected, seen, !=, 1)
+
+/**
+ * EXPECT_EQ(expected, seen)
+ *
+ * @expected: expected value
+ * @seen: measured value
+ *
+ * EXPECT_EQ(expected, measured): expected == measured
+ */
+#define EXPECT_EQ(expected, seen) \
+ __EXPECT(expected, #expected, seen, #seen, ==, 0)
+
+/**
+ * EXPECT_NE(expected, seen)
+ *
+ * @expected: expected value
+ * @seen: measured value
+ *
+ * EXPECT_NE(expected, measured): expected != measured
+ */
+#define EXPECT_NE(expected, seen) \
+ __EXPECT(expected, #expected, seen, #seen, !=, 0)
+
+/**
+ * EXPECT_LT(expected, seen)
+ *
+ * @expected: expected value
+ * @seen: measured value
+ *
+ * EXPECT_LT(expected, measured): expected < measured
+ */
+#define EXPECT_LT(expected, seen) \
+ __EXPECT(expected, #expected, seen, #seen, <, 0)
+
+/**
+ * EXPECT_LE(expected, seen)
+ *
+ * @expected: expected value
+ * @seen: measured value
+ *
+ * EXPECT_LE(expected, measured): expected <= measured
+ */
+#define EXPECT_LE(expected, seen) \
+ __EXPECT(expected, #expected, seen, #seen, <=, 0)
+
+/**
+ * EXPECT_GT(expected, seen)
+ *
+ * @expected: expected value
+ * @seen: measured value
+ *
+ * EXPECT_GT(expected, measured): expected > measured
+ */
+#define EXPECT_GT(expected, seen) \
+ __EXPECT(expected, #expected, seen, #seen, >, 0)
+
+/**
+ * EXPECT_GE(expected, seen)
+ *
+ * @expected: expected value
+ * @seen: measured value
+ *
+ * EXPECT_GE(expected, measured): expected >= measured
+ */
+#define EXPECT_GE(expected, seen) \
+ __EXPECT(expected, #expected, seen, #seen, >=, 0)
+
+/**
+ * EXPECT_NULL(seen)
+ *
+ * @seen: measured value
+ *
+ * EXPECT_NULL(measured): NULL == measured
+ */
+#define EXPECT_NULL(seen) \
+ __EXPECT(NULL, "NULL", seen, #seen, ==, 0)
+
+/**
+ * EXPECT_TRUE(seen)
+ *
+ * @seen: measured value
+ *
+ * EXPECT_TRUE(measured): 0 != measured
+ */
+#define EXPECT_TRUE(seen) \
+ __EXPECT(0, "0", seen, #seen, !=, 0)
+
+/**
+ * EXPECT_FALSE(seen)
+ *
+ * @seen: measured value
+ *
+ * EXPECT_FALSE(measured): 0 == measured
+ */
+#define EXPECT_FALSE(seen) \
+ __EXPECT(0, "0", seen, #seen, ==, 0)
+
+/**
+ * EXPECT_STREQ(expected, seen)
+ *
+ * @expected: expected value
+ * @seen: measured value
+ *
+ * EXPECT_STREQ(expected, measured): !strcmp(expected, measured)
+ */
+#define EXPECT_STREQ(expected, seen) \
+ __EXPECT_STR(expected, seen, ==, 0)
+
+/**
+ * EXPECT_STRNE(expected, seen)
+ *
+ * @expected: expected value
+ * @seen: measured value
+ *
+ * EXPECT_STRNE(expected, measured): strcmp(expected, measured)
+ */
+#define EXPECT_STRNE(expected, seen) \
+ __EXPECT_STR(expected, seen, !=, 0)
+
+#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0]))
+
+/* Support an optional handler after and ASSERT_* or EXPECT_*. The approach is
+ * not thread-safe, but it should be fine in most sane test scenarios.
+ *
+ * Using __bail(), which optionally abort()s, is the easiest way to early
+ * return while still providing an optional block to the API consumer.
+ */
+#define OPTIONAL_HANDLER(_assert) \
+ for (; _metadata->trigger; _metadata->trigger = \
+ __bail(_assert, _metadata->no_print, _metadata->step))
+
+#define __INC_STEP(_metadata) \
+ if (_metadata->passed && _metadata->step < 255) \
+ _metadata->step++;
+
+#define __EXPECT(_expected, _expected_str, _seen, _seen_str, _t, _assert) do { \
+ /* Avoid multiple evaluation of the cases */ \
+ __typeof__(_expected) __exp = (_expected); \
+ __typeof__(_seen) __seen = (_seen); \
+ if (_assert) __INC_STEP(_metadata); \
+ if (!(__exp _t __seen)) { \
+ unsigned long long __exp_print = (uintptr_t)__exp; \
+ unsigned long long __seen_print = (uintptr_t)__seen; \
+ __TH_LOG("Expected %s (%llu) %s %s (%llu)", \
+ _expected_str, __exp_print, #_t, \
+ _seen_str, __seen_print); \
+ _metadata->passed = 0; \
+ /* Ensure the optional handler is triggered */ \
+ _metadata->trigger = 1; \
+ } \
+} while (0); OPTIONAL_HANDLER(_assert)
+
+#define __EXPECT_STR(_expected, _seen, _t, _assert) do { \
+ const char *__exp = (_expected); \
+ const char *__seen = (_seen); \
+ if (_assert) __INC_STEP(_metadata); \
+ if (!(strcmp(__exp, __seen) _t 0)) { \
+ __TH_LOG("Expected '%s' %s '%s'.", __exp, #_t, __seen); \
+ _metadata->passed = 0; \
+ _metadata->trigger = 1; \
+ } \
+} while (0); OPTIONAL_HANDLER(_assert)
+
+/* Contains all the information for test execution and status checking. */
+struct __test_metadata {
+ const char *name;
+ void (*fn)(struct __test_metadata *);
+ int termsig;
+ int passed;
+ int trigger; /* extra handler after the evaluation */
+ __u8 step;
+ bool no_print; /* manual trigger when TH_LOG_STREAM is not available */
+ struct __test_metadata *prev, *next;
+};
+
+/* Storage for the (global) tests to be run. */
+static struct __test_metadata *__test_list;
+static unsigned int __test_count;
+static unsigned int __fixture_count;
+static int __constructor_order;
+
+#define _CONSTRUCTOR_ORDER_FORWARD 1
+#define _CONSTRUCTOR_ORDER_BACKWARD -1
+
+/*
+ * Since constructors are called in reverse order, reverse the test
+ * list so tests are run in source declaration order.
+ * https://gcc.gnu.org/onlinedocs/gccint/Initialization.html
+ * However, it seems not all toolchains do this correctly, so use
+ * __constructor_order to detect which direction is called first
+ * and adjust list building logic to get things running in the right
+ * direction.
+ */
+static inline void __register_test(struct __test_metadata *t)
+{
+ __test_count++;
+ /* Circular linked list where only prev is circular. */
+ if (__test_list == NULL) {
+ __test_list = t;
+ t->next = NULL;
+ t->prev = t;
+ return;
+ }
+ if (__constructor_order == _CONSTRUCTOR_ORDER_FORWARD) {
+ t->next = NULL;
+ t->prev = __test_list->prev;
+ t->prev->next = t;
+ __test_list->prev = t;
+ } else {
+ t->next = __test_list;
+ t->next->prev = t;
+ t->prev = t;
+ __test_list = t;
+ }
+}
+
+static inline int __bail(int for_realz, bool no_print, __u8 step)
+{
+ if (for_realz) {
+ if (no_print)
+ _exit(step);
+ abort();
+ }
+ return 0;
+}
+
+void __run_test(struct __test_metadata *t)
+{
+ pid_t child_pid;
+ int status;
+
+ t->passed = 1;
+ t->trigger = 0;
+ printf("[ RUN ] %s\n", t->name);
+ child_pid = fork();
+ if (child_pid < 0) {
+ printf("ERROR SPAWNING TEST CHILD\n");
+ t->passed = 0;
+ } else if (child_pid == 0) {
+ t->fn(t);
+ /* return the step that failed or 0 */
+ _exit(t->passed ? 0 : t->step);
+ } else {
+ /* TODO(wad) add timeout support. */
+ waitpid(child_pid, &status, 0);
+ if (WIFEXITED(status)) {
+ t->passed = t->termsig == -1 ? !WEXITSTATUS(status) : 0;
+ if (t->termsig != -1) {
+ fprintf(TH_LOG_STREAM,
+ "%s: Test exited normally "
+ "instead of by signal (code: %d)\n",
+ t->name,
+ WEXITSTATUS(status));
+ } else if (!t->passed) {
+ fprintf(TH_LOG_STREAM,
+ "%s: Test failed at step #%d\n",
+ t->name,
+ WEXITSTATUS(status));
+ }
+ } else if (WIFSIGNALED(status)) {
+ t->passed = 0;
+ if (WTERMSIG(status) == SIGABRT) {
+ fprintf(TH_LOG_STREAM,
+ "%s: Test terminated by assertion\n",
+ t->name);
+ } else if (WTERMSIG(status) == t->termsig) {
+ t->passed = 1;
+ } else {
+ fprintf(TH_LOG_STREAM,
+ "%s: Test terminated unexpectedly "
+ "by signal %d\n",
+ t->name,
+ WTERMSIG(status));
+ }
+ } else {
+ fprintf(TH_LOG_STREAM,
+ "%s: Test ended in some other way [%u]\n",
+ t->name,
+ status);
+ }
+ }
+ printf("[ %4s ] %s\n", (t->passed ? "OK" : "FAIL"), t->name);
+}
+
+static int test_harness_run(int __attribute__((unused)) argc,
+ char __attribute__((unused)) **argv)
+{
+ struct __test_metadata *t;
+ int ret = 0;
+ unsigned int count = 0;
+ unsigned int pass_count = 0;
+
+ /* TODO(wad) add optional arguments similar to gtest. */
+ printf("[==========] Running %u tests from %u test cases.\n",
+ __test_count, __fixture_count + 1);
+ for (t = __test_list; t; t = t->next) {
+ count++;
+ __run_test(t);
+ if (t->passed)
+ pass_count++;
+ else
+ ret = 1;
+ }
+ printf("[==========] %u / %u tests passed.\n", pass_count, count);
+ printf("[ %s ]\n", (ret ? "FAILED" : "PASSED"));
+ return ret;
+}
+
+static void __attribute__((constructor)) __constructor_order_first(void)
+{
+ if (!__constructor_order)
+ __constructor_order = _CONSTRUCTOR_ORDER_FORWARD;
+}
+
+#endif /* __KSELFTEST_HARNESS_H */
diff --git a/tools/testing/selftests/kselftest_install.sh b/tools/testing/selftests/kselftest_install.sh
new file mode 100755
index 000000000..ec3044638
--- /dev/null
+++ b/tools/testing/selftests/kselftest_install.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Kselftest Install
+# Install kselftest tests
+# Author: Shuah Khan <shuahkh@osg.samsung.com>
+# Copyright (C) 2015 Samsung Electronics Co., Ltd.
+
+install_loc=`pwd`
+
+main()
+{
+ if [ $(basename $install_loc) != "selftests" ]; then
+ echo "$0: Please run it in selftests directory ..."
+ exit 1;
+ fi
+ if [ "$#" -eq 0 ]; then
+ echo "$0: Installing in default location - $install_loc ..."
+ elif [ ! -d "$1" ]; then
+ echo "$0: $1 doesn't exist!!"
+ exit 1;
+ else
+ install_loc=$1
+ echo "$0: Installing in specified location - $install_loc ..."
+ fi
+
+ install_dir=$install_loc/kselftest
+
+# Create install directory
+ mkdir -p $install_dir
+# Build tests
+ INSTALL_PATH=$install_dir make install
+}
+
+main "$@"
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
new file mode 100644
index 000000000..5c34752e1
--- /dev/null
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -0,0 +1,6 @@
+cr4_cpuid_sync_test
+platform_info_test
+set_sregs_test
+sync_regs_test
+vmx_tsc_adjust_test
+state_test
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
new file mode 100644
index 000000000..cc83e2fd3
--- /dev/null
+++ b/tools/testing/selftests/kvm/Makefile
@@ -0,0 +1,43 @@
+all:
+
+top_srcdir = ../../../../
+KSFT_KHDR_INSTALL := 1
+UNAME_M := $(shell uname -m)
+
+LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c
+LIBKVM_x86_64 = lib/x86.c lib/vmx.c
+
+TEST_GEN_PROGS_x86_64 = platform_info_test
+TEST_GEN_PROGS_x86_64 += set_sregs_test
+TEST_GEN_PROGS_x86_64 += sync_regs_test
+TEST_GEN_PROGS_x86_64 += vmx_tsc_adjust_test
+TEST_GEN_PROGS_x86_64 += cr4_cpuid_sync_test
+TEST_GEN_PROGS_x86_64 += state_test
+TEST_GEN_PROGS_x86_64 += dirty_log_test
+
+TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M))
+LIBKVM += $(LIBKVM_$(UNAME_M))
+
+INSTALL_HDR_PATH = $(top_srcdir)/usr
+LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/
+LINUX_TOOL_INCLUDE = $(top_srcdir)tools/include
+CFLAGS += -O2 -g -std=gnu99 -I$(LINUX_TOOL_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -I..
+LDFLAGS += -pthread
+
+# After inclusion, $(OUTPUT) is defined and
+# $(TEST_GEN_PROGS) starts with $(OUTPUT)/
+include ../lib.mk
+
+STATIC_LIBS := $(OUTPUT)/libkvm.a
+LIBKVM_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM))
+EXTRA_CLEAN += $(LIBKVM_OBJ) $(STATIC_LIBS)
+
+x := $(shell mkdir -p $(sort $(dir $(LIBKVM_OBJ))))
+$(LIBKVM_OBJ): $(OUTPUT)/%.o: %.c
+ $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
+
+$(OUTPUT)/libkvm.a: $(LIBKVM_OBJ)
+ $(AR) crs $@ $^
+
+all: $(STATIC_LIBS)
+$(TEST_GEN_PROGS): $(STATIC_LIBS)
diff --git a/tools/testing/selftests/kvm/config b/tools/testing/selftests/kvm/config
new file mode 100644
index 000000000..63ed533f7
--- /dev/null
+++ b/tools/testing/selftests/kvm/config
@@ -0,0 +1,3 @@
+CONFIG_KVM=y
+CONFIG_KVM_INTEL=y
+CONFIG_KVM_AMD=y
diff --git a/tools/testing/selftests/kvm/cr4_cpuid_sync_test.c b/tools/testing/selftests/kvm/cr4_cpuid_sync_test.c
new file mode 100644
index 000000000..11ec358bf
--- /dev/null
+++ b/tools/testing/selftests/kvm/cr4_cpuid_sync_test.c
@@ -0,0 +1,113 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * CR4 and CPUID sync test
+ *
+ * Copyright 2018, Red Hat, Inc. and/or its affiliates.
+ *
+ * Author:
+ * Wei Huang <wei@redhat.com>
+ */
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "x86.h"
+
+#define X86_FEATURE_XSAVE (1<<26)
+#define X86_FEATURE_OSXSAVE (1<<27)
+#define VCPU_ID 1
+
+static inline bool cr4_cpuid_is_sync(void)
+{
+ int func, subfunc;
+ uint32_t eax, ebx, ecx, edx;
+ uint64_t cr4;
+
+ func = 0x1;
+ subfunc = 0x0;
+ __asm__ __volatile__("cpuid"
+ : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
+ : "a"(func), "c"(subfunc));
+
+ cr4 = get_cr4();
+
+ return (!!(ecx & X86_FEATURE_OSXSAVE)) == (!!(cr4 & X86_CR4_OSXSAVE));
+}
+
+static void guest_code(void)
+{
+ uint64_t cr4;
+
+ /* turn on CR4.OSXSAVE */
+ cr4 = get_cr4();
+ cr4 |= X86_CR4_OSXSAVE;
+ set_cr4(cr4);
+
+ /* verify CR4.OSXSAVE == CPUID.OSXSAVE */
+ GUEST_ASSERT(cr4_cpuid_is_sync());
+
+ /* notify hypervisor to change CR4 */
+ GUEST_SYNC(0);
+
+ /* check again */
+ GUEST_ASSERT(cr4_cpuid_is_sync());
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_run *run;
+ struct kvm_vm *vm;
+ struct kvm_sregs sregs;
+ struct kvm_cpuid_entry2 *entry;
+ int rc;
+
+ entry = kvm_get_supported_cpuid_entry(1);
+ if (!(entry->ecx & X86_FEATURE_XSAVE)) {
+ printf("XSAVE feature not supported, skipping test\n");
+ return 0;
+ }
+
+ /* Tell stdout not to buffer its content */
+ setbuf(stdout, NULL);
+
+ /* Create VM */
+ vm = vm_create_default(VCPU_ID, 0, guest_code);
+ vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+ run = vcpu_state(vm, VCPU_ID);
+
+ while (1) {
+ rc = _vcpu_run(vm, VCPU_ID);
+
+ if (run->exit_reason == KVM_EXIT_IO) {
+ switch (run->io.port) {
+ case GUEST_PORT_SYNC:
+ /* emulate hypervisor clearing CR4.OSXSAVE */
+ vcpu_sregs_get(vm, VCPU_ID, &sregs);
+ sregs.cr4 &= ~X86_CR4_OSXSAVE;
+ vcpu_sregs_set(vm, VCPU_ID, &sregs);
+ break;
+ case GUEST_PORT_ABORT:
+ TEST_ASSERT(false, "Guest CR4 bit (OSXSAVE) unsynchronized with CPUID bit.");
+ break;
+ case GUEST_PORT_DONE:
+ goto done;
+ default:
+ TEST_ASSERT(false, "Unknown port 0x%x.",
+ run->io.port);
+ }
+ }
+ }
+
+ kvm_vm_free(vm);
+
+done:
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c
new file mode 100644
index 000000000..a9c4b5e21
--- /dev/null
+++ b/tools/testing/selftests/kvm/dirty_log_test.c
@@ -0,0 +1,308 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KVM dirty page logging test
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <time.h>
+#include <pthread.h>
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+
+#define DEBUG printf
+
+#define VCPU_ID 1
+/* The memory slot index to track dirty pages */
+#define TEST_MEM_SLOT_INDEX 1
+/*
+ * GPA offset of the testing memory slot. Must be bigger than the
+ * default vm mem slot, which is DEFAULT_GUEST_PHY_PAGES.
+ */
+#define TEST_MEM_OFFSET (1ULL << 30) /* 1G */
+/* Size of the testing memory slot */
+#define TEST_MEM_PAGES (1ULL << 18) /* 1G for 4K pages */
+/* How many pages to dirty for each guest loop */
+#define TEST_PAGES_PER_LOOP 1024
+/* How many host loops to run (one KVM_GET_DIRTY_LOG for each loop) */
+#define TEST_HOST_LOOP_N 32UL
+/* Interval for each host loop (ms) */
+#define TEST_HOST_LOOP_INTERVAL 10UL
+
+/*
+ * Guest variables. We use these variables to share data between host
+ * and guest. There are two copies of the variables, one in host memory
+ * (which is unused) and one in guest memory. When the host wants to
+ * access these variables, it needs to call addr_gva2hva() to access the
+ * guest copy.
+ */
+uint64_t guest_random_array[TEST_PAGES_PER_LOOP];
+uint64_t guest_iteration;
+uint64_t guest_page_size;
+
+/*
+ * Writes to the first byte of a random page within the testing memory
+ * region continuously.
+ */
+void guest_code(void)
+{
+ int i = 0;
+ uint64_t volatile *array = guest_random_array;
+ uint64_t volatile *guest_addr;
+
+ while (true) {
+ for (i = 0; i < TEST_PAGES_PER_LOOP; i++) {
+ /*
+ * Write to the first 8 bytes of a random page
+ * on the testing memory region.
+ */
+ guest_addr = (uint64_t *)
+ (TEST_MEM_OFFSET +
+ (array[i] % TEST_MEM_PAGES) * guest_page_size);
+ *guest_addr = guest_iteration;
+ }
+ /* Tell the host that we need more random numbers */
+ GUEST_SYNC(1);
+ }
+}
+
+/*
+ * Host variables. These variables should only be used by the host
+ * rather than the guest.
+ */
+bool host_quit;
+
+/* Points to the test VM memory region on which we track dirty logs */
+void *host_test_mem;
+
+/* For statistics only */
+uint64_t host_dirty_count;
+uint64_t host_clear_count;
+uint64_t host_track_next_count;
+
+/*
+ * We use this bitmap to track some pages that should have its dirty
+ * bit set in the _next_ iteration. For example, if we detected the
+ * page value changed to current iteration but at the same time the
+ * page bit is cleared in the latest bitmap, then the system must
+ * report that write in the next get dirty log call.
+ */
+unsigned long *host_bmap_track;
+
+void generate_random_array(uint64_t *guest_array, uint64_t size)
+{
+ uint64_t i;
+
+ for (i = 0; i < size; i++) {
+ guest_array[i] = random();
+ }
+}
+
+void *vcpu_worker(void *data)
+{
+ int ret;
+ uint64_t loops, *guest_array, pages_count = 0;
+ struct kvm_vm *vm = data;
+ struct kvm_run *run;
+ struct guest_args args;
+
+ run = vcpu_state(vm, VCPU_ID);
+
+ /* Retrieve the guest random array pointer and cache it */
+ guest_array = addr_gva2hva(vm, (vm_vaddr_t)guest_random_array);
+
+ DEBUG("VCPU starts\n");
+
+ generate_random_array(guest_array, TEST_PAGES_PER_LOOP);
+
+ while (!READ_ONCE(host_quit)) {
+ /* Let the guest to dirty these random pages */
+ ret = _vcpu_run(vm, VCPU_ID);
+ guest_args_read(vm, VCPU_ID, &args);
+ if (run->exit_reason == KVM_EXIT_IO &&
+ args.port == GUEST_PORT_SYNC) {
+ pages_count += TEST_PAGES_PER_LOOP;
+ generate_random_array(guest_array, TEST_PAGES_PER_LOOP);
+ } else {
+ TEST_ASSERT(false,
+ "Invalid guest sync status: "
+ "exit_reason=%s\n",
+ exit_reason_str(run->exit_reason));
+ }
+ }
+
+ DEBUG("VCPU exits, dirtied %"PRIu64" pages\n", pages_count);
+
+ return NULL;
+}
+
+void vm_dirty_log_verify(unsigned long *bmap, uint64_t iteration)
+{
+ uint64_t page;
+ uint64_t volatile *value_ptr;
+
+ for (page = 0; page < TEST_MEM_PAGES; page++) {
+ value_ptr = host_test_mem + page * getpagesize();
+
+ /* If this is a special page that we were tracking... */
+ if (test_and_clear_bit(page, host_bmap_track)) {
+ host_track_next_count++;
+ TEST_ASSERT(test_bit(page, bmap),
+ "Page %"PRIu64" should have its dirty bit "
+ "set in this iteration but it is missing",
+ page);
+ }
+
+ if (test_bit(page, bmap)) {
+ host_dirty_count++;
+ /*
+ * If the bit is set, the value written onto
+ * the corresponding page should be either the
+ * previous iteration number or the current one.
+ */
+ TEST_ASSERT(*value_ptr == iteration ||
+ *value_ptr == iteration - 1,
+ "Set page %"PRIu64" value %"PRIu64
+ " incorrect (iteration=%"PRIu64")",
+ page, *value_ptr, iteration);
+ } else {
+ host_clear_count++;
+ /*
+ * If cleared, the value written can be any
+ * value smaller or equals to the iteration
+ * number. Note that the value can be exactly
+ * (iteration-1) if that write can happen
+ * like this:
+ *
+ * (1) increase loop count to "iteration-1"
+ * (2) write to page P happens (with value
+ * "iteration-1")
+ * (3) get dirty log for "iteration-1"; we'll
+ * see that page P bit is set (dirtied),
+ * and not set the bit in host_bmap_track
+ * (4) increase loop count to "iteration"
+ * (which is current iteration)
+ * (5) get dirty log for current iteration,
+ * we'll see that page P is cleared, with
+ * value "iteration-1".
+ */
+ TEST_ASSERT(*value_ptr <= iteration,
+ "Clear page %"PRIu64" value %"PRIu64
+ " incorrect (iteration=%"PRIu64")",
+ page, *value_ptr, iteration);
+ if (*value_ptr == iteration) {
+ /*
+ * This page is _just_ modified; it
+ * should report its dirtyness in the
+ * next run
+ */
+ set_bit(page, host_bmap_track);
+ }
+ }
+ }
+}
+
+void help(char *name)
+{
+ puts("");
+ printf("usage: %s [-i iterations] [-I interval] [-h]\n", name);
+ puts("");
+ printf(" -i: specify iteration counts (default: %"PRIu64")\n",
+ TEST_HOST_LOOP_N);
+ printf(" -I: specify interval in ms (default: %"PRIu64" ms)\n",
+ TEST_HOST_LOOP_INTERVAL);
+ puts("");
+ exit(0);
+}
+
+int main(int argc, char *argv[])
+{
+ pthread_t vcpu_thread;
+ struct kvm_vm *vm;
+ uint64_t volatile *psize, *iteration;
+ unsigned long *bmap, iterations = TEST_HOST_LOOP_N,
+ interval = TEST_HOST_LOOP_INTERVAL;
+ int opt;
+
+ while ((opt = getopt(argc, argv, "hi:I:")) != -1) {
+ switch (opt) {
+ case 'i':
+ iterations = strtol(optarg, NULL, 10);
+ break;
+ case 'I':
+ interval = strtol(optarg, NULL, 10);
+ break;
+ case 'h':
+ default:
+ help(argv[0]);
+ break;
+ }
+ }
+
+ TEST_ASSERT(iterations > 2, "Iteration must be bigger than zero\n");
+ TEST_ASSERT(interval > 0, "Interval must be bigger than zero");
+
+ DEBUG("Test iterations: %"PRIu64", interval: %"PRIu64" (ms)\n",
+ iterations, interval);
+
+ srandom(time(0));
+
+ bmap = bitmap_alloc(TEST_MEM_PAGES);
+ host_bmap_track = bitmap_alloc(TEST_MEM_PAGES);
+
+ vm = vm_create_default(VCPU_ID, TEST_MEM_PAGES, guest_code);
+
+ /* Add an extra memory slot for testing dirty logging */
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+ TEST_MEM_OFFSET,
+ TEST_MEM_SLOT_INDEX,
+ TEST_MEM_PAGES,
+ KVM_MEM_LOG_DIRTY_PAGES);
+ /* Cache the HVA pointer of the region */
+ host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)TEST_MEM_OFFSET);
+
+ /* Do 1:1 mapping for the dirty track memory slot */
+ virt_map(vm, TEST_MEM_OFFSET, TEST_MEM_OFFSET,
+ TEST_MEM_PAGES * getpagesize(), 0);
+
+ vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+
+ /* Tell the guest about the page size on the system */
+ psize = addr_gva2hva(vm, (vm_vaddr_t)&guest_page_size);
+ *psize = getpagesize();
+
+ /* Start the iterations */
+ iteration = addr_gva2hva(vm, (vm_vaddr_t)&guest_iteration);
+ *iteration = 1;
+
+ /* Start dirtying pages */
+ pthread_create(&vcpu_thread, NULL, vcpu_worker, vm);
+
+ while (*iteration < iterations) {
+ /* Give the vcpu thread some time to dirty some pages */
+ usleep(interval * 1000);
+ kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap);
+ vm_dirty_log_verify(bmap, *iteration);
+ (*iteration)++;
+ }
+
+ /* Tell the vcpu thread to quit */
+ host_quit = true;
+ pthread_join(vcpu_thread, NULL);
+
+ DEBUG("Total bits checked: dirty (%"PRIu64"), clear (%"PRIu64"), "
+ "track_next (%"PRIu64")\n", host_dirty_count, host_clear_count,
+ host_track_next_count);
+
+ free(bmap);
+ free(host_bmap_track);
+ kvm_vm_free(vm);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
new file mode 100644
index 000000000..3acf9a917
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -0,0 +1,194 @@
+/*
+ * tools/testing/selftests/kvm/include/kvm_util.h
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ */
+#ifndef SELFTEST_KVM_UTIL_H
+#define SELFTEST_KVM_UTIL_H 1
+
+#include "test_util.h"
+
+#include "asm/kvm.h"
+#include "linux/kvm.h"
+#include <sys/ioctl.h>
+
+#include "sparsebit.h"
+
+/*
+ * Memslots can't cover the gfn starting at this gpa otherwise vCPUs can't be
+ * created. Only applies to VMs using EPT.
+ */
+#define KVM_DEFAULT_IDENTITY_MAP_ADDRESS 0xfffbc000ul
+
+
+/* Callers of kvm_util only have an incomplete/opaque description of the
+ * structure kvm_util is using to maintain the state of a VM.
+ */
+struct kvm_vm;
+
+typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */
+typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */
+
+/* Minimum allocated guest virtual and physical addresses */
+#define KVM_UTIL_MIN_VADDR 0x2000
+
+#define DEFAULT_GUEST_PHY_PAGES 512
+#define DEFAULT_GUEST_STACK_VADDR_MIN 0xab6000
+#define DEFAULT_STACK_PGS 5
+
+enum vm_guest_mode {
+ VM_MODE_FLAT48PG,
+};
+
+enum vm_mem_backing_src_type {
+ VM_MEM_SRC_ANONYMOUS,
+ VM_MEM_SRC_ANONYMOUS_THP,
+ VM_MEM_SRC_ANONYMOUS_HUGETLB,
+};
+
+int kvm_check_cap(long cap);
+int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap);
+
+struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm);
+void kvm_vm_free(struct kvm_vm *vmp);
+void kvm_vm_restart(struct kvm_vm *vmp, int perm);
+void kvm_vm_release(struct kvm_vm *vmp);
+void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log);
+
+int kvm_memcmp_hva_gva(void *hva,
+ struct kvm_vm *vm, const vm_vaddr_t gva, size_t len);
+
+void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename,
+ uint32_t data_memslot, uint32_t pgd_memslot);
+
+void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
+void vcpu_dump(FILE *stream, struct kvm_vm *vm,
+ uint32_t vcpuid, uint8_t indent);
+
+void vm_create_irqchip(struct kvm_vm *vm);
+
+void vm_userspace_mem_region_add(struct kvm_vm *vm,
+ enum vm_mem_backing_src_type src_type,
+ uint64_t guest_paddr, uint32_t slot, uint64_t npages,
+ uint32_t flags);
+
+void vcpu_ioctl(struct kvm_vm *vm,
+ uint32_t vcpuid, unsigned long ioctl, void *arg);
+void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
+void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
+void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid, int pgd_memslot, int gdt_memslot);
+vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
+ uint32_t data_memslot, uint32_t pgd_memslot);
+void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+ size_t size, uint32_t pgd_memslot);
+void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa);
+void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva);
+vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva);
+vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva);
+
+struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid);
+void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid);
+int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid);
+void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_mp_state *mp_state);
+void vcpu_regs_get(struct kvm_vm *vm,
+ uint32_t vcpuid, struct kvm_regs *regs);
+void vcpu_regs_set(struct kvm_vm *vm,
+ uint32_t vcpuid, struct kvm_regs *regs);
+void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...);
+void vcpu_sregs_get(struct kvm_vm *vm,
+ uint32_t vcpuid, struct kvm_sregs *sregs);
+void vcpu_sregs_set(struct kvm_vm *vm,
+ uint32_t vcpuid, struct kvm_sregs *sregs);
+int _vcpu_sregs_set(struct kvm_vm *vm,
+ uint32_t vcpuid, struct kvm_sregs *sregs);
+void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_vcpu_events *events);
+void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_vcpu_events *events);
+uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index);
+void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
+ uint64_t msr_value);
+
+const char *exit_reason_str(unsigned int exit_reason);
+
+void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot);
+void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+ uint32_t pgd_memslot);
+vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm,
+ vm_paddr_t paddr_min, uint32_t memslot);
+
+struct kvm_cpuid2 *kvm_get_supported_cpuid(void);
+void vcpu_set_cpuid(
+ struct kvm_vm *vm, uint32_t vcpuid, struct kvm_cpuid2 *cpuid);
+
+struct kvm_cpuid_entry2 *
+kvm_get_supported_cpuid_index(uint32_t function, uint32_t index);
+
+static inline struct kvm_cpuid_entry2 *
+kvm_get_supported_cpuid_entry(uint32_t function)
+{
+ return kvm_get_supported_cpuid_index(function, 0);
+}
+
+struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_size,
+ void *guest_code);
+void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code);
+
+typedef void (*vmx_guest_code_t)(vm_vaddr_t vmxon_vaddr,
+ vm_paddr_t vmxon_paddr,
+ vm_vaddr_t vmcs_vaddr,
+ vm_paddr_t vmcs_paddr);
+
+struct kvm_userspace_memory_region *
+kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
+ uint64_t end);
+
+struct kvm_dirty_log *
+allocate_kvm_dirty_log(struct kvm_userspace_memory_region *region);
+
+int vm_create_device(struct kvm_vm *vm, struct kvm_create_device *cd);
+
+#define GUEST_PORT_SYNC 0x1000
+#define GUEST_PORT_ABORT 0x1001
+#define GUEST_PORT_DONE 0x1002
+
+static inline void __exit_to_l0(uint16_t port, uint64_t arg0, uint64_t arg1)
+{
+ __asm__ __volatile__("in %[port], %%al"
+ :
+ : [port]"d"(port), "D"(arg0), "S"(arg1)
+ : "rax");
+}
+
+/*
+ * Allows to pass three arguments to the host: port is 16bit wide,
+ * arg0 & arg1 are 64bit wide
+ */
+#define GUEST_SYNC_ARGS(_port, _arg0, _arg1) \
+ __exit_to_l0(_port, (uint64_t) (_arg0), (uint64_t) (_arg1))
+
+#define GUEST_ASSERT(_condition) do { \
+ if (!(_condition)) \
+ GUEST_SYNC_ARGS(GUEST_PORT_ABORT, \
+ "Failed guest assert: " \
+ #_condition, __LINE__); \
+ } while (0)
+
+#define GUEST_SYNC(stage) GUEST_SYNC_ARGS(GUEST_PORT_SYNC, "hello", stage)
+
+#define GUEST_DONE() GUEST_SYNC_ARGS(GUEST_PORT_DONE, 0, 0)
+
+struct guest_args {
+ uint64_t arg0;
+ uint64_t arg1;
+ uint16_t port;
+} __attribute__ ((packed));
+
+void guest_args_read(struct kvm_vm *vm, uint32_t vcpu_id,
+ struct guest_args *args);
+
+#endif /* SELFTEST_KVM_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/sparsebit.h b/tools/testing/selftests/kvm/include/sparsebit.h
new file mode 100644
index 000000000..54cfeb656
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/sparsebit.h
@@ -0,0 +1,75 @@
+/*
+ * tools/testing/selftests/kvm/include/sparsebit.h
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ *
+ * Header file that describes API to the sparsebit library.
+ * This library provides a memory efficient means of storing
+ * the settings of bits indexed via a uint64_t. Memory usage
+ * is reasonable, significantly less than (2^64 / 8) bytes, as
+ * long as bits that are mostly set or mostly cleared are close
+ * to each other. This library is efficient in memory usage
+ * even in the case where most bits are set.
+ */
+
+#ifndef _TEST_SPARSEBIT_H_
+#define _TEST_SPARSEBIT_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct sparsebit;
+typedef uint64_t sparsebit_idx_t;
+typedef uint64_t sparsebit_num_t;
+
+struct sparsebit *sparsebit_alloc(void);
+void sparsebit_free(struct sparsebit **sbitp);
+void sparsebit_copy(struct sparsebit *dstp, struct sparsebit *src);
+
+bool sparsebit_is_set(struct sparsebit *sbit, sparsebit_idx_t idx);
+bool sparsebit_is_set_num(struct sparsebit *sbit,
+ sparsebit_idx_t idx, sparsebit_num_t num);
+bool sparsebit_is_clear(struct sparsebit *sbit, sparsebit_idx_t idx);
+bool sparsebit_is_clear_num(struct sparsebit *sbit,
+ sparsebit_idx_t idx, sparsebit_num_t num);
+sparsebit_num_t sparsebit_num_set(struct sparsebit *sbit);
+bool sparsebit_any_set(struct sparsebit *sbit);
+bool sparsebit_any_clear(struct sparsebit *sbit);
+bool sparsebit_all_set(struct sparsebit *sbit);
+bool sparsebit_all_clear(struct sparsebit *sbit);
+sparsebit_idx_t sparsebit_first_set(struct sparsebit *sbit);
+sparsebit_idx_t sparsebit_first_clear(struct sparsebit *sbit);
+sparsebit_idx_t sparsebit_next_set(struct sparsebit *sbit, sparsebit_idx_t prev);
+sparsebit_idx_t sparsebit_next_clear(struct sparsebit *sbit, sparsebit_idx_t prev);
+sparsebit_idx_t sparsebit_next_set_num(struct sparsebit *sbit,
+ sparsebit_idx_t start, sparsebit_num_t num);
+sparsebit_idx_t sparsebit_next_clear_num(struct sparsebit *sbit,
+ sparsebit_idx_t start, sparsebit_num_t num);
+
+void sparsebit_set(struct sparsebit *sbitp, sparsebit_idx_t idx);
+void sparsebit_set_num(struct sparsebit *sbitp, sparsebit_idx_t start,
+ sparsebit_num_t num);
+void sparsebit_set_all(struct sparsebit *sbitp);
+
+void sparsebit_clear(struct sparsebit *sbitp, sparsebit_idx_t idx);
+void sparsebit_clear_num(struct sparsebit *sbitp,
+ sparsebit_idx_t start, sparsebit_num_t num);
+void sparsebit_clear_all(struct sparsebit *sbitp);
+
+void sparsebit_dump(FILE *stream, struct sparsebit *sbit,
+ unsigned int indent);
+void sparsebit_validate_internal(struct sparsebit *sbit);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _TEST_SPARSEBIT_H_ */
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
new file mode 100644
index 000000000..73c393343
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -0,0 +1,44 @@
+/*
+ * tools/testing/selftests/kvm/include/test_util.h
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ */
+
+#ifndef TEST_UTIL_H
+#define TEST_UTIL_H 1
+
+#include <stdlib.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include "kselftest.h"
+
+ssize_t test_write(int fd, const void *buf, size_t count);
+ssize_t test_read(int fd, void *buf, size_t count);
+int test_seq_read(const char *path, char **bufp, size_t *sizep);
+
+void test_assert(bool exp, const char *exp_str,
+ const char *file, unsigned int line, const char *fmt, ...);
+
+#define TEST_ASSERT(e, fmt, ...) \
+ test_assert((e), #e, __FILE__, __LINE__, fmt, ##__VA_ARGS__)
+
+#define ASSERT_EQ(a, b) do { \
+ typeof(a) __a = (a); \
+ typeof(b) __b = (b); \
+ TEST_ASSERT(__a == __b, \
+ "ASSERT_EQ(%s, %s) failed.\n" \
+ "\t%s is %#lx\n" \
+ "\t%s is %#lx", \
+ #a, #b, #a, (unsigned long) __a, #b, (unsigned long) __b); \
+} while (0)
+
+#endif /* TEST_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/vmx.h b/tools/testing/selftests/kvm/include/vmx.h
new file mode 100644
index 000000000..b9ffe1024
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/vmx.h
@@ -0,0 +1,552 @@
+/*
+ * tools/testing/selftests/kvm/include/vmx.h
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ */
+
+#ifndef SELFTEST_KVM_VMX_H
+#define SELFTEST_KVM_VMX_H
+
+#include <stdint.h>
+#include "x86.h"
+
+#define CPUID_VMX_BIT 5
+
+#define CPUID_VMX (1 << 5)
+
+/*
+ * Definitions of Primary Processor-Based VM-Execution Controls.
+ */
+#define CPU_BASED_VIRTUAL_INTR_PENDING 0x00000004
+#define CPU_BASED_USE_TSC_OFFSETING 0x00000008
+#define CPU_BASED_HLT_EXITING 0x00000080
+#define CPU_BASED_INVLPG_EXITING 0x00000200
+#define CPU_BASED_MWAIT_EXITING 0x00000400
+#define CPU_BASED_RDPMC_EXITING 0x00000800
+#define CPU_BASED_RDTSC_EXITING 0x00001000
+#define CPU_BASED_CR3_LOAD_EXITING 0x00008000
+#define CPU_BASED_CR3_STORE_EXITING 0x00010000
+#define CPU_BASED_CR8_LOAD_EXITING 0x00080000
+#define CPU_BASED_CR8_STORE_EXITING 0x00100000
+#define CPU_BASED_TPR_SHADOW 0x00200000
+#define CPU_BASED_VIRTUAL_NMI_PENDING 0x00400000
+#define CPU_BASED_MOV_DR_EXITING 0x00800000
+#define CPU_BASED_UNCOND_IO_EXITING 0x01000000
+#define CPU_BASED_USE_IO_BITMAPS 0x02000000
+#define CPU_BASED_MONITOR_TRAP 0x08000000
+#define CPU_BASED_USE_MSR_BITMAPS 0x10000000
+#define CPU_BASED_MONITOR_EXITING 0x20000000
+#define CPU_BASED_PAUSE_EXITING 0x40000000
+#define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS 0x80000000
+
+#define CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x0401e172
+
+/*
+ * Definitions of Secondary Processor-Based VM-Execution Controls.
+ */
+#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
+#define SECONDARY_EXEC_ENABLE_EPT 0x00000002
+#define SECONDARY_EXEC_DESC 0x00000004
+#define SECONDARY_EXEC_RDTSCP 0x00000008
+#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE 0x00000010
+#define SECONDARY_EXEC_ENABLE_VPID 0x00000020
+#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040
+#define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080
+#define SECONDARY_EXEC_APIC_REGISTER_VIRT 0x00000100
+#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200
+#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400
+#define SECONDARY_EXEC_RDRAND_EXITING 0x00000800
+#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000
+#define SECONDARY_EXEC_ENABLE_VMFUNC 0x00002000
+#define SECONDARY_EXEC_SHADOW_VMCS 0x00004000
+#define SECONDARY_EXEC_RDSEED_EXITING 0x00010000
+#define SECONDARY_EXEC_ENABLE_PML 0x00020000
+#define SECONDARY_EPT_VE 0x00040000
+#define SECONDARY_ENABLE_XSAV_RESTORE 0x00100000
+#define SECONDARY_EXEC_TSC_SCALING 0x02000000
+
+#define PIN_BASED_EXT_INTR_MASK 0x00000001
+#define PIN_BASED_NMI_EXITING 0x00000008
+#define PIN_BASED_VIRTUAL_NMIS 0x00000020
+#define PIN_BASED_VMX_PREEMPTION_TIMER 0x00000040
+#define PIN_BASED_POSTED_INTR 0x00000080
+
+#define PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x00000016
+
+#define VM_EXIT_SAVE_DEBUG_CONTROLS 0x00000004
+#define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200
+#define VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL 0x00001000
+#define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000
+#define VM_EXIT_SAVE_IA32_PAT 0x00040000
+#define VM_EXIT_LOAD_IA32_PAT 0x00080000
+#define VM_EXIT_SAVE_IA32_EFER 0x00100000
+#define VM_EXIT_LOAD_IA32_EFER 0x00200000
+#define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER 0x00400000
+
+#define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR 0x00036dff
+
+#define VM_ENTRY_LOAD_DEBUG_CONTROLS 0x00000004
+#define VM_ENTRY_IA32E_MODE 0x00000200
+#define VM_ENTRY_SMM 0x00000400
+#define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800
+#define VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL 0x00002000
+#define VM_ENTRY_LOAD_IA32_PAT 0x00004000
+#define VM_ENTRY_LOAD_IA32_EFER 0x00008000
+
+#define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x000011ff
+
+#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f
+#define VMX_MISC_SAVE_EFER_LMA 0x00000020
+
+#define EXIT_REASON_FAILED_VMENTRY 0x80000000
+#define EXIT_REASON_EXCEPTION_NMI 0
+#define EXIT_REASON_EXTERNAL_INTERRUPT 1
+#define EXIT_REASON_TRIPLE_FAULT 2
+#define EXIT_REASON_PENDING_INTERRUPT 7
+#define EXIT_REASON_NMI_WINDOW 8
+#define EXIT_REASON_TASK_SWITCH 9
+#define EXIT_REASON_CPUID 10
+#define EXIT_REASON_HLT 12
+#define EXIT_REASON_INVD 13
+#define EXIT_REASON_INVLPG 14
+#define EXIT_REASON_RDPMC 15
+#define EXIT_REASON_RDTSC 16
+#define EXIT_REASON_VMCALL 18
+#define EXIT_REASON_VMCLEAR 19
+#define EXIT_REASON_VMLAUNCH 20
+#define EXIT_REASON_VMPTRLD 21
+#define EXIT_REASON_VMPTRST 22
+#define EXIT_REASON_VMREAD 23
+#define EXIT_REASON_VMRESUME 24
+#define EXIT_REASON_VMWRITE 25
+#define EXIT_REASON_VMOFF 26
+#define EXIT_REASON_VMON 27
+#define EXIT_REASON_CR_ACCESS 28
+#define EXIT_REASON_DR_ACCESS 29
+#define EXIT_REASON_IO_INSTRUCTION 30
+#define EXIT_REASON_MSR_READ 31
+#define EXIT_REASON_MSR_WRITE 32
+#define EXIT_REASON_INVALID_STATE 33
+#define EXIT_REASON_MWAIT_INSTRUCTION 36
+#define EXIT_REASON_MONITOR_INSTRUCTION 39
+#define EXIT_REASON_PAUSE_INSTRUCTION 40
+#define EXIT_REASON_MCE_DURING_VMENTRY 41
+#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
+#define EXIT_REASON_APIC_ACCESS 44
+#define EXIT_REASON_EOI_INDUCED 45
+#define EXIT_REASON_EPT_VIOLATION 48
+#define EXIT_REASON_EPT_MISCONFIG 49
+#define EXIT_REASON_INVEPT 50
+#define EXIT_REASON_RDTSCP 51
+#define EXIT_REASON_PREEMPTION_TIMER 52
+#define EXIT_REASON_INVVPID 53
+#define EXIT_REASON_WBINVD 54
+#define EXIT_REASON_XSETBV 55
+#define EXIT_REASON_APIC_WRITE 56
+#define EXIT_REASON_INVPCID 58
+#define EXIT_REASON_PML_FULL 62
+#define EXIT_REASON_XSAVES 63
+#define EXIT_REASON_XRSTORS 64
+#define LAST_EXIT_REASON 64
+
+enum vmcs_field {
+ VIRTUAL_PROCESSOR_ID = 0x00000000,
+ POSTED_INTR_NV = 0x00000002,
+ GUEST_ES_SELECTOR = 0x00000800,
+ GUEST_CS_SELECTOR = 0x00000802,
+ GUEST_SS_SELECTOR = 0x00000804,
+ GUEST_DS_SELECTOR = 0x00000806,
+ GUEST_FS_SELECTOR = 0x00000808,
+ GUEST_GS_SELECTOR = 0x0000080a,
+ GUEST_LDTR_SELECTOR = 0x0000080c,
+ GUEST_TR_SELECTOR = 0x0000080e,
+ GUEST_INTR_STATUS = 0x00000810,
+ GUEST_PML_INDEX = 0x00000812,
+ HOST_ES_SELECTOR = 0x00000c00,
+ HOST_CS_SELECTOR = 0x00000c02,
+ HOST_SS_SELECTOR = 0x00000c04,
+ HOST_DS_SELECTOR = 0x00000c06,
+ HOST_FS_SELECTOR = 0x00000c08,
+ HOST_GS_SELECTOR = 0x00000c0a,
+ HOST_TR_SELECTOR = 0x00000c0c,
+ IO_BITMAP_A = 0x00002000,
+ IO_BITMAP_A_HIGH = 0x00002001,
+ IO_BITMAP_B = 0x00002002,
+ IO_BITMAP_B_HIGH = 0x00002003,
+ MSR_BITMAP = 0x00002004,
+ MSR_BITMAP_HIGH = 0x00002005,
+ VM_EXIT_MSR_STORE_ADDR = 0x00002006,
+ VM_EXIT_MSR_STORE_ADDR_HIGH = 0x00002007,
+ VM_EXIT_MSR_LOAD_ADDR = 0x00002008,
+ VM_EXIT_MSR_LOAD_ADDR_HIGH = 0x00002009,
+ VM_ENTRY_MSR_LOAD_ADDR = 0x0000200a,
+ VM_ENTRY_MSR_LOAD_ADDR_HIGH = 0x0000200b,
+ PML_ADDRESS = 0x0000200e,
+ PML_ADDRESS_HIGH = 0x0000200f,
+ TSC_OFFSET = 0x00002010,
+ TSC_OFFSET_HIGH = 0x00002011,
+ VIRTUAL_APIC_PAGE_ADDR = 0x00002012,
+ VIRTUAL_APIC_PAGE_ADDR_HIGH = 0x00002013,
+ APIC_ACCESS_ADDR = 0x00002014,
+ APIC_ACCESS_ADDR_HIGH = 0x00002015,
+ POSTED_INTR_DESC_ADDR = 0x00002016,
+ POSTED_INTR_DESC_ADDR_HIGH = 0x00002017,
+ EPT_POINTER = 0x0000201a,
+ EPT_POINTER_HIGH = 0x0000201b,
+ EOI_EXIT_BITMAP0 = 0x0000201c,
+ EOI_EXIT_BITMAP0_HIGH = 0x0000201d,
+ EOI_EXIT_BITMAP1 = 0x0000201e,
+ EOI_EXIT_BITMAP1_HIGH = 0x0000201f,
+ EOI_EXIT_BITMAP2 = 0x00002020,
+ EOI_EXIT_BITMAP2_HIGH = 0x00002021,
+ EOI_EXIT_BITMAP3 = 0x00002022,
+ EOI_EXIT_BITMAP3_HIGH = 0x00002023,
+ VMREAD_BITMAP = 0x00002026,
+ VMREAD_BITMAP_HIGH = 0x00002027,
+ VMWRITE_BITMAP = 0x00002028,
+ VMWRITE_BITMAP_HIGH = 0x00002029,
+ XSS_EXIT_BITMAP = 0x0000202C,
+ XSS_EXIT_BITMAP_HIGH = 0x0000202D,
+ TSC_MULTIPLIER = 0x00002032,
+ TSC_MULTIPLIER_HIGH = 0x00002033,
+ GUEST_PHYSICAL_ADDRESS = 0x00002400,
+ GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401,
+ VMCS_LINK_POINTER = 0x00002800,
+ VMCS_LINK_POINTER_HIGH = 0x00002801,
+ GUEST_IA32_DEBUGCTL = 0x00002802,
+ GUEST_IA32_DEBUGCTL_HIGH = 0x00002803,
+ GUEST_IA32_PAT = 0x00002804,
+ GUEST_IA32_PAT_HIGH = 0x00002805,
+ GUEST_IA32_EFER = 0x00002806,
+ GUEST_IA32_EFER_HIGH = 0x00002807,
+ GUEST_IA32_PERF_GLOBAL_CTRL = 0x00002808,
+ GUEST_IA32_PERF_GLOBAL_CTRL_HIGH= 0x00002809,
+ GUEST_PDPTR0 = 0x0000280a,
+ GUEST_PDPTR0_HIGH = 0x0000280b,
+ GUEST_PDPTR1 = 0x0000280c,
+ GUEST_PDPTR1_HIGH = 0x0000280d,
+ GUEST_PDPTR2 = 0x0000280e,
+ GUEST_PDPTR2_HIGH = 0x0000280f,
+ GUEST_PDPTR3 = 0x00002810,
+ GUEST_PDPTR3_HIGH = 0x00002811,
+ GUEST_BNDCFGS = 0x00002812,
+ GUEST_BNDCFGS_HIGH = 0x00002813,
+ HOST_IA32_PAT = 0x00002c00,
+ HOST_IA32_PAT_HIGH = 0x00002c01,
+ HOST_IA32_EFER = 0x00002c02,
+ HOST_IA32_EFER_HIGH = 0x00002c03,
+ HOST_IA32_PERF_GLOBAL_CTRL = 0x00002c04,
+ HOST_IA32_PERF_GLOBAL_CTRL_HIGH = 0x00002c05,
+ PIN_BASED_VM_EXEC_CONTROL = 0x00004000,
+ CPU_BASED_VM_EXEC_CONTROL = 0x00004002,
+ EXCEPTION_BITMAP = 0x00004004,
+ PAGE_FAULT_ERROR_CODE_MASK = 0x00004006,
+ PAGE_FAULT_ERROR_CODE_MATCH = 0x00004008,
+ CR3_TARGET_COUNT = 0x0000400a,
+ VM_EXIT_CONTROLS = 0x0000400c,
+ VM_EXIT_MSR_STORE_COUNT = 0x0000400e,
+ VM_EXIT_MSR_LOAD_COUNT = 0x00004010,
+ VM_ENTRY_CONTROLS = 0x00004012,
+ VM_ENTRY_MSR_LOAD_COUNT = 0x00004014,
+ VM_ENTRY_INTR_INFO_FIELD = 0x00004016,
+ VM_ENTRY_EXCEPTION_ERROR_CODE = 0x00004018,
+ VM_ENTRY_INSTRUCTION_LEN = 0x0000401a,
+ TPR_THRESHOLD = 0x0000401c,
+ SECONDARY_VM_EXEC_CONTROL = 0x0000401e,
+ PLE_GAP = 0x00004020,
+ PLE_WINDOW = 0x00004022,
+ VM_INSTRUCTION_ERROR = 0x00004400,
+ VM_EXIT_REASON = 0x00004402,
+ VM_EXIT_INTR_INFO = 0x00004404,
+ VM_EXIT_INTR_ERROR_CODE = 0x00004406,
+ IDT_VECTORING_INFO_FIELD = 0x00004408,
+ IDT_VECTORING_ERROR_CODE = 0x0000440a,
+ VM_EXIT_INSTRUCTION_LEN = 0x0000440c,
+ VMX_INSTRUCTION_INFO = 0x0000440e,
+ GUEST_ES_LIMIT = 0x00004800,
+ GUEST_CS_LIMIT = 0x00004802,
+ GUEST_SS_LIMIT = 0x00004804,
+ GUEST_DS_LIMIT = 0x00004806,
+ GUEST_FS_LIMIT = 0x00004808,
+ GUEST_GS_LIMIT = 0x0000480a,
+ GUEST_LDTR_LIMIT = 0x0000480c,
+ GUEST_TR_LIMIT = 0x0000480e,
+ GUEST_GDTR_LIMIT = 0x00004810,
+ GUEST_IDTR_LIMIT = 0x00004812,
+ GUEST_ES_AR_BYTES = 0x00004814,
+ GUEST_CS_AR_BYTES = 0x00004816,
+ GUEST_SS_AR_BYTES = 0x00004818,
+ GUEST_DS_AR_BYTES = 0x0000481a,
+ GUEST_FS_AR_BYTES = 0x0000481c,
+ GUEST_GS_AR_BYTES = 0x0000481e,
+ GUEST_LDTR_AR_BYTES = 0x00004820,
+ GUEST_TR_AR_BYTES = 0x00004822,
+ GUEST_INTERRUPTIBILITY_INFO = 0x00004824,
+ GUEST_ACTIVITY_STATE = 0X00004826,
+ GUEST_SYSENTER_CS = 0x0000482A,
+ VMX_PREEMPTION_TIMER_VALUE = 0x0000482E,
+ HOST_IA32_SYSENTER_CS = 0x00004c00,
+ CR0_GUEST_HOST_MASK = 0x00006000,
+ CR4_GUEST_HOST_MASK = 0x00006002,
+ CR0_READ_SHADOW = 0x00006004,
+ CR4_READ_SHADOW = 0x00006006,
+ CR3_TARGET_VALUE0 = 0x00006008,
+ CR3_TARGET_VALUE1 = 0x0000600a,
+ CR3_TARGET_VALUE2 = 0x0000600c,
+ CR3_TARGET_VALUE3 = 0x0000600e,
+ EXIT_QUALIFICATION = 0x00006400,
+ GUEST_LINEAR_ADDRESS = 0x0000640a,
+ GUEST_CR0 = 0x00006800,
+ GUEST_CR3 = 0x00006802,
+ GUEST_CR4 = 0x00006804,
+ GUEST_ES_BASE = 0x00006806,
+ GUEST_CS_BASE = 0x00006808,
+ GUEST_SS_BASE = 0x0000680a,
+ GUEST_DS_BASE = 0x0000680c,
+ GUEST_FS_BASE = 0x0000680e,
+ GUEST_GS_BASE = 0x00006810,
+ GUEST_LDTR_BASE = 0x00006812,
+ GUEST_TR_BASE = 0x00006814,
+ GUEST_GDTR_BASE = 0x00006816,
+ GUEST_IDTR_BASE = 0x00006818,
+ GUEST_DR7 = 0x0000681a,
+ GUEST_RSP = 0x0000681c,
+ GUEST_RIP = 0x0000681e,
+ GUEST_RFLAGS = 0x00006820,
+ GUEST_PENDING_DBG_EXCEPTIONS = 0x00006822,
+ GUEST_SYSENTER_ESP = 0x00006824,
+ GUEST_SYSENTER_EIP = 0x00006826,
+ HOST_CR0 = 0x00006c00,
+ HOST_CR3 = 0x00006c02,
+ HOST_CR4 = 0x00006c04,
+ HOST_FS_BASE = 0x00006c06,
+ HOST_GS_BASE = 0x00006c08,
+ HOST_TR_BASE = 0x00006c0a,
+ HOST_GDTR_BASE = 0x00006c0c,
+ HOST_IDTR_BASE = 0x00006c0e,
+ HOST_IA32_SYSENTER_ESP = 0x00006c10,
+ HOST_IA32_SYSENTER_EIP = 0x00006c12,
+ HOST_RSP = 0x00006c14,
+ HOST_RIP = 0x00006c16,
+};
+
+struct vmx_msr_entry {
+ uint32_t index;
+ uint32_t reserved;
+ uint64_t value;
+} __attribute__ ((aligned(16)));
+
+static inline int vmxon(uint64_t phys)
+{
+ uint8_t ret;
+
+ __asm__ __volatile__ ("vmxon %[pa]; setna %[ret]"
+ : [ret]"=rm"(ret)
+ : [pa]"m"(phys)
+ : "cc", "memory");
+
+ return ret;
+}
+
+static inline void vmxoff(void)
+{
+ __asm__ __volatile__("vmxoff");
+}
+
+static inline int vmclear(uint64_t vmcs_pa)
+{
+ uint8_t ret;
+
+ __asm__ __volatile__ ("vmclear %[pa]; setna %[ret]"
+ : [ret]"=rm"(ret)
+ : [pa]"m"(vmcs_pa)
+ : "cc", "memory");
+
+ return ret;
+}
+
+static inline int vmptrld(uint64_t vmcs_pa)
+{
+ uint8_t ret;
+
+ __asm__ __volatile__ ("vmptrld %[pa]; setna %[ret]"
+ : [ret]"=rm"(ret)
+ : [pa]"m"(vmcs_pa)
+ : "cc", "memory");
+
+ return ret;
+}
+
+static inline int vmptrst(uint64_t *value)
+{
+ uint64_t tmp;
+ uint8_t ret;
+
+ __asm__ __volatile__("vmptrst %[value]; setna %[ret]"
+ : [value]"=m"(tmp), [ret]"=rm"(ret)
+ : : "cc", "memory");
+
+ *value = tmp;
+ return ret;
+}
+
+/*
+ * A wrapper around vmptrst that ignores errors and returns zero if the
+ * vmptrst instruction fails.
+ */
+static inline uint64_t vmptrstz(void)
+{
+ uint64_t value = 0;
+ vmptrst(&value);
+ return value;
+}
+
+/*
+ * No guest state (e.g. GPRs) is established by this vmlaunch.
+ */
+static inline int vmlaunch(void)
+{
+ int ret;
+
+ __asm__ __volatile__("push %%rbp;"
+ "push %%rcx;"
+ "push %%rdx;"
+ "push %%rsi;"
+ "push %%rdi;"
+ "push $0;"
+ "vmwrite %%rsp, %[host_rsp];"
+ "lea 1f(%%rip), %%rax;"
+ "vmwrite %%rax, %[host_rip];"
+ "vmlaunch;"
+ "incq (%%rsp);"
+ "1: pop %%rax;"
+ "pop %%rdi;"
+ "pop %%rsi;"
+ "pop %%rdx;"
+ "pop %%rcx;"
+ "pop %%rbp;"
+ : [ret]"=&a"(ret)
+ : [host_rsp]"r"((uint64_t)HOST_RSP),
+ [host_rip]"r"((uint64_t)HOST_RIP)
+ : "memory", "cc", "rbx", "r8", "r9", "r10",
+ "r11", "r12", "r13", "r14", "r15");
+ return ret;
+}
+
+/*
+ * No guest state (e.g. GPRs) is established by this vmresume.
+ */
+static inline int vmresume(void)
+{
+ int ret;
+
+ __asm__ __volatile__("push %%rbp;"
+ "push %%rcx;"
+ "push %%rdx;"
+ "push %%rsi;"
+ "push %%rdi;"
+ "push $0;"
+ "vmwrite %%rsp, %[host_rsp];"
+ "lea 1f(%%rip), %%rax;"
+ "vmwrite %%rax, %[host_rip];"
+ "vmresume;"
+ "incq (%%rsp);"
+ "1: pop %%rax;"
+ "pop %%rdi;"
+ "pop %%rsi;"
+ "pop %%rdx;"
+ "pop %%rcx;"
+ "pop %%rbp;"
+ : [ret]"=&a"(ret)
+ : [host_rsp]"r"((uint64_t)HOST_RSP),
+ [host_rip]"r"((uint64_t)HOST_RIP)
+ : "memory", "cc", "rbx", "r8", "r9", "r10",
+ "r11", "r12", "r13", "r14", "r15");
+ return ret;
+}
+
+static inline void vmcall(void)
+{
+ /* Currently, L1 destroys our GPRs during vmexits. */
+ __asm__ __volatile__("push %%rbp; vmcall; pop %%rbp" : : :
+ "rax", "rbx", "rcx", "rdx",
+ "rsi", "rdi", "r8", "r9", "r10", "r11", "r12",
+ "r13", "r14", "r15");
+}
+
+static inline int vmread(uint64_t encoding, uint64_t *value)
+{
+ uint64_t tmp;
+ uint8_t ret;
+
+ __asm__ __volatile__("vmread %[encoding], %[value]; setna %[ret]"
+ : [value]"=rm"(tmp), [ret]"=rm"(ret)
+ : [encoding]"r"(encoding)
+ : "cc", "memory");
+
+ *value = tmp;
+ return ret;
+}
+
+/*
+ * A wrapper around vmread that ignores errors and returns zero if the
+ * vmread instruction fails.
+ */
+static inline uint64_t vmreadz(uint64_t encoding)
+{
+ uint64_t value = 0;
+ vmread(encoding, &value);
+ return value;
+}
+
+static inline int vmwrite(uint64_t encoding, uint64_t value)
+{
+ uint8_t ret;
+
+ __asm__ __volatile__ ("vmwrite %[value], %[encoding]; setna %[ret]"
+ : [ret]"=rm"(ret)
+ : [value]"rm"(value), [encoding]"r"(encoding)
+ : "cc", "memory");
+
+ return ret;
+}
+
+static inline uint32_t vmcs_revision(void)
+{
+ return rdmsr(MSR_IA32_VMX_BASIC);
+}
+
+struct vmx_pages {
+ void *vmxon_hva;
+ uint64_t vmxon_gpa;
+ void *vmxon;
+
+ void *vmcs_hva;
+ uint64_t vmcs_gpa;
+ void *vmcs;
+
+ void *msr_hva;
+ uint64_t msr_gpa;
+ void *msr;
+
+ void *shadow_vmcs_hva;
+ uint64_t shadow_vmcs_gpa;
+ void *shadow_vmcs;
+
+ void *vmread_hva;
+ uint64_t vmread_gpa;
+ void *vmread;
+
+ void *vmwrite_hva;
+ uint64_t vmwrite_gpa;
+ void *vmwrite;
+};
+
+struct vmx_pages *vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva);
+bool prepare_for_vmx_operation(struct vmx_pages *vmx);
+void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp);
+
+#endif /* !SELFTEST_KVM_VMX_H */
diff --git a/tools/testing/selftests/kvm/include/x86.h b/tools/testing/selftests/kvm/include/x86.h
new file mode 100644
index 000000000..a7667a613
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/x86.h
@@ -0,0 +1,1047 @@
+/*
+ * tools/testing/selftests/kvm/include/x86.h
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ */
+
+#ifndef SELFTEST_KVM_X86_H
+#define SELFTEST_KVM_X86_H
+
+#include <assert.h>
+#include <stdint.h>
+
+#define X86_EFLAGS_FIXED (1u << 1)
+
+#define X86_CR4_VME (1ul << 0)
+#define X86_CR4_PVI (1ul << 1)
+#define X86_CR4_TSD (1ul << 2)
+#define X86_CR4_DE (1ul << 3)
+#define X86_CR4_PSE (1ul << 4)
+#define X86_CR4_PAE (1ul << 5)
+#define X86_CR4_MCE (1ul << 6)
+#define X86_CR4_PGE (1ul << 7)
+#define X86_CR4_PCE (1ul << 8)
+#define X86_CR4_OSFXSR (1ul << 9)
+#define X86_CR4_OSXMMEXCPT (1ul << 10)
+#define X86_CR4_UMIP (1ul << 11)
+#define X86_CR4_VMXE (1ul << 13)
+#define X86_CR4_SMXE (1ul << 14)
+#define X86_CR4_FSGSBASE (1ul << 16)
+#define X86_CR4_PCIDE (1ul << 17)
+#define X86_CR4_OSXSAVE (1ul << 18)
+#define X86_CR4_SMEP (1ul << 20)
+#define X86_CR4_SMAP (1ul << 21)
+#define X86_CR4_PKE (1ul << 22)
+
+/* The enum values match the intruction encoding of each register */
+enum x86_register {
+ RAX = 0,
+ RCX,
+ RDX,
+ RBX,
+ RSP,
+ RBP,
+ RSI,
+ RDI,
+ R8,
+ R9,
+ R10,
+ R11,
+ R12,
+ R13,
+ R14,
+ R15,
+};
+
+struct desc64 {
+ uint16_t limit0;
+ uint16_t base0;
+ unsigned base1:8, type:4, s:1, dpl:2, p:1;
+ unsigned limit1:4, avl:1, l:1, db:1, g:1, base2:8;
+ uint32_t base3;
+ uint32_t zero1;
+} __attribute__((packed));
+
+struct desc_ptr {
+ uint16_t size;
+ uint64_t address;
+} __attribute__((packed));
+
+static inline uint64_t get_desc64_base(const struct desc64 *desc)
+{
+ return ((uint64_t)desc->base3 << 32) |
+ (desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24));
+}
+
+static inline uint64_t rdtsc(void)
+{
+ uint32_t eax, edx;
+
+ /*
+ * The lfence is to wait (on Intel CPUs) until all previous
+ * instructions have been executed.
+ */
+ __asm__ __volatile__("lfence; rdtsc" : "=a"(eax), "=d"(edx));
+ return ((uint64_t)edx) << 32 | eax;
+}
+
+static inline uint64_t rdtscp(uint32_t *aux)
+{
+ uint32_t eax, edx;
+
+ __asm__ __volatile__("rdtscp" : "=a"(eax), "=d"(edx), "=c"(*aux));
+ return ((uint64_t)edx) << 32 | eax;
+}
+
+static inline uint64_t rdmsr(uint32_t msr)
+{
+ uint32_t a, d;
+
+ __asm__ __volatile__("rdmsr" : "=a"(a), "=d"(d) : "c"(msr) : "memory");
+
+ return a | ((uint64_t) d << 32);
+}
+
+static inline void wrmsr(uint32_t msr, uint64_t value)
+{
+ uint32_t a = value;
+ uint32_t d = value >> 32;
+
+ __asm__ __volatile__("wrmsr" :: "a"(a), "d"(d), "c"(msr) : "memory");
+}
+
+
+static inline uint16_t inw(uint16_t port)
+{
+ uint16_t tmp;
+
+ __asm__ __volatile__("in %%dx, %%ax"
+ : /* output */ "=a" (tmp)
+ : /* input */ "d" (port));
+
+ return tmp;
+}
+
+static inline uint16_t get_es(void)
+{
+ uint16_t es;
+
+ __asm__ __volatile__("mov %%es, %[es]"
+ : /* output */ [es]"=rm"(es));
+ return es;
+}
+
+static inline uint16_t get_cs(void)
+{
+ uint16_t cs;
+
+ __asm__ __volatile__("mov %%cs, %[cs]"
+ : /* output */ [cs]"=rm"(cs));
+ return cs;
+}
+
+static inline uint16_t get_ss(void)
+{
+ uint16_t ss;
+
+ __asm__ __volatile__("mov %%ss, %[ss]"
+ : /* output */ [ss]"=rm"(ss));
+ return ss;
+}
+
+static inline uint16_t get_ds(void)
+{
+ uint16_t ds;
+
+ __asm__ __volatile__("mov %%ds, %[ds]"
+ : /* output */ [ds]"=rm"(ds));
+ return ds;
+}
+
+static inline uint16_t get_fs(void)
+{
+ uint16_t fs;
+
+ __asm__ __volatile__("mov %%fs, %[fs]"
+ : /* output */ [fs]"=rm"(fs));
+ return fs;
+}
+
+static inline uint16_t get_gs(void)
+{
+ uint16_t gs;
+
+ __asm__ __volatile__("mov %%gs, %[gs]"
+ : /* output */ [gs]"=rm"(gs));
+ return gs;
+}
+
+static inline uint16_t get_tr(void)
+{
+ uint16_t tr;
+
+ __asm__ __volatile__("str %[tr]"
+ : /* output */ [tr]"=rm"(tr));
+ return tr;
+}
+
+static inline uint64_t get_cr0(void)
+{
+ uint64_t cr0;
+
+ __asm__ __volatile__("mov %%cr0, %[cr0]"
+ : /* output */ [cr0]"=r"(cr0));
+ return cr0;
+}
+
+static inline uint64_t get_cr3(void)
+{
+ uint64_t cr3;
+
+ __asm__ __volatile__("mov %%cr3, %[cr3]"
+ : /* output */ [cr3]"=r"(cr3));
+ return cr3;
+}
+
+static inline uint64_t get_cr4(void)
+{
+ uint64_t cr4;
+
+ __asm__ __volatile__("mov %%cr4, %[cr4]"
+ : /* output */ [cr4]"=r"(cr4));
+ return cr4;
+}
+
+static inline void set_cr4(uint64_t val)
+{
+ __asm__ __volatile__("mov %0, %%cr4" : : "r" (val) : "memory");
+}
+
+static inline uint64_t get_gdt_base(void)
+{
+ struct desc_ptr gdt;
+ __asm__ __volatile__("sgdt %[gdt]"
+ : /* output */ [gdt]"=m"(gdt));
+ return gdt.address;
+}
+
+static inline uint64_t get_idt_base(void)
+{
+ struct desc_ptr idt;
+ __asm__ __volatile__("sidt %[idt]"
+ : /* output */ [idt]"=m"(idt));
+ return idt.address;
+}
+
+#define SET_XMM(__var, __xmm) \
+ asm volatile("movq %0, %%"#__xmm : : "r"(__var) : #__xmm)
+
+static inline void set_xmm(int n, unsigned long val)
+{
+ switch (n) {
+ case 0:
+ SET_XMM(val, xmm0);
+ break;
+ case 1:
+ SET_XMM(val, xmm1);
+ break;
+ case 2:
+ SET_XMM(val, xmm2);
+ break;
+ case 3:
+ SET_XMM(val, xmm3);
+ break;
+ case 4:
+ SET_XMM(val, xmm4);
+ break;
+ case 5:
+ SET_XMM(val, xmm5);
+ break;
+ case 6:
+ SET_XMM(val, xmm6);
+ break;
+ case 7:
+ SET_XMM(val, xmm7);
+ break;
+ }
+}
+
+typedef unsigned long v1di __attribute__ ((vector_size (8)));
+static inline unsigned long get_xmm(int n)
+{
+ assert(n >= 0 && n <= 7);
+
+ register v1di xmm0 __asm__("%xmm0");
+ register v1di xmm1 __asm__("%xmm1");
+ register v1di xmm2 __asm__("%xmm2");
+ register v1di xmm3 __asm__("%xmm3");
+ register v1di xmm4 __asm__("%xmm4");
+ register v1di xmm5 __asm__("%xmm5");
+ register v1di xmm6 __asm__("%xmm6");
+ register v1di xmm7 __asm__("%xmm7");
+ switch (n) {
+ case 0:
+ return (unsigned long)xmm0;
+ case 1:
+ return (unsigned long)xmm1;
+ case 2:
+ return (unsigned long)xmm2;
+ case 3:
+ return (unsigned long)xmm3;
+ case 4:
+ return (unsigned long)xmm4;
+ case 5:
+ return (unsigned long)xmm5;
+ case 6:
+ return (unsigned long)xmm6;
+ case 7:
+ return (unsigned long)xmm7;
+ }
+ return 0;
+}
+
+struct kvm_x86_state;
+struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid);
+void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *state);
+
+/*
+ * Basic CPU control in CR0
+ */
+#define X86_CR0_PE (1UL<<0) /* Protection Enable */
+#define X86_CR0_MP (1UL<<1) /* Monitor Coprocessor */
+#define X86_CR0_EM (1UL<<2) /* Emulation */
+#define X86_CR0_TS (1UL<<3) /* Task Switched */
+#define X86_CR0_ET (1UL<<4) /* Extension Type */
+#define X86_CR0_NE (1UL<<5) /* Numeric Error */
+#define X86_CR0_WP (1UL<<16) /* Write Protect */
+#define X86_CR0_AM (1UL<<18) /* Alignment Mask */
+#define X86_CR0_NW (1UL<<29) /* Not Write-through */
+#define X86_CR0_CD (1UL<<30) /* Cache Disable */
+#define X86_CR0_PG (1UL<<31) /* Paging */
+
+/*
+ * CPU model specific register (MSR) numbers.
+ */
+
+/* x86-64 specific MSRs */
+#define MSR_EFER 0xc0000080 /* extended feature register */
+#define MSR_STAR 0xc0000081 /* legacy mode SYSCALL target */
+#define MSR_LSTAR 0xc0000082 /* long mode SYSCALL target */
+#define MSR_CSTAR 0xc0000083 /* compat mode SYSCALL target */
+#define MSR_SYSCALL_MASK 0xc0000084 /* EFLAGS mask for syscall */
+#define MSR_FS_BASE 0xc0000100 /* 64bit FS base */
+#define MSR_GS_BASE 0xc0000101 /* 64bit GS base */
+#define MSR_KERNEL_GS_BASE 0xc0000102 /* SwapGS GS shadow */
+#define MSR_TSC_AUX 0xc0000103 /* Auxiliary TSC */
+
+/* EFER bits: */
+#define EFER_SCE (1<<0) /* SYSCALL/SYSRET */
+#define EFER_LME (1<<8) /* Long mode enable */
+#define EFER_LMA (1<<10) /* Long mode active (read-only) */
+#define EFER_NX (1<<11) /* No execute enable */
+#define EFER_SVME (1<<12) /* Enable virtualization */
+#define EFER_LMSLE (1<<13) /* Long Mode Segment Limit Enable */
+#define EFER_FFXSR (1<<14) /* Enable Fast FXSAVE/FXRSTOR */
+
+/* Intel MSRs. Some also available on other CPUs */
+
+#define MSR_PPIN_CTL 0x0000004e
+#define MSR_PPIN 0x0000004f
+
+#define MSR_IA32_PERFCTR0 0x000000c1
+#define MSR_IA32_PERFCTR1 0x000000c2
+#define MSR_FSB_FREQ 0x000000cd
+#define MSR_PLATFORM_INFO 0x000000ce
+#define MSR_PLATFORM_INFO_CPUID_FAULT_BIT 31
+#define MSR_PLATFORM_INFO_CPUID_FAULT BIT_ULL(MSR_PLATFORM_INFO_CPUID_FAULT_BIT)
+
+#define MSR_PKG_CST_CONFIG_CONTROL 0x000000e2
+#define NHM_C3_AUTO_DEMOTE (1UL << 25)
+#define NHM_C1_AUTO_DEMOTE (1UL << 26)
+#define ATM_LNC_C6_AUTO_DEMOTE (1UL << 25)
+#define SNB_C1_AUTO_UNDEMOTE (1UL << 27)
+#define SNB_C3_AUTO_UNDEMOTE (1UL << 28)
+
+#define MSR_MTRRcap 0x000000fe
+#define MSR_IA32_BBL_CR_CTL 0x00000119
+#define MSR_IA32_BBL_CR_CTL3 0x0000011e
+
+#define MSR_IA32_SYSENTER_CS 0x00000174
+#define MSR_IA32_SYSENTER_ESP 0x00000175
+#define MSR_IA32_SYSENTER_EIP 0x00000176
+
+#define MSR_IA32_MCG_CAP 0x00000179
+#define MSR_IA32_MCG_STATUS 0x0000017a
+#define MSR_IA32_MCG_CTL 0x0000017b
+#define MSR_IA32_MCG_EXT_CTL 0x000004d0
+
+#define MSR_OFFCORE_RSP_0 0x000001a6
+#define MSR_OFFCORE_RSP_1 0x000001a7
+#define MSR_TURBO_RATIO_LIMIT 0x000001ad
+#define MSR_TURBO_RATIO_LIMIT1 0x000001ae
+#define MSR_TURBO_RATIO_LIMIT2 0x000001af
+
+#define MSR_LBR_SELECT 0x000001c8
+#define MSR_LBR_TOS 0x000001c9
+#define MSR_LBR_NHM_FROM 0x00000680
+#define MSR_LBR_NHM_TO 0x000006c0
+#define MSR_LBR_CORE_FROM 0x00000040
+#define MSR_LBR_CORE_TO 0x00000060
+
+#define MSR_LBR_INFO_0 0x00000dc0 /* ... 0xddf for _31 */
+#define LBR_INFO_MISPRED BIT_ULL(63)
+#define LBR_INFO_IN_TX BIT_ULL(62)
+#define LBR_INFO_ABORT BIT_ULL(61)
+#define LBR_INFO_CYCLES 0xffff
+
+#define MSR_IA32_PEBS_ENABLE 0x000003f1
+#define MSR_IA32_DS_AREA 0x00000600
+#define MSR_IA32_PERF_CAPABILITIES 0x00000345
+#define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6
+
+#define MSR_IA32_RTIT_CTL 0x00000570
+#define MSR_IA32_RTIT_STATUS 0x00000571
+#define MSR_IA32_RTIT_ADDR0_A 0x00000580
+#define MSR_IA32_RTIT_ADDR0_B 0x00000581
+#define MSR_IA32_RTIT_ADDR1_A 0x00000582
+#define MSR_IA32_RTIT_ADDR1_B 0x00000583
+#define MSR_IA32_RTIT_ADDR2_A 0x00000584
+#define MSR_IA32_RTIT_ADDR2_B 0x00000585
+#define MSR_IA32_RTIT_ADDR3_A 0x00000586
+#define MSR_IA32_RTIT_ADDR3_B 0x00000587
+#define MSR_IA32_RTIT_CR3_MATCH 0x00000572
+#define MSR_IA32_RTIT_OUTPUT_BASE 0x00000560
+#define MSR_IA32_RTIT_OUTPUT_MASK 0x00000561
+
+#define MSR_MTRRfix64K_00000 0x00000250
+#define MSR_MTRRfix16K_80000 0x00000258
+#define MSR_MTRRfix16K_A0000 0x00000259
+#define MSR_MTRRfix4K_C0000 0x00000268
+#define MSR_MTRRfix4K_C8000 0x00000269
+#define MSR_MTRRfix4K_D0000 0x0000026a
+#define MSR_MTRRfix4K_D8000 0x0000026b
+#define MSR_MTRRfix4K_E0000 0x0000026c
+#define MSR_MTRRfix4K_E8000 0x0000026d
+#define MSR_MTRRfix4K_F0000 0x0000026e
+#define MSR_MTRRfix4K_F8000 0x0000026f
+#define MSR_MTRRdefType 0x000002ff
+
+#define MSR_IA32_CR_PAT 0x00000277
+
+#define MSR_IA32_DEBUGCTLMSR 0x000001d9
+#define MSR_IA32_LASTBRANCHFROMIP 0x000001db
+#define MSR_IA32_LASTBRANCHTOIP 0x000001dc
+#define MSR_IA32_LASTINTFROMIP 0x000001dd
+#define MSR_IA32_LASTINTTOIP 0x000001de
+
+/* DEBUGCTLMSR bits (others vary by model): */
+#define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */
+#define DEBUGCTLMSR_BTF_SHIFT 1
+#define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */
+#define DEBUGCTLMSR_TR (1UL << 6)
+#define DEBUGCTLMSR_BTS (1UL << 7)
+#define DEBUGCTLMSR_BTINT (1UL << 8)
+#define DEBUGCTLMSR_BTS_OFF_OS (1UL << 9)
+#define DEBUGCTLMSR_BTS_OFF_USR (1UL << 10)
+#define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI (1UL << 11)
+#define DEBUGCTLMSR_FREEZE_IN_SMM_BIT 14
+#define DEBUGCTLMSR_FREEZE_IN_SMM (1UL << DEBUGCTLMSR_FREEZE_IN_SMM_BIT)
+
+#define MSR_PEBS_FRONTEND 0x000003f7
+
+#define MSR_IA32_POWER_CTL 0x000001fc
+
+#define MSR_IA32_MC0_CTL 0x00000400
+#define MSR_IA32_MC0_STATUS 0x00000401
+#define MSR_IA32_MC0_ADDR 0x00000402
+#define MSR_IA32_MC0_MISC 0x00000403
+
+/* C-state Residency Counters */
+#define MSR_PKG_C3_RESIDENCY 0x000003f8
+#define MSR_PKG_C6_RESIDENCY 0x000003f9
+#define MSR_ATOM_PKG_C6_RESIDENCY 0x000003fa
+#define MSR_PKG_C7_RESIDENCY 0x000003fa
+#define MSR_CORE_C3_RESIDENCY 0x000003fc
+#define MSR_CORE_C6_RESIDENCY 0x000003fd
+#define MSR_CORE_C7_RESIDENCY 0x000003fe
+#define MSR_KNL_CORE_C6_RESIDENCY 0x000003ff
+#define MSR_PKG_C2_RESIDENCY 0x0000060d
+#define MSR_PKG_C8_RESIDENCY 0x00000630
+#define MSR_PKG_C9_RESIDENCY 0x00000631
+#define MSR_PKG_C10_RESIDENCY 0x00000632
+
+/* Interrupt Response Limit */
+#define MSR_PKGC3_IRTL 0x0000060a
+#define MSR_PKGC6_IRTL 0x0000060b
+#define MSR_PKGC7_IRTL 0x0000060c
+#define MSR_PKGC8_IRTL 0x00000633
+#define MSR_PKGC9_IRTL 0x00000634
+#define MSR_PKGC10_IRTL 0x00000635
+
+/* Run Time Average Power Limiting (RAPL) Interface */
+
+#define MSR_RAPL_POWER_UNIT 0x00000606
+
+#define MSR_PKG_POWER_LIMIT 0x00000610
+#define MSR_PKG_ENERGY_STATUS 0x00000611
+#define MSR_PKG_PERF_STATUS 0x00000613
+#define MSR_PKG_POWER_INFO 0x00000614
+
+#define MSR_DRAM_POWER_LIMIT 0x00000618
+#define MSR_DRAM_ENERGY_STATUS 0x00000619
+#define MSR_DRAM_PERF_STATUS 0x0000061b
+#define MSR_DRAM_POWER_INFO 0x0000061c
+
+#define MSR_PP0_POWER_LIMIT 0x00000638
+#define MSR_PP0_ENERGY_STATUS 0x00000639
+#define MSR_PP0_POLICY 0x0000063a
+#define MSR_PP0_PERF_STATUS 0x0000063b
+
+#define MSR_PP1_POWER_LIMIT 0x00000640
+#define MSR_PP1_ENERGY_STATUS 0x00000641
+#define MSR_PP1_POLICY 0x00000642
+
+/* Config TDP MSRs */
+#define MSR_CONFIG_TDP_NOMINAL 0x00000648
+#define MSR_CONFIG_TDP_LEVEL_1 0x00000649
+#define MSR_CONFIG_TDP_LEVEL_2 0x0000064A
+#define MSR_CONFIG_TDP_CONTROL 0x0000064B
+#define MSR_TURBO_ACTIVATION_RATIO 0x0000064C
+
+#define MSR_PLATFORM_ENERGY_STATUS 0x0000064D
+
+#define MSR_PKG_WEIGHTED_CORE_C0_RES 0x00000658
+#define MSR_PKG_ANY_CORE_C0_RES 0x00000659
+#define MSR_PKG_ANY_GFXE_C0_RES 0x0000065A
+#define MSR_PKG_BOTH_CORE_GFXE_C0_RES 0x0000065B
+
+#define MSR_CORE_C1_RES 0x00000660
+#define MSR_MODULE_C6_RES_MS 0x00000664
+
+#define MSR_CC6_DEMOTION_POLICY_CONFIG 0x00000668
+#define MSR_MC6_DEMOTION_POLICY_CONFIG 0x00000669
+
+#define MSR_ATOM_CORE_RATIOS 0x0000066a
+#define MSR_ATOM_CORE_VIDS 0x0000066b
+#define MSR_ATOM_CORE_TURBO_RATIOS 0x0000066c
+#define MSR_ATOM_CORE_TURBO_VIDS 0x0000066d
+
+
+#define MSR_CORE_PERF_LIMIT_REASONS 0x00000690
+#define MSR_GFX_PERF_LIMIT_REASONS 0x000006B0
+#define MSR_RING_PERF_LIMIT_REASONS 0x000006B1
+
+/* Hardware P state interface */
+#define MSR_PPERF 0x0000064e
+#define MSR_PERF_LIMIT_REASONS 0x0000064f
+#define MSR_PM_ENABLE 0x00000770
+#define MSR_HWP_CAPABILITIES 0x00000771
+#define MSR_HWP_REQUEST_PKG 0x00000772
+#define MSR_HWP_INTERRUPT 0x00000773
+#define MSR_HWP_REQUEST 0x00000774
+#define MSR_HWP_STATUS 0x00000777
+
+/* CPUID.6.EAX */
+#define HWP_BASE_BIT (1<<7)
+#define HWP_NOTIFICATIONS_BIT (1<<8)
+#define HWP_ACTIVITY_WINDOW_BIT (1<<9)
+#define HWP_ENERGY_PERF_PREFERENCE_BIT (1<<10)
+#define HWP_PACKAGE_LEVEL_REQUEST_BIT (1<<11)
+
+/* IA32_HWP_CAPABILITIES */
+#define HWP_HIGHEST_PERF(x) (((x) >> 0) & 0xff)
+#define HWP_GUARANTEED_PERF(x) (((x) >> 8) & 0xff)
+#define HWP_MOSTEFFICIENT_PERF(x) (((x) >> 16) & 0xff)
+#define HWP_LOWEST_PERF(x) (((x) >> 24) & 0xff)
+
+/* IA32_HWP_REQUEST */
+#define HWP_MIN_PERF(x) (x & 0xff)
+#define HWP_MAX_PERF(x) ((x & 0xff) << 8)
+#define HWP_DESIRED_PERF(x) ((x & 0xff) << 16)
+#define HWP_ENERGY_PERF_PREFERENCE(x) (((unsigned long long) x & 0xff) << 24)
+#define HWP_EPP_PERFORMANCE 0x00
+#define HWP_EPP_BALANCE_PERFORMANCE 0x80
+#define HWP_EPP_BALANCE_POWERSAVE 0xC0
+#define HWP_EPP_POWERSAVE 0xFF
+#define HWP_ACTIVITY_WINDOW(x) ((unsigned long long)(x & 0xff3) << 32)
+#define HWP_PACKAGE_CONTROL(x) ((unsigned long long)(x & 0x1) << 42)
+
+/* IA32_HWP_STATUS */
+#define HWP_GUARANTEED_CHANGE(x) (x & 0x1)
+#define HWP_EXCURSION_TO_MINIMUM(x) (x & 0x4)
+
+/* IA32_HWP_INTERRUPT */
+#define HWP_CHANGE_TO_GUARANTEED_INT(x) (x & 0x1)
+#define HWP_EXCURSION_TO_MINIMUM_INT(x) (x & 0x2)
+
+#define MSR_AMD64_MC0_MASK 0xc0010044
+
+#define MSR_IA32_MCx_CTL(x) (MSR_IA32_MC0_CTL + 4*(x))
+#define MSR_IA32_MCx_STATUS(x) (MSR_IA32_MC0_STATUS + 4*(x))
+#define MSR_IA32_MCx_ADDR(x) (MSR_IA32_MC0_ADDR + 4*(x))
+#define MSR_IA32_MCx_MISC(x) (MSR_IA32_MC0_MISC + 4*(x))
+
+#define MSR_AMD64_MCx_MASK(x) (MSR_AMD64_MC0_MASK + (x))
+
+/* These are consecutive and not in the normal 4er MCE bank block */
+#define MSR_IA32_MC0_CTL2 0x00000280
+#define MSR_IA32_MCx_CTL2(x) (MSR_IA32_MC0_CTL2 + (x))
+
+#define MSR_P6_PERFCTR0 0x000000c1
+#define MSR_P6_PERFCTR1 0x000000c2
+#define MSR_P6_EVNTSEL0 0x00000186
+#define MSR_P6_EVNTSEL1 0x00000187
+
+#define MSR_KNC_PERFCTR0 0x00000020
+#define MSR_KNC_PERFCTR1 0x00000021
+#define MSR_KNC_EVNTSEL0 0x00000028
+#define MSR_KNC_EVNTSEL1 0x00000029
+
+/* Alternative perfctr range with full access. */
+#define MSR_IA32_PMC0 0x000004c1
+
+/* AMD64 MSRs. Not complete. See the architecture manual for a more
+ complete list. */
+
+#define MSR_AMD64_PATCH_LEVEL 0x0000008b
+#define MSR_AMD64_TSC_RATIO 0xc0000104
+#define MSR_AMD64_NB_CFG 0xc001001f
+#define MSR_AMD64_PATCH_LOADER 0xc0010020
+#define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140
+#define MSR_AMD64_OSVW_STATUS 0xc0010141
+#define MSR_AMD64_LS_CFG 0xc0011020
+#define MSR_AMD64_DC_CFG 0xc0011022
+#define MSR_AMD64_BU_CFG2 0xc001102a
+#define MSR_AMD64_IBSFETCHCTL 0xc0011030
+#define MSR_AMD64_IBSFETCHLINAD 0xc0011031
+#define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032
+#define MSR_AMD64_IBSFETCH_REG_COUNT 3
+#define MSR_AMD64_IBSFETCH_REG_MASK ((1UL<<MSR_AMD64_IBSFETCH_REG_COUNT)-1)
+#define MSR_AMD64_IBSOPCTL 0xc0011033
+#define MSR_AMD64_IBSOPRIP 0xc0011034
+#define MSR_AMD64_IBSOPDATA 0xc0011035
+#define MSR_AMD64_IBSOPDATA2 0xc0011036
+#define MSR_AMD64_IBSOPDATA3 0xc0011037
+#define MSR_AMD64_IBSDCLINAD 0xc0011038
+#define MSR_AMD64_IBSDCPHYSAD 0xc0011039
+#define MSR_AMD64_IBSOP_REG_COUNT 7
+#define MSR_AMD64_IBSOP_REG_MASK ((1UL<<MSR_AMD64_IBSOP_REG_COUNT)-1)
+#define MSR_AMD64_IBSCTL 0xc001103a
+#define MSR_AMD64_IBSBRTARGET 0xc001103b
+#define MSR_AMD64_IBSOPDATA4 0xc001103d
+#define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */
+#define MSR_AMD64_SEV 0xc0010131
+#define MSR_AMD64_SEV_ENABLED_BIT 0
+#define MSR_AMD64_SEV_ENABLED BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT)
+
+/* Fam 17h MSRs */
+#define MSR_F17H_IRPERF 0xc00000e9
+
+/* Fam 16h MSRs */
+#define MSR_F16H_L2I_PERF_CTL 0xc0010230
+#define MSR_F16H_L2I_PERF_CTR 0xc0010231
+#define MSR_F16H_DR1_ADDR_MASK 0xc0011019
+#define MSR_F16H_DR2_ADDR_MASK 0xc001101a
+#define MSR_F16H_DR3_ADDR_MASK 0xc001101b
+#define MSR_F16H_DR0_ADDR_MASK 0xc0011027
+
+/* Fam 15h MSRs */
+#define MSR_F15H_PERF_CTL 0xc0010200
+#define MSR_F15H_PERF_CTR 0xc0010201
+#define MSR_F15H_NB_PERF_CTL 0xc0010240
+#define MSR_F15H_NB_PERF_CTR 0xc0010241
+#define MSR_F15H_PTSC 0xc0010280
+#define MSR_F15H_IC_CFG 0xc0011021
+
+/* Fam 10h MSRs */
+#define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058
+#define FAM10H_MMIO_CONF_ENABLE (1<<0)
+#define FAM10H_MMIO_CONF_BUSRANGE_MASK 0xf
+#define FAM10H_MMIO_CONF_BUSRANGE_SHIFT 2
+#define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL
+#define FAM10H_MMIO_CONF_BASE_SHIFT 20
+#define MSR_FAM10H_NODE_ID 0xc001100c
+#define MSR_F10H_DECFG 0xc0011029
+#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT 1
+#define MSR_F10H_DECFG_LFENCE_SERIALIZE BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT)
+
+/* K8 MSRs */
+#define MSR_K8_TOP_MEM1 0xc001001a
+#define MSR_K8_TOP_MEM2 0xc001001d
+#define MSR_K8_SYSCFG 0xc0010010
+#define MSR_K8_SYSCFG_MEM_ENCRYPT_BIT 23
+#define MSR_K8_SYSCFG_MEM_ENCRYPT BIT_ULL(MSR_K8_SYSCFG_MEM_ENCRYPT_BIT)
+#define MSR_K8_INT_PENDING_MSG 0xc0010055
+/* C1E active bits in int pending message */
+#define K8_INTP_C1E_ACTIVE_MASK 0x18000000
+#define MSR_K8_TSEG_ADDR 0xc0010112
+#define MSR_K8_TSEG_MASK 0xc0010113
+#define K8_MTRRFIXRANGE_DRAM_ENABLE 0x00040000 /* MtrrFixDramEn bit */
+#define K8_MTRRFIXRANGE_DRAM_MODIFY 0x00080000 /* MtrrFixDramModEn bit */
+#define K8_MTRR_RDMEM_WRMEM_MASK 0x18181818 /* Mask: RdMem|WrMem */
+
+/* K7 MSRs */
+#define MSR_K7_EVNTSEL0 0xc0010000
+#define MSR_K7_PERFCTR0 0xc0010004
+#define MSR_K7_EVNTSEL1 0xc0010001
+#define MSR_K7_PERFCTR1 0xc0010005
+#define MSR_K7_EVNTSEL2 0xc0010002
+#define MSR_K7_PERFCTR2 0xc0010006
+#define MSR_K7_EVNTSEL3 0xc0010003
+#define MSR_K7_PERFCTR3 0xc0010007
+#define MSR_K7_CLK_CTL 0xc001001b
+#define MSR_K7_HWCR 0xc0010015
+#define MSR_K7_HWCR_SMMLOCK_BIT 0
+#define MSR_K7_HWCR_SMMLOCK BIT_ULL(MSR_K7_HWCR_SMMLOCK_BIT)
+#define MSR_K7_FID_VID_CTL 0xc0010041
+#define MSR_K7_FID_VID_STATUS 0xc0010042
+
+/* K6 MSRs */
+#define MSR_K6_WHCR 0xc0000082
+#define MSR_K6_UWCCR 0xc0000085
+#define MSR_K6_EPMR 0xc0000086
+#define MSR_K6_PSOR 0xc0000087
+#define MSR_K6_PFIR 0xc0000088
+
+/* Centaur-Hauls/IDT defined MSRs. */
+#define MSR_IDT_FCR1 0x00000107
+#define MSR_IDT_FCR2 0x00000108
+#define MSR_IDT_FCR3 0x00000109
+#define MSR_IDT_FCR4 0x0000010a
+
+#define MSR_IDT_MCR0 0x00000110
+#define MSR_IDT_MCR1 0x00000111
+#define MSR_IDT_MCR2 0x00000112
+#define MSR_IDT_MCR3 0x00000113
+#define MSR_IDT_MCR4 0x00000114
+#define MSR_IDT_MCR5 0x00000115
+#define MSR_IDT_MCR6 0x00000116
+#define MSR_IDT_MCR7 0x00000117
+#define MSR_IDT_MCR_CTRL 0x00000120
+
+/* VIA Cyrix defined MSRs*/
+#define MSR_VIA_FCR 0x00001107
+#define MSR_VIA_LONGHAUL 0x0000110a
+#define MSR_VIA_RNG 0x0000110b
+#define MSR_VIA_BCR2 0x00001147
+
+/* Transmeta defined MSRs */
+#define MSR_TMTA_LONGRUN_CTRL 0x80868010
+#define MSR_TMTA_LONGRUN_FLAGS 0x80868011
+#define MSR_TMTA_LRTI_READOUT 0x80868018
+#define MSR_TMTA_LRTI_VOLT_MHZ 0x8086801a
+
+/* Intel defined MSRs. */
+#define MSR_IA32_P5_MC_ADDR 0x00000000
+#define MSR_IA32_P5_MC_TYPE 0x00000001
+#define MSR_IA32_TSC 0x00000010
+#define MSR_IA32_PLATFORM_ID 0x00000017
+#define MSR_IA32_EBL_CR_POWERON 0x0000002a
+#define MSR_EBC_FREQUENCY_ID 0x0000002c
+#define MSR_SMI_COUNT 0x00000034
+#define MSR_IA32_FEATURE_CONTROL 0x0000003a
+#define MSR_IA32_TSC_ADJUST 0x0000003b
+#define MSR_IA32_BNDCFGS 0x00000d90
+
+#define MSR_IA32_BNDCFGS_RSVD 0x00000ffc
+
+#define MSR_IA32_XSS 0x00000da0
+
+#define FEATURE_CONTROL_LOCKED (1<<0)
+#define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX (1<<1)
+#define FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX (1<<2)
+#define FEATURE_CONTROL_LMCE (1<<20)
+
+#define MSR_IA32_APICBASE 0x0000001b
+#define MSR_IA32_APICBASE_BSP (1<<8)
+#define MSR_IA32_APICBASE_ENABLE (1<<11)
+#define MSR_IA32_APICBASE_BASE (0xfffff<<12)
+
+#define MSR_IA32_TSCDEADLINE 0x000006e0
+
+#define MSR_IA32_UCODE_WRITE 0x00000079
+#define MSR_IA32_UCODE_REV 0x0000008b
+
+#define MSR_IA32_SMM_MONITOR_CTL 0x0000009b
+#define MSR_IA32_SMBASE 0x0000009e
+
+#define MSR_IA32_PERF_STATUS 0x00000198
+#define MSR_IA32_PERF_CTL 0x00000199
+#define INTEL_PERF_CTL_MASK 0xffff
+#define MSR_AMD_PSTATE_DEF_BASE 0xc0010064
+#define MSR_AMD_PERF_STATUS 0xc0010063
+#define MSR_AMD_PERF_CTL 0xc0010062
+
+#define MSR_IA32_MPERF 0x000000e7
+#define MSR_IA32_APERF 0x000000e8
+
+#define MSR_IA32_THERM_CONTROL 0x0000019a
+#define MSR_IA32_THERM_INTERRUPT 0x0000019b
+
+#define THERM_INT_HIGH_ENABLE (1 << 0)
+#define THERM_INT_LOW_ENABLE (1 << 1)
+#define THERM_INT_PLN_ENABLE (1 << 24)
+
+#define MSR_IA32_THERM_STATUS 0x0000019c
+
+#define THERM_STATUS_PROCHOT (1 << 0)
+#define THERM_STATUS_POWER_LIMIT (1 << 10)
+
+#define MSR_THERM2_CTL 0x0000019d
+
+#define MSR_THERM2_CTL_TM_SELECT (1ULL << 16)
+
+#define MSR_IA32_MISC_ENABLE 0x000001a0
+
+#define MSR_IA32_TEMPERATURE_TARGET 0x000001a2
+
+#define MSR_MISC_FEATURE_CONTROL 0x000001a4
+#define MSR_MISC_PWR_MGMT 0x000001aa
+
+#define MSR_IA32_ENERGY_PERF_BIAS 0x000001b0
+#define ENERGY_PERF_BIAS_PERFORMANCE 0
+#define ENERGY_PERF_BIAS_BALANCE_PERFORMANCE 4
+#define ENERGY_PERF_BIAS_NORMAL 6
+#define ENERGY_PERF_BIAS_BALANCE_POWERSAVE 8
+#define ENERGY_PERF_BIAS_POWERSAVE 15
+
+#define MSR_IA32_PACKAGE_THERM_STATUS 0x000001b1
+
+#define PACKAGE_THERM_STATUS_PROCHOT (1 << 0)
+#define PACKAGE_THERM_STATUS_POWER_LIMIT (1 << 10)
+
+#define MSR_IA32_PACKAGE_THERM_INTERRUPT 0x000001b2
+
+#define PACKAGE_THERM_INT_HIGH_ENABLE (1 << 0)
+#define PACKAGE_THERM_INT_LOW_ENABLE (1 << 1)
+#define PACKAGE_THERM_INT_PLN_ENABLE (1 << 24)
+
+/* Thermal Thresholds Support */
+#define THERM_INT_THRESHOLD0_ENABLE (1 << 15)
+#define THERM_SHIFT_THRESHOLD0 8
+#define THERM_MASK_THRESHOLD0 (0x7f << THERM_SHIFT_THRESHOLD0)
+#define THERM_INT_THRESHOLD1_ENABLE (1 << 23)
+#define THERM_SHIFT_THRESHOLD1 16
+#define THERM_MASK_THRESHOLD1 (0x7f << THERM_SHIFT_THRESHOLD1)
+#define THERM_STATUS_THRESHOLD0 (1 << 6)
+#define THERM_LOG_THRESHOLD0 (1 << 7)
+#define THERM_STATUS_THRESHOLD1 (1 << 8)
+#define THERM_LOG_THRESHOLD1 (1 << 9)
+
+/* MISC_ENABLE bits: architectural */
+#define MSR_IA32_MISC_ENABLE_FAST_STRING_BIT 0
+#define MSR_IA32_MISC_ENABLE_FAST_STRING (1ULL << MSR_IA32_MISC_ENABLE_FAST_STRING_BIT)
+#define MSR_IA32_MISC_ENABLE_TCC_BIT 1
+#define MSR_IA32_MISC_ENABLE_TCC (1ULL << MSR_IA32_MISC_ENABLE_TCC_BIT)
+#define MSR_IA32_MISC_ENABLE_EMON_BIT 7
+#define MSR_IA32_MISC_ENABLE_EMON (1ULL << MSR_IA32_MISC_ENABLE_EMON_BIT)
+#define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL_BIT 11
+#define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL (1ULL << MSR_IA32_MISC_ENABLE_BTS_UNAVAIL_BIT)
+#define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL_BIT 12
+#define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL (1ULL << MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL_BIT)
+#define MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP_BIT 16
+#define MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP (1ULL << MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP_BIT)
+#define MSR_IA32_MISC_ENABLE_MWAIT_BIT 18
+#define MSR_IA32_MISC_ENABLE_MWAIT (1ULL << MSR_IA32_MISC_ENABLE_MWAIT_BIT)
+#define MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT 22
+#define MSR_IA32_MISC_ENABLE_LIMIT_CPUID (1ULL << MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT)
+#define MSR_IA32_MISC_ENABLE_XTPR_DISABLE_BIT 23
+#define MSR_IA32_MISC_ENABLE_XTPR_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_XTPR_DISABLE_BIT)
+#define MSR_IA32_MISC_ENABLE_XD_DISABLE_BIT 34
+#define MSR_IA32_MISC_ENABLE_XD_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_XD_DISABLE_BIT)
+
+/* MISC_ENABLE bits: model-specific, meaning may vary from core to core */
+#define MSR_IA32_MISC_ENABLE_X87_COMPAT_BIT 2
+#define MSR_IA32_MISC_ENABLE_X87_COMPAT (1ULL << MSR_IA32_MISC_ENABLE_X87_COMPAT_BIT)
+#define MSR_IA32_MISC_ENABLE_TM1_BIT 3
+#define MSR_IA32_MISC_ENABLE_TM1 (1ULL << MSR_IA32_MISC_ENABLE_TM1_BIT)
+#define MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE_BIT 4
+#define MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE_BIT)
+#define MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE_BIT 6
+#define MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE_BIT)
+#define MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK_BIT 8
+#define MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK (1ULL << MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK_BIT)
+#define MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT 9
+#define MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT)
+#define MSR_IA32_MISC_ENABLE_FERR_BIT 10
+#define MSR_IA32_MISC_ENABLE_FERR (1ULL << MSR_IA32_MISC_ENABLE_FERR_BIT)
+#define MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX_BIT 10
+#define MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX (1ULL << MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX_BIT)
+#define MSR_IA32_MISC_ENABLE_TM2_BIT 13
+#define MSR_IA32_MISC_ENABLE_TM2 (1ULL << MSR_IA32_MISC_ENABLE_TM2_BIT)
+#define MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE_BIT 19
+#define MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE_BIT)
+#define MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK_BIT 20
+#define MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK (1ULL << MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK_BIT)
+#define MSR_IA32_MISC_ENABLE_L1D_CONTEXT_BIT 24
+#define MSR_IA32_MISC_ENABLE_L1D_CONTEXT (1ULL << MSR_IA32_MISC_ENABLE_L1D_CONTEXT_BIT)
+#define MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE_BIT 37
+#define MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE_BIT)
+#define MSR_IA32_MISC_ENABLE_TURBO_DISABLE_BIT 38
+#define MSR_IA32_MISC_ENABLE_TURBO_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_TURBO_DISABLE_BIT)
+#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT 39
+#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT)
+
+/* MISC_FEATURES_ENABLES non-architectural features */
+#define MSR_MISC_FEATURES_ENABLES 0x00000140
+
+#define MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT 0
+#define MSR_MISC_FEATURES_ENABLES_CPUID_FAULT BIT_ULL(MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT)
+#define MSR_MISC_FEATURES_ENABLES_RING3MWAIT_BIT 1
+
+#define MSR_IA32_TSC_DEADLINE 0x000006E0
+
+/* P4/Xeon+ specific */
+#define MSR_IA32_MCG_EAX 0x00000180
+#define MSR_IA32_MCG_EBX 0x00000181
+#define MSR_IA32_MCG_ECX 0x00000182
+#define MSR_IA32_MCG_EDX 0x00000183
+#define MSR_IA32_MCG_ESI 0x00000184
+#define MSR_IA32_MCG_EDI 0x00000185
+#define MSR_IA32_MCG_EBP 0x00000186
+#define MSR_IA32_MCG_ESP 0x00000187
+#define MSR_IA32_MCG_EFLAGS 0x00000188
+#define MSR_IA32_MCG_EIP 0x00000189
+#define MSR_IA32_MCG_RESERVED 0x0000018a
+
+/* Pentium IV performance counter MSRs */
+#define MSR_P4_BPU_PERFCTR0 0x00000300
+#define MSR_P4_BPU_PERFCTR1 0x00000301
+#define MSR_P4_BPU_PERFCTR2 0x00000302
+#define MSR_P4_BPU_PERFCTR3 0x00000303
+#define MSR_P4_MS_PERFCTR0 0x00000304
+#define MSR_P4_MS_PERFCTR1 0x00000305
+#define MSR_P4_MS_PERFCTR2 0x00000306
+#define MSR_P4_MS_PERFCTR3 0x00000307
+#define MSR_P4_FLAME_PERFCTR0 0x00000308
+#define MSR_P4_FLAME_PERFCTR1 0x00000309
+#define MSR_P4_FLAME_PERFCTR2 0x0000030a
+#define MSR_P4_FLAME_PERFCTR3 0x0000030b
+#define MSR_P4_IQ_PERFCTR0 0x0000030c
+#define MSR_P4_IQ_PERFCTR1 0x0000030d
+#define MSR_P4_IQ_PERFCTR2 0x0000030e
+#define MSR_P4_IQ_PERFCTR3 0x0000030f
+#define MSR_P4_IQ_PERFCTR4 0x00000310
+#define MSR_P4_IQ_PERFCTR5 0x00000311
+#define MSR_P4_BPU_CCCR0 0x00000360
+#define MSR_P4_BPU_CCCR1 0x00000361
+#define MSR_P4_BPU_CCCR2 0x00000362
+#define MSR_P4_BPU_CCCR3 0x00000363
+#define MSR_P4_MS_CCCR0 0x00000364
+#define MSR_P4_MS_CCCR1 0x00000365
+#define MSR_P4_MS_CCCR2 0x00000366
+#define MSR_P4_MS_CCCR3 0x00000367
+#define MSR_P4_FLAME_CCCR0 0x00000368
+#define MSR_P4_FLAME_CCCR1 0x00000369
+#define MSR_P4_FLAME_CCCR2 0x0000036a
+#define MSR_P4_FLAME_CCCR3 0x0000036b
+#define MSR_P4_IQ_CCCR0 0x0000036c
+#define MSR_P4_IQ_CCCR1 0x0000036d
+#define MSR_P4_IQ_CCCR2 0x0000036e
+#define MSR_P4_IQ_CCCR3 0x0000036f
+#define MSR_P4_IQ_CCCR4 0x00000370
+#define MSR_P4_IQ_CCCR5 0x00000371
+#define MSR_P4_ALF_ESCR0 0x000003ca
+#define MSR_P4_ALF_ESCR1 0x000003cb
+#define MSR_P4_BPU_ESCR0 0x000003b2
+#define MSR_P4_BPU_ESCR1 0x000003b3
+#define MSR_P4_BSU_ESCR0 0x000003a0
+#define MSR_P4_BSU_ESCR1 0x000003a1
+#define MSR_P4_CRU_ESCR0 0x000003b8
+#define MSR_P4_CRU_ESCR1 0x000003b9
+#define MSR_P4_CRU_ESCR2 0x000003cc
+#define MSR_P4_CRU_ESCR3 0x000003cd
+#define MSR_P4_CRU_ESCR4 0x000003e0
+#define MSR_P4_CRU_ESCR5 0x000003e1
+#define MSR_P4_DAC_ESCR0 0x000003a8
+#define MSR_P4_DAC_ESCR1 0x000003a9
+#define MSR_P4_FIRM_ESCR0 0x000003a4
+#define MSR_P4_FIRM_ESCR1 0x000003a5
+#define MSR_P4_FLAME_ESCR0 0x000003a6
+#define MSR_P4_FLAME_ESCR1 0x000003a7
+#define MSR_P4_FSB_ESCR0 0x000003a2
+#define MSR_P4_FSB_ESCR1 0x000003a3
+#define MSR_P4_IQ_ESCR0 0x000003ba
+#define MSR_P4_IQ_ESCR1 0x000003bb
+#define MSR_P4_IS_ESCR0 0x000003b4
+#define MSR_P4_IS_ESCR1 0x000003b5
+#define MSR_P4_ITLB_ESCR0 0x000003b6
+#define MSR_P4_ITLB_ESCR1 0x000003b7
+#define MSR_P4_IX_ESCR0 0x000003c8
+#define MSR_P4_IX_ESCR1 0x000003c9
+#define MSR_P4_MOB_ESCR0 0x000003aa
+#define MSR_P4_MOB_ESCR1 0x000003ab
+#define MSR_P4_MS_ESCR0 0x000003c0
+#define MSR_P4_MS_ESCR1 0x000003c1
+#define MSR_P4_PMH_ESCR0 0x000003ac
+#define MSR_P4_PMH_ESCR1 0x000003ad
+#define MSR_P4_RAT_ESCR0 0x000003bc
+#define MSR_P4_RAT_ESCR1 0x000003bd
+#define MSR_P4_SAAT_ESCR0 0x000003ae
+#define MSR_P4_SAAT_ESCR1 0x000003af
+#define MSR_P4_SSU_ESCR0 0x000003be
+#define MSR_P4_SSU_ESCR1 0x000003bf /* guess: not in manual */
+
+#define MSR_P4_TBPU_ESCR0 0x000003c2
+#define MSR_P4_TBPU_ESCR1 0x000003c3
+#define MSR_P4_TC_ESCR0 0x000003c4
+#define MSR_P4_TC_ESCR1 0x000003c5
+#define MSR_P4_U2L_ESCR0 0x000003b0
+#define MSR_P4_U2L_ESCR1 0x000003b1
+
+#define MSR_P4_PEBS_MATRIX_VERT 0x000003f2
+
+/* Intel Core-based CPU performance counters */
+#define MSR_CORE_PERF_FIXED_CTR0 0x00000309
+#define MSR_CORE_PERF_FIXED_CTR1 0x0000030a
+#define MSR_CORE_PERF_FIXED_CTR2 0x0000030b
+#define MSR_CORE_PERF_FIXED_CTR_CTRL 0x0000038d
+#define MSR_CORE_PERF_GLOBAL_STATUS 0x0000038e
+#define MSR_CORE_PERF_GLOBAL_CTRL 0x0000038f
+#define MSR_CORE_PERF_GLOBAL_OVF_CTRL 0x00000390
+
+/* Geode defined MSRs */
+#define MSR_GEODE_BUSCONT_CONF0 0x00001900
+
+/* Intel VT MSRs */
+#define MSR_IA32_VMX_BASIC 0x00000480
+#define MSR_IA32_VMX_PINBASED_CTLS 0x00000481
+#define MSR_IA32_VMX_PROCBASED_CTLS 0x00000482
+#define MSR_IA32_VMX_EXIT_CTLS 0x00000483
+#define MSR_IA32_VMX_ENTRY_CTLS 0x00000484
+#define MSR_IA32_VMX_MISC 0x00000485
+#define MSR_IA32_VMX_CR0_FIXED0 0x00000486
+#define MSR_IA32_VMX_CR0_FIXED1 0x00000487
+#define MSR_IA32_VMX_CR4_FIXED0 0x00000488
+#define MSR_IA32_VMX_CR4_FIXED1 0x00000489
+#define MSR_IA32_VMX_VMCS_ENUM 0x0000048a
+#define MSR_IA32_VMX_PROCBASED_CTLS2 0x0000048b
+#define MSR_IA32_VMX_EPT_VPID_CAP 0x0000048c
+#define MSR_IA32_VMX_TRUE_PINBASED_CTLS 0x0000048d
+#define MSR_IA32_VMX_TRUE_PROCBASED_CTLS 0x0000048e
+#define MSR_IA32_VMX_TRUE_EXIT_CTLS 0x0000048f
+#define MSR_IA32_VMX_TRUE_ENTRY_CTLS 0x00000490
+#define MSR_IA32_VMX_VMFUNC 0x00000491
+
+/* VMX_BASIC bits and bitmasks */
+#define VMX_BASIC_VMCS_SIZE_SHIFT 32
+#define VMX_BASIC_TRUE_CTLS (1ULL << 55)
+#define VMX_BASIC_64 0x0001000000000000LLU
+#define VMX_BASIC_MEM_TYPE_SHIFT 50
+#define VMX_BASIC_MEM_TYPE_MASK 0x003c000000000000LLU
+#define VMX_BASIC_MEM_TYPE_WB 6LLU
+#define VMX_BASIC_INOUT 0x0040000000000000LLU
+
+/* MSR_IA32_VMX_MISC bits */
+#define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29)
+#define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F
+/* AMD-V MSRs */
+
+#define MSR_VM_CR 0xc0010114
+#define MSR_VM_IGNNE 0xc0010115
+#define MSR_VM_HSAVE_PA 0xc0010117
+
+#endif /* !SELFTEST_KVM_X86_H */
diff --git a/tools/testing/selftests/kvm/lib/assert.c b/tools/testing/selftests/kvm/lib/assert.c
new file mode 100644
index 000000000..d30667706
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/assert.c
@@ -0,0 +1,92 @@
+/*
+ * tools/testing/selftests/kvm/lib/assert.c
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+
+#define _GNU_SOURCE /* for getline(3) and strchrnul(3)*/
+
+#include "test_util.h"
+
+#include <execinfo.h>
+#include <sys/syscall.h>
+
+#include "../../kselftest.h"
+
+/* Dumps the current stack trace to stderr. */
+static void __attribute__((noinline)) test_dump_stack(void);
+static void test_dump_stack(void)
+{
+ /*
+ * Build and run this command:
+ *
+ * addr2line -s -e /proc/$PPID/exe -fpai {backtrace addresses} | \
+ * grep -v test_dump_stack | cat -n 1>&2
+ *
+ * Note that the spacing is different and there's no newline.
+ */
+ size_t i;
+ size_t n = 20;
+ void *stack[n];
+ const char *addr2line = "addr2line -s -e /proc/$PPID/exe -fpai";
+ const char *pipeline = "|cat -n 1>&2";
+ char cmd[strlen(addr2line) + strlen(pipeline) +
+ /* N bytes per addr * 2 digits per byte + 1 space per addr: */
+ n * (((sizeof(void *)) * 2) + 1) +
+ /* Null terminator: */
+ 1];
+ char *c;
+
+ n = backtrace(stack, n);
+ c = &cmd[0];
+ c += sprintf(c, "%s", addr2line);
+ /*
+ * Skip the first 3 frames: backtrace, test_dump_stack, and
+ * test_assert. We hope that backtrace isn't inlined and the other two
+ * we've declared noinline.
+ */
+ for (i = 2; i < n; i++)
+ c += sprintf(c, " %lx", ((unsigned long) stack[i]) - 1);
+ c += sprintf(c, "%s", pipeline);
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-result"
+ system(cmd);
+#pragma GCC diagnostic pop
+}
+
+static pid_t _gettid(void)
+{
+ return syscall(SYS_gettid);
+}
+
+void __attribute__((noinline))
+test_assert(bool exp, const char *exp_str,
+ const char *file, unsigned int line, const char *fmt, ...)
+{
+ va_list ap;
+
+ if (!(exp)) {
+ va_start(ap, fmt);
+
+ fprintf(stderr, "==== Test Assertion Failure ====\n"
+ " %s:%u: %s\n"
+ " pid=%d tid=%d - %s\n",
+ file, line, exp_str, getpid(), _gettid(),
+ strerror(errno));
+ test_dump_stack();
+ if (fmt) {
+ fputs(" ", stderr);
+ vfprintf(stderr, fmt, ap);
+ fputs("\n", stderr);
+ }
+ va_end(ap);
+
+ if (errno == EACCES)
+ ksft_exit_skip("Access denied - Exiting.\n");
+ exit(254);
+ }
+
+ return;
+}
diff --git a/tools/testing/selftests/kvm/lib/elf.c b/tools/testing/selftests/kvm/lib/elf.c
new file mode 100644
index 000000000..5eb857584
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/elf.c
@@ -0,0 +1,197 @@
+/*
+ * tools/testing/selftests/kvm/lib/elf.c
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+
+#include "test_util.h"
+
+#include <bits/endian.h>
+#include <linux/elf.h>
+
+#include "kvm_util.h"
+#include "kvm_util_internal.h"
+
+static void elfhdr_get(const char *filename, Elf64_Ehdr *hdrp)
+{
+ off_t offset_rv;
+
+ /* Open the ELF file. */
+ int fd;
+ fd = open(filename, O_RDONLY);
+ TEST_ASSERT(fd >= 0, "Failed to open ELF file,\n"
+ " filename: %s\n"
+ " rv: %i errno: %i", filename, fd, errno);
+
+ /* Read in and validate ELF Identification Record.
+ * The ELF Identification record is the first 16 (EI_NIDENT) bytes
+ * of the ELF header, which is at the beginning of the ELF file.
+ * For now it is only safe to read the first EI_NIDENT bytes. Once
+ * read and validated, the value of e_ehsize can be used to determine
+ * the real size of the ELF header.
+ */
+ unsigned char ident[EI_NIDENT];
+ test_read(fd, ident, sizeof(ident));
+ TEST_ASSERT((ident[EI_MAG0] == ELFMAG0) && (ident[EI_MAG1] == ELFMAG1)
+ && (ident[EI_MAG2] == ELFMAG2) && (ident[EI_MAG3] == ELFMAG3),
+ "ELF MAGIC Mismatch,\n"
+ " filename: %s\n"
+ " ident[EI_MAG0 - EI_MAG3]: %02x %02x %02x %02x\n"
+ " Expected: %02x %02x %02x %02x",
+ filename,
+ ident[EI_MAG0], ident[EI_MAG1], ident[EI_MAG2], ident[EI_MAG3],
+ ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3);
+ TEST_ASSERT(ident[EI_CLASS] == ELFCLASS64,
+ "Current implementation only able to handle ELFCLASS64,\n"
+ " filename: %s\n"
+ " ident[EI_CLASS]: %02x\n"
+ " expected: %02x",
+ filename,
+ ident[EI_CLASS], ELFCLASS64);
+ TEST_ASSERT(((BYTE_ORDER == LITTLE_ENDIAN)
+ && (ident[EI_DATA] == ELFDATA2LSB))
+ || ((BYTE_ORDER == BIG_ENDIAN)
+ && (ident[EI_DATA] == ELFDATA2MSB)), "Current "
+ "implementation only able to handle\n"
+ "cases where the host and ELF file endianness\n"
+ "is the same:\n"
+ " host BYTE_ORDER: %u\n"
+ " host LITTLE_ENDIAN: %u\n"
+ " host BIG_ENDIAN: %u\n"
+ " ident[EI_DATA]: %u\n"
+ " ELFDATA2LSB: %u\n"
+ " ELFDATA2MSB: %u",
+ BYTE_ORDER, LITTLE_ENDIAN, BIG_ENDIAN,
+ ident[EI_DATA], ELFDATA2LSB, ELFDATA2MSB);
+ TEST_ASSERT(ident[EI_VERSION] == EV_CURRENT,
+ "Current implementation only able to handle current "
+ "ELF version,\n"
+ " filename: %s\n"
+ " ident[EI_VERSION]: %02x\n"
+ " expected: %02x",
+ filename, ident[EI_VERSION], EV_CURRENT);
+
+ /* Read in the ELF header.
+ * With the ELF Identification portion of the ELF header
+ * validated, especially that the value at EI_VERSION is
+ * as expected, it is now safe to read the entire ELF header.
+ */
+ offset_rv = lseek(fd, 0, SEEK_SET);
+ TEST_ASSERT(offset_rv == 0, "Seek to ELF header failed,\n"
+ " rv: %zi expected: %i", offset_rv, 0);
+ test_read(fd, hdrp, sizeof(*hdrp));
+ TEST_ASSERT(hdrp->e_phentsize == sizeof(Elf64_Phdr),
+ "Unexpected physical header size,\n"
+ " hdrp->e_phentsize: %x\n"
+ " expected: %zx",
+ hdrp->e_phentsize, sizeof(Elf64_Phdr));
+ TEST_ASSERT(hdrp->e_shentsize == sizeof(Elf64_Shdr),
+ "Unexpected section header size,\n"
+ " hdrp->e_shentsize: %x\n"
+ " expected: %zx",
+ hdrp->e_shentsize, sizeof(Elf64_Shdr));
+}
+
+/* VM ELF Load
+ *
+ * Input Args:
+ * filename - Path to ELF file
+ *
+ * Output Args: None
+ *
+ * Input/Output Args:
+ * vm - Pointer to opaque type that describes the VM.
+ *
+ * Return: None, TEST_ASSERT failures for all error conditions
+ *
+ * Loads the program image of the ELF file specified by filename,
+ * into the virtual address space of the VM pointed to by vm. On entry
+ * the VM needs to not be using any of the virtual address space used
+ * by the image and it needs to have sufficient available physical pages, to
+ * back the virtual pages used to load the image.
+ */
+void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename,
+ uint32_t data_memslot, uint32_t pgd_memslot)
+{
+ off_t offset, offset_rv;
+ Elf64_Ehdr hdr;
+
+ /* Open the ELF file. */
+ int fd;
+ fd = open(filename, O_RDONLY);
+ TEST_ASSERT(fd >= 0, "Failed to open ELF file,\n"
+ " filename: %s\n"
+ " rv: %i errno: %i", filename, fd, errno);
+
+ /* Read in the ELF header. */
+ elfhdr_get(filename, &hdr);
+
+ /* For each program header.
+ * The following ELF header members specify the location
+ * and size of the program headers:
+ *
+ * e_phoff - File offset to start of program headers
+ * e_phentsize - Size of each program header
+ * e_phnum - Number of program header entries
+ */
+ for (unsigned int n1 = 0; n1 < hdr.e_phnum; n1++) {
+ /* Seek to the beginning of the program header. */
+ offset = hdr.e_phoff + (n1 * hdr.e_phentsize);
+ offset_rv = lseek(fd, offset, SEEK_SET);
+ TEST_ASSERT(offset_rv == offset,
+ "Failed to seek to begining of program header %u,\n"
+ " filename: %s\n"
+ " rv: %jd errno: %i",
+ n1, filename, (intmax_t) offset_rv, errno);
+
+ /* Read in the program header. */
+ Elf64_Phdr phdr;
+ test_read(fd, &phdr, sizeof(phdr));
+
+ /* Skip if this header doesn't describe a loadable segment. */
+ if (phdr.p_type != PT_LOAD)
+ continue;
+
+ /* Allocate memory for this segment within the VM. */
+ TEST_ASSERT(phdr.p_memsz > 0, "Unexpected loadable segment "
+ "memsize of 0,\n"
+ " phdr index: %u p_memsz: 0x%" PRIx64,
+ n1, (uint64_t) phdr.p_memsz);
+ vm_vaddr_t seg_vstart = phdr.p_vaddr;
+ seg_vstart &= ~(vm_vaddr_t)(vm->page_size - 1);
+ vm_vaddr_t seg_vend = phdr.p_vaddr + phdr.p_memsz - 1;
+ seg_vend |= vm->page_size - 1;
+ size_t seg_size = seg_vend - seg_vstart + 1;
+
+ vm_vaddr_t vaddr = vm_vaddr_alloc(vm, seg_size, seg_vstart,
+ data_memslot, pgd_memslot);
+ TEST_ASSERT(vaddr == seg_vstart, "Unable to allocate "
+ "virtual memory for segment at requested min addr,\n"
+ " segment idx: %u\n"
+ " seg_vstart: 0x%lx\n"
+ " vaddr: 0x%lx",
+ n1, seg_vstart, vaddr);
+ memset(addr_gva2hva(vm, vaddr), 0, seg_size);
+ /* TODO(lhuemill): Set permissions of each memory segment
+ * based on the least-significant 3 bits of phdr.p_flags.
+ */
+
+ /* Load portion of initial state that is contained within
+ * the ELF file.
+ */
+ if (phdr.p_filesz) {
+ offset_rv = lseek(fd, phdr.p_offset, SEEK_SET);
+ TEST_ASSERT(offset_rv == phdr.p_offset,
+ "Seek to program segment offset failed,\n"
+ " program header idx: %u errno: %i\n"
+ " offset_rv: 0x%jx\n"
+ " expected: 0x%jx\n",
+ n1, errno, (intmax_t) offset_rv,
+ (intmax_t) phdr.p_offset);
+ test_read(fd, addr_gva2hva(vm, phdr.p_vaddr),
+ phdr.p_filesz);
+ }
+ }
+}
diff --git a/tools/testing/selftests/kvm/lib/io.c b/tools/testing/selftests/kvm/lib/io.c
new file mode 100644
index 000000000..cff869ffe
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/io.c
@@ -0,0 +1,158 @@
+/*
+ * tools/testing/selftests/kvm/lib/io.c
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+
+#include "test_util.h"
+
+/* Test Write
+ *
+ * A wrapper for write(2), that automatically handles the following
+ * special conditions:
+ *
+ * + Interrupted system call (EINTR)
+ * + Write of less than requested amount
+ * + Non-block return (EAGAIN)
+ *
+ * For each of the above, an additional write is performed to automatically
+ * continue writing the requested data.
+ * There are also many cases where write(2) can return an unexpected
+ * error (e.g. EIO). Such errors cause a TEST_ASSERT failure.
+ *
+ * Note, for function signature compatibility with write(2), this function
+ * returns the number of bytes written, but that value will always be equal
+ * to the number of requested bytes. All other conditions in this and
+ * future enhancements to this function either automatically issue another
+ * write(2) or cause a TEST_ASSERT failure.
+ *
+ * Args:
+ * fd - Opened file descriptor to file to be written.
+ * count - Number of bytes to write.
+ *
+ * Output:
+ * buf - Starting address of data to be written.
+ *
+ * Return:
+ * On success, number of bytes written.
+ * On failure, a TEST_ASSERT failure is caused.
+ */
+ssize_t test_write(int fd, const void *buf, size_t count)
+{
+ ssize_t rc;
+ ssize_t num_written = 0;
+ size_t num_left = count;
+ const char *ptr = buf;
+
+ /* Note: Count of zero is allowed (see "RETURN VALUE" portion of
+ * write(2) manpage for details.
+ */
+ TEST_ASSERT(count >= 0, "Unexpected count, count: %li", count);
+
+ do {
+ rc = write(fd, ptr, num_left);
+
+ switch (rc) {
+ case -1:
+ TEST_ASSERT(errno == EAGAIN || errno == EINTR,
+ "Unexpected write failure,\n"
+ " rc: %zi errno: %i", rc, errno);
+ continue;
+
+ case 0:
+ TEST_ASSERT(false, "Unexpected EOF,\n"
+ " rc: %zi num_written: %zi num_left: %zu",
+ rc, num_written, num_left);
+ break;
+
+ default:
+ TEST_ASSERT(rc >= 0, "Unexpected ret from write,\n"
+ " rc: %zi errno: %i", rc, errno);
+ num_written += rc;
+ num_left -= rc;
+ ptr += rc;
+ break;
+ }
+ } while (num_written < count);
+
+ return num_written;
+}
+
+/* Test Read
+ *
+ * A wrapper for read(2), that automatically handles the following
+ * special conditions:
+ *
+ * + Interrupted system call (EINTR)
+ * + Read of less than requested amount
+ * + Non-block return (EAGAIN)
+ *
+ * For each of the above, an additional read is performed to automatically
+ * continue reading the requested data.
+ * There are also many cases where read(2) can return an unexpected
+ * error (e.g. EIO). Such errors cause a TEST_ASSERT failure. Note,
+ * it is expected that the file opened by fd at the current file position
+ * contains at least the number of requested bytes to be read. A TEST_ASSERT
+ * failure is produced if an End-Of-File condition occurs, before all the
+ * data is read. It is the callers responsibility to assure that sufficient
+ * data exists.
+ *
+ * Note, for function signature compatibility with read(2), this function
+ * returns the number of bytes read, but that value will always be equal
+ * to the number of requested bytes. All other conditions in this and
+ * future enhancements to this function either automatically issue another
+ * read(2) or cause a TEST_ASSERT failure.
+ *
+ * Args:
+ * fd - Opened file descriptor to file to be read.
+ * count - Number of bytes to read.
+ *
+ * Output:
+ * buf - Starting address of where to write the bytes read.
+ *
+ * Return:
+ * On success, number of bytes read.
+ * On failure, a TEST_ASSERT failure is caused.
+ */
+ssize_t test_read(int fd, void *buf, size_t count)
+{
+ ssize_t rc;
+ ssize_t num_read = 0;
+ size_t num_left = count;
+ char *ptr = buf;
+
+ /* Note: Count of zero is allowed (see "If count is zero" portion of
+ * read(2) manpage for details.
+ */
+ TEST_ASSERT(count >= 0, "Unexpected count, count: %li", count);
+
+ do {
+ rc = read(fd, ptr, num_left);
+
+ switch (rc) {
+ case -1:
+ TEST_ASSERT(errno == EAGAIN || errno == EINTR,
+ "Unexpected read failure,\n"
+ " rc: %zi errno: %i", rc, errno);
+ break;
+
+ case 0:
+ TEST_ASSERT(false, "Unexpected EOF,\n"
+ " rc: %zi num_read: %zi num_left: %zu",
+ rc, num_read, num_left);
+ break;
+
+ default:
+ TEST_ASSERT(rc > 0, "Unexpected ret from read,\n"
+ " rc: %zi errno: %i", rc, errno);
+ num_read += rc;
+ num_left -= rc;
+ ptr += rc;
+ break;
+ }
+ } while (num_read < count);
+
+ return num_read;
+}
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
new file mode 100644
index 000000000..b138fd5e4
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -0,0 +1,1680 @@
+/*
+ * tools/testing/selftests/kvm/lib/kvm_util.c
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "kvm_util_internal.h"
+
+#include <assert.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <linux/kernel.h>
+
+#define KVM_DEV_PATH "/dev/kvm"
+
+#define KVM_UTIL_PGS_PER_HUGEPG 512
+#define KVM_UTIL_MIN_PADDR 0x2000
+
+/* Aligns x up to the next multiple of size. Size must be a power of 2. */
+static void *align(void *x, size_t size)
+{
+ size_t mask = size - 1;
+ TEST_ASSERT(size != 0 && !(size & (size - 1)),
+ "size not a power of 2: %lu", size);
+ return (void *) (((size_t) x + mask) & ~mask);
+}
+
+/* Capability
+ *
+ * Input Args:
+ * cap - Capability
+ *
+ * Output Args: None
+ *
+ * Return:
+ * On success, the Value corresponding to the capability (KVM_CAP_*)
+ * specified by the value of cap. On failure a TEST_ASSERT failure
+ * is produced.
+ *
+ * Looks up and returns the value corresponding to the capability
+ * (KVM_CAP_*) given by cap.
+ */
+int kvm_check_cap(long cap)
+{
+ int ret;
+ int kvm_fd;
+
+ kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
+ if (kvm_fd < 0)
+ exit(KSFT_SKIP);
+
+ ret = ioctl(kvm_fd, KVM_CHECK_EXTENSION, cap);
+ TEST_ASSERT(ret >= 0, "KVM_CHECK_EXTENSION IOCTL failed,\n"
+ " rc: %i errno: %i", ret, errno);
+
+ close(kvm_fd);
+
+ return ret;
+}
+
+/* VM Enable Capability
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * cap - Capability
+ *
+ * Output Args: None
+ *
+ * Return: On success, 0. On failure a TEST_ASSERT failure is produced.
+ *
+ * Enables a capability (KVM_CAP_*) on the VM.
+ */
+int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap)
+{
+ int ret;
+
+ ret = ioctl(vm->fd, KVM_ENABLE_CAP, cap);
+ TEST_ASSERT(ret == 0, "KVM_ENABLE_CAP IOCTL failed,\n"
+ " rc: %i errno: %i", ret, errno);
+
+ return ret;
+}
+
+static void vm_open(struct kvm_vm *vm, int perm)
+{
+ vm->kvm_fd = open(KVM_DEV_PATH, perm);
+ if (vm->kvm_fd < 0)
+ exit(KSFT_SKIP);
+
+ /* Create VM. */
+ vm->fd = ioctl(vm->kvm_fd, KVM_CREATE_VM, NULL);
+ TEST_ASSERT(vm->fd >= 0, "KVM_CREATE_VM ioctl failed, "
+ "rc: %i errno: %i", vm->fd, errno);
+}
+
+/* VM Create
+ *
+ * Input Args:
+ * mode - VM Mode (e.g. VM_MODE_FLAT48PG)
+ * phy_pages - Physical memory pages
+ * perm - permission
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Pointer to opaque structure that describes the created VM.
+ *
+ * Creates a VM with the mode specified by mode (e.g. VM_MODE_FLAT48PG).
+ * When phy_pages is non-zero, a memory region of phy_pages physical pages
+ * is created and mapped starting at guest physical address 0. The file
+ * descriptor to control the created VM is created with the permissions
+ * given by perm (e.g. O_RDWR).
+ */
+struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
+{
+ struct kvm_vm *vm;
+ int kvm_fd;
+
+ /* Allocate memory. */
+ vm = calloc(1, sizeof(*vm));
+ TEST_ASSERT(vm != NULL, "Insufficent Memory");
+
+ vm->mode = mode;
+ vm_open(vm, perm);
+
+ /* Setup mode specific traits. */
+ switch (vm->mode) {
+ case VM_MODE_FLAT48PG:
+ vm->page_size = 0x1000;
+ vm->page_shift = 12;
+
+ /* Limit to 48-bit canonical virtual addresses. */
+ vm->vpages_valid = sparsebit_alloc();
+ sparsebit_set_num(vm->vpages_valid,
+ 0, (1ULL << (48 - 1)) >> vm->page_shift);
+ sparsebit_set_num(vm->vpages_valid,
+ (~((1ULL << (48 - 1)) - 1)) >> vm->page_shift,
+ (1ULL << (48 - 1)) >> vm->page_shift);
+
+ /* Limit physical addresses to 52-bits. */
+ vm->max_gfn = ((1ULL << 52) >> vm->page_shift) - 1;
+ break;
+
+ default:
+ TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", mode);
+ }
+
+ /* Allocate and setup memory for guest. */
+ vm->vpages_mapped = sparsebit_alloc();
+ if (phy_pages != 0)
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+ 0, 0, phy_pages, 0);
+
+ return vm;
+}
+
+/* VM Restart
+ *
+ * Input Args:
+ * vm - VM that has been released before
+ * perm - permission
+ *
+ * Output Args: None
+ *
+ * Reopens the file descriptors associated to the VM and reinstates the
+ * global state, such as the irqchip and the memory regions that are mapped
+ * into the guest.
+ */
+void kvm_vm_restart(struct kvm_vm *vmp, int perm)
+{
+ struct userspace_mem_region *region;
+
+ vm_open(vmp, perm);
+ if (vmp->has_irqchip)
+ vm_create_irqchip(vmp);
+
+ for (region = vmp->userspace_mem_region_head; region;
+ region = region->next) {
+ int ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
+ TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
+ " rc: %i errno: %i\n"
+ " slot: %u flags: 0x%x\n"
+ " guest_phys_addr: 0x%lx size: 0x%lx",
+ ret, errno, region->region.slot, region->region.flags,
+ region->region.guest_phys_addr,
+ region->region.memory_size);
+ }
+}
+
+void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log)
+{
+ struct kvm_dirty_log args = { .dirty_bitmap = log, .slot = slot };
+ int ret;
+
+ ret = ioctl(vm->fd, KVM_GET_DIRTY_LOG, &args);
+ TEST_ASSERT(ret == 0, "%s: KVM_GET_DIRTY_LOG failed: %s",
+ strerror(-ret));
+}
+
+/* Userspace Memory Region Find
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * start - Starting VM physical address
+ * end - Ending VM physical address, inclusive.
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Pointer to overlapping region, NULL if no such region.
+ *
+ * Searches for a region with any physical memory that overlaps with
+ * any portion of the guest physical addresses from start to end
+ * inclusive. If multiple overlapping regions exist, a pointer to any
+ * of the regions is returned. Null is returned only when no overlapping
+ * region exists.
+ */
+static struct userspace_mem_region *userspace_mem_region_find(
+ struct kvm_vm *vm, uint64_t start, uint64_t end)
+{
+ struct userspace_mem_region *region;
+
+ for (region = vm->userspace_mem_region_head; region;
+ region = region->next) {
+ uint64_t existing_start = region->region.guest_phys_addr;
+ uint64_t existing_end = region->region.guest_phys_addr
+ + region->region.memory_size - 1;
+ if (start <= existing_end && end >= existing_start)
+ return region;
+ }
+
+ return NULL;
+}
+
+/* KVM Userspace Memory Region Find
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * start - Starting VM physical address
+ * end - Ending VM physical address, inclusive.
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Pointer to overlapping region, NULL if no such region.
+ *
+ * Public interface to userspace_mem_region_find. Allows tests to look up
+ * the memslot datastructure for a given range of guest physical memory.
+ */
+struct kvm_userspace_memory_region *
+kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
+ uint64_t end)
+{
+ struct userspace_mem_region *region;
+
+ region = userspace_mem_region_find(vm, start, end);
+ if (!region)
+ return NULL;
+
+ return &region->region;
+}
+
+/* VCPU Find
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Pointer to VCPU structure
+ *
+ * Locates a vcpu structure that describes the VCPU specified by vcpuid and
+ * returns a pointer to it. Returns NULL if the VM doesn't contain a VCPU
+ * for the specified vcpuid.
+ */
+struct vcpu *vcpu_find(struct kvm_vm *vm,
+ uint32_t vcpuid)
+{
+ struct vcpu *vcpup;
+
+ for (vcpup = vm->vcpu_head; vcpup; vcpup = vcpup->next) {
+ if (vcpup->id == vcpuid)
+ return vcpup;
+ }
+
+ return NULL;
+}
+
+/* VM VCPU Remove
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ *
+ * Output Args: None
+ *
+ * Return: None, TEST_ASSERT failures for all error conditions
+ *
+ * Within the VM specified by vm, removes the VCPU given by vcpuid.
+ */
+static void vm_vcpu_rm(struct kvm_vm *vm, uint32_t vcpuid)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ int ret;
+
+ ret = munmap(vcpu->state, sizeof(*vcpu->state));
+ TEST_ASSERT(ret == 0, "munmap of VCPU fd failed, rc: %i "
+ "errno: %i", ret, errno);
+ close(vcpu->fd);
+ TEST_ASSERT(ret == 0, "Close of VCPU fd failed, rc: %i "
+ "errno: %i", ret, errno);
+
+ if (vcpu->next)
+ vcpu->next->prev = vcpu->prev;
+ if (vcpu->prev)
+ vcpu->prev->next = vcpu->next;
+ else
+ vm->vcpu_head = vcpu->next;
+ free(vcpu);
+}
+
+void kvm_vm_release(struct kvm_vm *vmp)
+{
+ int ret;
+
+ /* Free VCPUs. */
+ while (vmp->vcpu_head)
+ vm_vcpu_rm(vmp, vmp->vcpu_head->id);
+
+ /* Close file descriptor for the VM. */
+ ret = close(vmp->fd);
+ TEST_ASSERT(ret == 0, "Close of vm fd failed,\n"
+ " vmp->fd: %i rc: %i errno: %i", vmp->fd, ret, errno);
+
+ close(vmp->kvm_fd);
+ TEST_ASSERT(ret == 0, "Close of /dev/kvm fd failed,\n"
+ " vmp->kvm_fd: %i rc: %i errno: %i", vmp->kvm_fd, ret, errno);
+}
+
+/* Destroys and frees the VM pointed to by vmp.
+ */
+void kvm_vm_free(struct kvm_vm *vmp)
+{
+ int ret;
+
+ if (vmp == NULL)
+ return;
+
+ /* Free userspace_mem_regions. */
+ while (vmp->userspace_mem_region_head) {
+ struct userspace_mem_region *region
+ = vmp->userspace_mem_region_head;
+
+ region->region.memory_size = 0;
+ ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION,
+ &region->region);
+ TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed, "
+ "rc: %i errno: %i", ret, errno);
+
+ vmp->userspace_mem_region_head = region->next;
+ sparsebit_free(&region->unused_phy_pages);
+ ret = munmap(region->mmap_start, region->mmap_size);
+ TEST_ASSERT(ret == 0, "munmap failed, rc: %i errno: %i",
+ ret, errno);
+
+ free(region);
+ }
+
+ /* Free sparsebit arrays. */
+ sparsebit_free(&vmp->vpages_valid);
+ sparsebit_free(&vmp->vpages_mapped);
+
+ kvm_vm_release(vmp);
+
+ /* Free the structure describing the VM. */
+ free(vmp);
+}
+
+/* Memory Compare, host virtual to guest virtual
+ *
+ * Input Args:
+ * hva - Starting host virtual address
+ * vm - Virtual Machine
+ * gva - Starting guest virtual address
+ * len - number of bytes to compare
+ *
+ * Output Args: None
+ *
+ * Input/Output Args: None
+ *
+ * Return:
+ * Returns 0 if the bytes starting at hva for a length of len
+ * are equal the guest virtual bytes starting at gva. Returns
+ * a value < 0, if bytes at hva are less than those at gva.
+ * Otherwise a value > 0 is returned.
+ *
+ * Compares the bytes starting at the host virtual address hva, for
+ * a length of len, to the guest bytes starting at the guest virtual
+ * address given by gva.
+ */
+int kvm_memcmp_hva_gva(void *hva,
+ struct kvm_vm *vm, vm_vaddr_t gva, size_t len)
+{
+ size_t amt;
+
+ /* Compare a batch of bytes until either a match is found
+ * or all the bytes have been compared.
+ */
+ for (uintptr_t offset = 0; offset < len; offset += amt) {
+ uintptr_t ptr1 = (uintptr_t)hva + offset;
+
+ /* Determine host address for guest virtual address
+ * at offset.
+ */
+ uintptr_t ptr2 = (uintptr_t)addr_gva2hva(vm, gva + offset);
+
+ /* Determine amount to compare on this pass.
+ * Don't allow the comparsion to cross a page boundary.
+ */
+ amt = len - offset;
+ if ((ptr1 >> vm->page_shift) != ((ptr1 + amt) >> vm->page_shift))
+ amt = vm->page_size - (ptr1 % vm->page_size);
+ if ((ptr2 >> vm->page_shift) != ((ptr2 + amt) >> vm->page_shift))
+ amt = vm->page_size - (ptr2 % vm->page_size);
+
+ assert((ptr1 >> vm->page_shift) == ((ptr1 + amt - 1) >> vm->page_shift));
+ assert((ptr2 >> vm->page_shift) == ((ptr2 + amt - 1) >> vm->page_shift));
+
+ /* Perform the comparison. If there is a difference
+ * return that result to the caller, otherwise need
+ * to continue on looking for a mismatch.
+ */
+ int ret = memcmp((void *)ptr1, (void *)ptr2, amt);
+ if (ret != 0)
+ return ret;
+ }
+
+ /* No mismatch found. Let the caller know the two memory
+ * areas are equal.
+ */
+ return 0;
+}
+
+/* Allocate an instance of struct kvm_cpuid2
+ *
+ * Input Args: None
+ *
+ * Output Args: None
+ *
+ * Return: A pointer to the allocated struct. The caller is responsible
+ * for freeing this struct.
+ *
+ * Since kvm_cpuid2 uses a 0-length array to allow a the size of the
+ * array to be decided at allocation time, allocation is slightly
+ * complicated. This function uses a reasonable default length for
+ * the array and performs the appropriate allocation.
+ */
+static struct kvm_cpuid2 *allocate_kvm_cpuid2(void)
+{
+ struct kvm_cpuid2 *cpuid;
+ int nent = 100;
+ size_t size;
+
+ size = sizeof(*cpuid);
+ size += nent * sizeof(struct kvm_cpuid_entry2);
+ cpuid = malloc(size);
+ if (!cpuid) {
+ perror("malloc");
+ abort();
+ }
+
+ cpuid->nent = nent;
+
+ return cpuid;
+}
+
+/* KVM Supported CPUID Get
+ *
+ * Input Args: None
+ *
+ * Output Args:
+ *
+ * Return: The supported KVM CPUID
+ *
+ * Get the guest CPUID supported by KVM.
+ */
+struct kvm_cpuid2 *kvm_get_supported_cpuid(void)
+{
+ static struct kvm_cpuid2 *cpuid;
+ int ret;
+ int kvm_fd;
+
+ if (cpuid)
+ return cpuid;
+
+ cpuid = allocate_kvm_cpuid2();
+ kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
+ if (kvm_fd < 0)
+ exit(KSFT_SKIP);
+
+ ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid);
+ TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_CPUID failed %d %d\n",
+ ret, errno);
+
+ close(kvm_fd);
+ return cpuid;
+}
+
+/* Locate a cpuid entry.
+ *
+ * Input Args:
+ * cpuid: The cpuid.
+ * function: The function of the cpuid entry to find.
+ *
+ * Output Args: None
+ *
+ * Return: A pointer to the cpuid entry. Never returns NULL.
+ */
+struct kvm_cpuid_entry2 *
+kvm_get_supported_cpuid_index(uint32_t function, uint32_t index)
+{
+ struct kvm_cpuid2 *cpuid;
+ struct kvm_cpuid_entry2 *entry = NULL;
+ int i;
+
+ cpuid = kvm_get_supported_cpuid();
+ for (i = 0; i < cpuid->nent; i++) {
+ if (cpuid->entries[i].function == function &&
+ cpuid->entries[i].index == index) {
+ entry = &cpuid->entries[i];
+ break;
+ }
+ }
+
+ TEST_ASSERT(entry, "Guest CPUID entry not found: (EAX=%x, ECX=%x).",
+ function, index);
+ return entry;
+}
+
+/* VM Userspace Memory Region Add
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * backing_src - Storage source for this region.
+ * NULL to use anonymous memory.
+ * guest_paddr - Starting guest physical address
+ * slot - KVM region slot
+ * npages - Number of physical pages
+ * flags - KVM memory region flags (e.g. KVM_MEM_LOG_DIRTY_PAGES)
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Allocates a memory area of the number of pages specified by npages
+ * and maps it to the VM specified by vm, at a starting physical address
+ * given by guest_paddr. The region is created with a KVM region slot
+ * given by slot, which must be unique and < KVM_MEM_SLOTS_NUM. The
+ * region is created with the flags given by flags.
+ */
+void vm_userspace_mem_region_add(struct kvm_vm *vm,
+ enum vm_mem_backing_src_type src_type,
+ uint64_t guest_paddr, uint32_t slot, uint64_t npages,
+ uint32_t flags)
+{
+ int ret;
+ unsigned long pmem_size = 0;
+ struct userspace_mem_region *region;
+ size_t huge_page_size = KVM_UTIL_PGS_PER_HUGEPG * vm->page_size;
+
+ TEST_ASSERT((guest_paddr % vm->page_size) == 0, "Guest physical "
+ "address not on a page boundary.\n"
+ " guest_paddr: 0x%lx vm->page_size: 0x%x",
+ guest_paddr, vm->page_size);
+ TEST_ASSERT((((guest_paddr >> vm->page_shift) + npages) - 1)
+ <= vm->max_gfn, "Physical range beyond maximum "
+ "supported physical address,\n"
+ " guest_paddr: 0x%lx npages: 0x%lx\n"
+ " vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+ guest_paddr, npages, vm->max_gfn, vm->page_size);
+
+ /* Confirm a mem region with an overlapping address doesn't
+ * already exist.
+ */
+ region = (struct userspace_mem_region *) userspace_mem_region_find(
+ vm, guest_paddr, (guest_paddr + npages * vm->page_size) - 1);
+ if (region != NULL)
+ TEST_ASSERT(false, "overlapping userspace_mem_region already "
+ "exists\n"
+ " requested guest_paddr: 0x%lx npages: 0x%lx "
+ "page_size: 0x%x\n"
+ " existing guest_paddr: 0x%lx size: 0x%lx",
+ guest_paddr, npages, vm->page_size,
+ (uint64_t) region->region.guest_phys_addr,
+ (uint64_t) region->region.memory_size);
+
+ /* Confirm no region with the requested slot already exists. */
+ for (region = vm->userspace_mem_region_head; region;
+ region = region->next) {
+ if (region->region.slot == slot)
+ break;
+ }
+ if (region != NULL)
+ TEST_ASSERT(false, "A mem region with the requested slot "
+ "already exists.\n"
+ " requested slot: %u paddr: 0x%lx npages: 0x%lx\n"
+ " existing slot: %u paddr: 0x%lx size: 0x%lx",
+ slot, guest_paddr, npages,
+ region->region.slot,
+ (uint64_t) region->region.guest_phys_addr,
+ (uint64_t) region->region.memory_size);
+
+ /* Allocate and initialize new mem region structure. */
+ region = calloc(1, sizeof(*region));
+ TEST_ASSERT(region != NULL, "Insufficient Memory");
+ region->mmap_size = npages * vm->page_size;
+
+ /* Enough memory to align up to a huge page. */
+ if (src_type == VM_MEM_SRC_ANONYMOUS_THP)
+ region->mmap_size += huge_page_size;
+ region->mmap_start = mmap(NULL, region->mmap_size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS
+ | (src_type == VM_MEM_SRC_ANONYMOUS_HUGETLB ? MAP_HUGETLB : 0),
+ -1, 0);
+ TEST_ASSERT(region->mmap_start != MAP_FAILED,
+ "test_malloc failed, mmap_start: %p errno: %i",
+ region->mmap_start, errno);
+
+ /* Align THP allocation up to start of a huge page. */
+ region->host_mem = align(region->mmap_start,
+ src_type == VM_MEM_SRC_ANONYMOUS_THP ? huge_page_size : 1);
+
+ /* As needed perform madvise */
+ if (src_type == VM_MEM_SRC_ANONYMOUS || src_type == VM_MEM_SRC_ANONYMOUS_THP) {
+ ret = madvise(region->host_mem, npages * vm->page_size,
+ src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE);
+ TEST_ASSERT(ret == 0, "madvise failed,\n"
+ " addr: %p\n"
+ " length: 0x%lx\n"
+ " src_type: %x",
+ region->host_mem, npages * vm->page_size, src_type);
+ }
+
+ region->unused_phy_pages = sparsebit_alloc();
+ sparsebit_set_num(region->unused_phy_pages,
+ guest_paddr >> vm->page_shift, npages);
+ region->region.slot = slot;
+ region->region.flags = flags;
+ region->region.guest_phys_addr = guest_paddr;
+ region->region.memory_size = npages * vm->page_size;
+ region->region.userspace_addr = (uintptr_t) region->host_mem;
+ ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
+ TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
+ " rc: %i errno: %i\n"
+ " slot: %u flags: 0x%x\n"
+ " guest_phys_addr: 0x%lx size: 0x%lx",
+ ret, errno, slot, flags,
+ guest_paddr, (uint64_t) region->region.memory_size);
+
+ /* Add to linked-list of memory regions. */
+ if (vm->userspace_mem_region_head)
+ vm->userspace_mem_region_head->prev = region;
+ region->next = vm->userspace_mem_region_head;
+ vm->userspace_mem_region_head = region;
+}
+
+/* Memslot to region
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * memslot - KVM memory slot ID
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Pointer to memory region structure that describe memory region
+ * using kvm memory slot ID given by memslot. TEST_ASSERT failure
+ * on error (e.g. currently no memory region using memslot as a KVM
+ * memory slot ID).
+ */
+static struct userspace_mem_region *memslot2region(struct kvm_vm *vm,
+ uint32_t memslot)
+{
+ struct userspace_mem_region *region;
+
+ for (region = vm->userspace_mem_region_head; region;
+ region = region->next) {
+ if (region->region.slot == memslot)
+ break;
+ }
+ if (region == NULL) {
+ fprintf(stderr, "No mem region with the requested slot found,\n"
+ " requested slot: %u\n", memslot);
+ fputs("---- vm dump ----\n", stderr);
+ vm_dump(stderr, vm, 2);
+ TEST_ASSERT(false, "Mem region not found");
+ }
+
+ return region;
+}
+
+/* VM Memory Region Flags Set
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * flags - Starting guest physical address
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Sets the flags of the memory region specified by the value of slot,
+ * to the values given by flags.
+ */
+void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags)
+{
+ int ret;
+ struct userspace_mem_region *region;
+
+ /* Locate memory region. */
+ region = memslot2region(vm, slot);
+
+ region->region.flags = flags;
+
+ ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
+
+ TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
+ " rc: %i errno: %i slot: %u flags: 0x%x",
+ ret, errno, slot, flags);
+}
+
+/* VCPU mmap Size
+ *
+ * Input Args: None
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Size of VCPU state
+ *
+ * Returns the size of the structure pointed to by the return value
+ * of vcpu_state().
+ */
+static int vcpu_mmap_sz(void)
+{
+ int dev_fd, ret;
+
+ dev_fd = open(KVM_DEV_PATH, O_RDONLY);
+ if (dev_fd < 0)
+ exit(KSFT_SKIP);
+
+ ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL);
+ TEST_ASSERT(ret >= sizeof(struct kvm_run),
+ "%s KVM_GET_VCPU_MMAP_SIZE ioctl failed, rc: %i errno: %i",
+ __func__, ret, errno);
+
+ close(dev_fd);
+
+ return ret;
+}
+
+/* VM VCPU Add
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Creates and adds to the VM specified by vm and virtual CPU with
+ * the ID given by vcpuid.
+ */
+void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid, int pgd_memslot, int gdt_memslot)
+{
+ struct vcpu *vcpu;
+
+ /* Confirm a vcpu with the specified id doesn't already exist. */
+ vcpu = vcpu_find(vm, vcpuid);
+ if (vcpu != NULL)
+ TEST_ASSERT(false, "vcpu with the specified id "
+ "already exists,\n"
+ " requested vcpuid: %u\n"
+ " existing vcpuid: %u state: %p",
+ vcpuid, vcpu->id, vcpu->state);
+
+ /* Allocate and initialize new vcpu structure. */
+ vcpu = calloc(1, sizeof(*vcpu));
+ TEST_ASSERT(vcpu != NULL, "Insufficient Memory");
+ vcpu->id = vcpuid;
+ vcpu->fd = ioctl(vm->fd, KVM_CREATE_VCPU, vcpuid);
+ TEST_ASSERT(vcpu->fd >= 0, "KVM_CREATE_VCPU failed, rc: %i errno: %i",
+ vcpu->fd, errno);
+
+ TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->state), "vcpu mmap size "
+ "smaller than expected, vcpu_mmap_sz: %i expected_min: %zi",
+ vcpu_mmap_sz(), sizeof(*vcpu->state));
+ vcpu->state = (struct kvm_run *) mmap(NULL, sizeof(*vcpu->state),
+ PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0);
+ TEST_ASSERT(vcpu->state != MAP_FAILED, "mmap vcpu_state failed, "
+ "vcpu id: %u errno: %i", vcpuid, errno);
+
+ /* Add to linked-list of VCPUs. */
+ if (vm->vcpu_head)
+ vm->vcpu_head->prev = vcpu;
+ vcpu->next = vm->vcpu_head;
+ vm->vcpu_head = vcpu;
+
+ vcpu_setup(vm, vcpuid, pgd_memslot, gdt_memslot);
+}
+
+/* VM Virtual Address Unused Gap
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * sz - Size (bytes)
+ * vaddr_min - Minimum Virtual Address
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Lowest virtual address at or below vaddr_min, with at least
+ * sz unused bytes. TEST_ASSERT failure if no area of at least
+ * size sz is available.
+ *
+ * Within the VM specified by vm, locates the lowest starting virtual
+ * address >= vaddr_min, that has at least sz unallocated bytes. A
+ * TEST_ASSERT failure occurs for invalid input or no area of at least
+ * sz unallocated bytes >= vaddr_min is available.
+ */
+static vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz,
+ vm_vaddr_t vaddr_min)
+{
+ uint64_t pages = (sz + vm->page_size - 1) >> vm->page_shift;
+
+ /* Determine lowest permitted virtual page index. */
+ uint64_t pgidx_start = (vaddr_min + vm->page_size - 1) >> vm->page_shift;
+ if ((pgidx_start * vm->page_size) < vaddr_min)
+ goto no_va_found;
+
+ /* Loop over section with enough valid virtual page indexes. */
+ if (!sparsebit_is_set_num(vm->vpages_valid,
+ pgidx_start, pages))
+ pgidx_start = sparsebit_next_set_num(vm->vpages_valid,
+ pgidx_start, pages);
+ do {
+ /*
+ * Are there enough unused virtual pages available at
+ * the currently proposed starting virtual page index.
+ * If not, adjust proposed starting index to next
+ * possible.
+ */
+ if (sparsebit_is_clear_num(vm->vpages_mapped,
+ pgidx_start, pages))
+ goto va_found;
+ pgidx_start = sparsebit_next_clear_num(vm->vpages_mapped,
+ pgidx_start, pages);
+ if (pgidx_start == 0)
+ goto no_va_found;
+
+ /*
+ * If needed, adjust proposed starting virtual address,
+ * to next range of valid virtual addresses.
+ */
+ if (!sparsebit_is_set_num(vm->vpages_valid,
+ pgidx_start, pages)) {
+ pgidx_start = sparsebit_next_set_num(
+ vm->vpages_valid, pgidx_start, pages);
+ if (pgidx_start == 0)
+ goto no_va_found;
+ }
+ } while (pgidx_start != 0);
+
+no_va_found:
+ TEST_ASSERT(false, "No vaddr of specified pages available, "
+ "pages: 0x%lx", pages);
+
+ /* NOT REACHED */
+ return -1;
+
+va_found:
+ TEST_ASSERT(sparsebit_is_set_num(vm->vpages_valid,
+ pgidx_start, pages),
+ "Unexpected, invalid virtual page index range,\n"
+ " pgidx_start: 0x%lx\n"
+ " pages: 0x%lx",
+ pgidx_start, pages);
+ TEST_ASSERT(sparsebit_is_clear_num(vm->vpages_mapped,
+ pgidx_start, pages),
+ "Unexpected, pages already mapped,\n"
+ " pgidx_start: 0x%lx\n"
+ " pages: 0x%lx",
+ pgidx_start, pages);
+
+ return pgidx_start * vm->page_size;
+}
+
+/* VM Virtual Address Allocate
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * sz - Size in bytes
+ * vaddr_min - Minimum starting virtual address
+ * data_memslot - Memory region slot for data pages
+ * pgd_memslot - Memory region slot for new virtual translation tables
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Starting guest virtual address
+ *
+ * Allocates at least sz bytes within the virtual address space of the vm
+ * given by vm. The allocated bytes are mapped to a virtual address >=
+ * the address given by vaddr_min. Note that each allocation uses a
+ * a unique set of pages, with the minimum real allocation being at least
+ * a page.
+ */
+vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
+ uint32_t data_memslot, uint32_t pgd_memslot)
+{
+ uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0);
+
+ virt_pgd_alloc(vm, pgd_memslot);
+
+ /* Find an unused range of virtual page addresses of at least
+ * pages in length.
+ */
+ vm_vaddr_t vaddr_start = vm_vaddr_unused_gap(vm, sz, vaddr_min);
+
+ /* Map the virtual pages. */
+ for (vm_vaddr_t vaddr = vaddr_start; pages > 0;
+ pages--, vaddr += vm->page_size) {
+ vm_paddr_t paddr;
+
+ paddr = vm_phy_page_alloc(vm, KVM_UTIL_MIN_PADDR, data_memslot);
+
+ virt_pg_map(vm, vaddr, paddr, pgd_memslot);
+
+ sparsebit_set(vm->vpages_mapped,
+ vaddr >> vm->page_shift);
+ }
+
+ return vaddr_start;
+}
+
+/*
+ * Map a range of VM virtual address to the VM's physical address
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vaddr - Virtuall address to map
+ * paddr - VM Physical Address
+ * size - The size of the range to map
+ * pgd_memslot - Memory region slot for new virtual translation tables
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Within the VM given by vm, creates a virtual translation for the
+ * page range starting at vaddr to the page range starting at paddr.
+ */
+void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+ size_t size, uint32_t pgd_memslot)
+{
+ size_t page_size = vm->page_size;
+ size_t npages = size / page_size;
+
+ TEST_ASSERT(vaddr + size > vaddr, "Vaddr overflow");
+ TEST_ASSERT(paddr + size > paddr, "Paddr overflow");
+
+ while (npages--) {
+ virt_pg_map(vm, vaddr, paddr, pgd_memslot);
+ vaddr += page_size;
+ paddr += page_size;
+ }
+}
+
+/* Address VM Physical to Host Virtual
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * gpa - VM physical address
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Equivalent host virtual address
+ *
+ * Locates the memory region containing the VM physical address given
+ * by gpa, within the VM given by vm. When found, the host virtual
+ * address providing the memory to the vm physical address is returned.
+ * A TEST_ASSERT failure occurs if no region containing gpa exists.
+ */
+void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa)
+{
+ struct userspace_mem_region *region;
+ for (region = vm->userspace_mem_region_head; region;
+ region = region->next) {
+ if ((gpa >= region->region.guest_phys_addr)
+ && (gpa <= (region->region.guest_phys_addr
+ + region->region.memory_size - 1)))
+ return (void *) ((uintptr_t) region->host_mem
+ + (gpa - region->region.guest_phys_addr));
+ }
+
+ TEST_ASSERT(false, "No vm physical memory at 0x%lx", gpa);
+ return NULL;
+}
+
+/* Address Host Virtual to VM Physical
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * hva - Host virtual address
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Equivalent VM physical address
+ *
+ * Locates the memory region containing the host virtual address given
+ * by hva, within the VM given by vm. When found, the equivalent
+ * VM physical address is returned. A TEST_ASSERT failure occurs if no
+ * region containing hva exists.
+ */
+vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva)
+{
+ struct userspace_mem_region *region;
+ for (region = vm->userspace_mem_region_head; region;
+ region = region->next) {
+ if ((hva >= region->host_mem)
+ && (hva <= (region->host_mem
+ + region->region.memory_size - 1)))
+ return (vm_paddr_t) ((uintptr_t)
+ region->region.guest_phys_addr
+ + (hva - (uintptr_t) region->host_mem));
+ }
+
+ TEST_ASSERT(false, "No mapping to a guest physical address, "
+ "hva: %p", hva);
+ return -1;
+}
+
+/* VM Create IRQ Chip
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Creates an interrupt controller chip for the VM specified by vm.
+ */
+void vm_create_irqchip(struct kvm_vm *vm)
+{
+ int ret;
+
+ ret = ioctl(vm->fd, KVM_CREATE_IRQCHIP, 0);
+ TEST_ASSERT(ret == 0, "KVM_CREATE_IRQCHIP IOCTL failed, "
+ "rc: %i errno: %i", ret, errno);
+
+ vm->has_irqchip = true;
+}
+
+/* VM VCPU State
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Pointer to structure that describes the state of the VCPU.
+ *
+ * Locates and returns a pointer to a structure that describes the
+ * state of the VCPU with the given vcpuid.
+ */
+struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+ return vcpu->state;
+}
+
+/* VM VCPU Run
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Switch to executing the code for the VCPU given by vcpuid, within the VM
+ * given by vm.
+ */
+void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid)
+{
+ int ret = _vcpu_run(vm, vcpuid);
+ TEST_ASSERT(ret == 0, "KVM_RUN IOCTL failed, "
+ "rc: %i errno: %i", ret, errno);
+}
+
+int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ int rc;
+
+ TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+ do {
+ rc = ioctl(vcpu->fd, KVM_RUN, NULL);
+ } while (rc == -1 && errno == EINTR);
+ return rc;
+}
+
+/* VM VCPU Set MP State
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ * mp_state - mp_state to be set
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Sets the MP state of the VCPU given by vcpuid, to the state given
+ * by mp_state.
+ */
+void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_mp_state *mp_state)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ int ret;
+
+ TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+ ret = ioctl(vcpu->fd, KVM_SET_MP_STATE, mp_state);
+ TEST_ASSERT(ret == 0, "KVM_SET_MP_STATE IOCTL failed, "
+ "rc: %i errno: %i", ret, errno);
+}
+
+/* VM VCPU Regs Get
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ *
+ * Output Args:
+ * regs - current state of VCPU regs
+ *
+ * Return: None
+ *
+ * Obtains the current register state for the VCPU specified by vcpuid
+ * and stores it at the location given by regs.
+ */
+void vcpu_regs_get(struct kvm_vm *vm,
+ uint32_t vcpuid, struct kvm_regs *regs)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ int ret;
+
+ TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+ /* Get the regs. */
+ ret = ioctl(vcpu->fd, KVM_GET_REGS, regs);
+ TEST_ASSERT(ret == 0, "KVM_GET_REGS failed, rc: %i errno: %i",
+ ret, errno);
+}
+
+/* VM VCPU Regs Set
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ * regs - Values to set VCPU regs to
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Sets the regs of the VCPU specified by vcpuid to the values
+ * given by regs.
+ */
+void vcpu_regs_set(struct kvm_vm *vm,
+ uint32_t vcpuid, struct kvm_regs *regs)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ int ret;
+
+ TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+ /* Set the regs. */
+ ret = ioctl(vcpu->fd, KVM_SET_REGS, regs);
+ TEST_ASSERT(ret == 0, "KVM_SET_REGS failed, rc: %i errno: %i",
+ ret, errno);
+}
+
+void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_vcpu_events *events)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ int ret;
+
+ TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+ /* Get the regs. */
+ ret = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, events);
+ TEST_ASSERT(ret == 0, "KVM_GET_VCPU_EVENTS, failed, rc: %i errno: %i",
+ ret, errno);
+}
+
+void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_vcpu_events *events)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ int ret;
+
+ TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+ /* Set the regs. */
+ ret = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, events);
+ TEST_ASSERT(ret == 0, "KVM_SET_VCPU_EVENTS, failed, rc: %i errno: %i",
+ ret, errno);
+}
+
+/* VCPU Get MSR
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ * msr_index - Index of MSR
+ *
+ * Output Args: None
+ *
+ * Return: On success, value of the MSR. On failure a TEST_ASSERT is produced.
+ *
+ * Get value of MSR for VCPU.
+ */
+uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ struct {
+ struct kvm_msrs header;
+ struct kvm_msr_entry entry;
+ } buffer = {};
+ int r;
+
+ TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+ buffer.header.nmsrs = 1;
+ buffer.entry.index = msr_index;
+ r = ioctl(vcpu->fd, KVM_GET_MSRS, &buffer.header);
+ TEST_ASSERT(r == 1, "KVM_GET_MSRS IOCTL failed,\n"
+ " rc: %i errno: %i", r, errno);
+
+ return buffer.entry.data;
+}
+
+/* VCPU Set MSR
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ * msr_index - Index of MSR
+ * msr_value - New value of MSR
+ *
+ * Output Args: None
+ *
+ * Return: On success, nothing. On failure a TEST_ASSERT is produced.
+ *
+ * Set value of MSR for VCPU.
+ */
+void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
+ uint64_t msr_value)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ struct {
+ struct kvm_msrs header;
+ struct kvm_msr_entry entry;
+ } buffer = {};
+ int r;
+
+ TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+ memset(&buffer, 0, sizeof(buffer));
+ buffer.header.nmsrs = 1;
+ buffer.entry.index = msr_index;
+ buffer.entry.data = msr_value;
+ r = ioctl(vcpu->fd, KVM_SET_MSRS, &buffer.header);
+ TEST_ASSERT(r == 1, "KVM_SET_MSRS IOCTL failed,\n"
+ " rc: %i errno: %i", r, errno);
+}
+
+/* VM VCPU Args Set
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ * num - number of arguments
+ * ... - arguments, each of type uint64_t
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Sets the first num function input arguments to the values
+ * given as variable args. Each of the variable args is expected to
+ * be of type uint64_t.
+ */
+void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
+{
+ va_list ap;
+ struct kvm_regs regs;
+
+ TEST_ASSERT(num >= 1 && num <= 6, "Unsupported number of args,\n"
+ " num: %u\n",
+ num);
+
+ va_start(ap, num);
+ vcpu_regs_get(vm, vcpuid, &regs);
+
+ if (num >= 1)
+ regs.rdi = va_arg(ap, uint64_t);
+
+ if (num >= 2)
+ regs.rsi = va_arg(ap, uint64_t);
+
+ if (num >= 3)
+ regs.rdx = va_arg(ap, uint64_t);
+
+ if (num >= 4)
+ regs.rcx = va_arg(ap, uint64_t);
+
+ if (num >= 5)
+ regs.r8 = va_arg(ap, uint64_t);
+
+ if (num >= 6)
+ regs.r9 = va_arg(ap, uint64_t);
+
+ vcpu_regs_set(vm, vcpuid, &regs);
+ va_end(ap);
+}
+
+/* VM VCPU System Regs Get
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ *
+ * Output Args:
+ * sregs - current state of VCPU system regs
+ *
+ * Return: None
+ *
+ * Obtains the current system register state for the VCPU specified by
+ * vcpuid and stores it at the location given by sregs.
+ */
+void vcpu_sregs_get(struct kvm_vm *vm,
+ uint32_t vcpuid, struct kvm_sregs *sregs)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ int ret;
+
+ TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+ /* Get the regs. */
+ /* Get the regs. */
+ ret = ioctl(vcpu->fd, KVM_GET_SREGS, sregs);
+ TEST_ASSERT(ret == 0, "KVM_GET_SREGS failed, rc: %i errno: %i",
+ ret, errno);
+}
+
+/* VM VCPU System Regs Set
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ * sregs - Values to set VCPU system regs to
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Sets the system regs of the VCPU specified by vcpuid to the values
+ * given by sregs.
+ */
+void vcpu_sregs_set(struct kvm_vm *vm,
+ uint32_t vcpuid, struct kvm_sregs *sregs)
+{
+ int ret = _vcpu_sregs_set(vm, vcpuid, sregs);
+ TEST_ASSERT(ret == 0, "KVM_RUN IOCTL failed, "
+ "rc: %i errno: %i", ret, errno);
+}
+
+int _vcpu_sregs_set(struct kvm_vm *vm,
+ uint32_t vcpuid, struct kvm_sregs *sregs)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ int ret;
+
+ TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+ /* Get the regs. */
+ return ioctl(vcpu->fd, KVM_SET_SREGS, sregs);
+}
+
+/* VCPU Ioctl
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ * cmd - Ioctl number
+ * arg - Argument to pass to the ioctl
+ *
+ * Return: None
+ *
+ * Issues an arbitrary ioctl on a VCPU fd.
+ */
+void vcpu_ioctl(struct kvm_vm *vm,
+ uint32_t vcpuid, unsigned long cmd, void *arg)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ int ret;
+
+ TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+ ret = ioctl(vcpu->fd, cmd, arg);
+ TEST_ASSERT(ret == 0, "vcpu ioctl %lu failed, rc: %i errno: %i (%s)",
+ cmd, ret, errno, strerror(errno));
+}
+
+/* VM Ioctl
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * cmd - Ioctl number
+ * arg - Argument to pass to the ioctl
+ *
+ * Return: None
+ *
+ * Issues an arbitrary ioctl on a VM fd.
+ */
+void vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
+{
+ int ret;
+
+ ret = ioctl(vm->fd, cmd, arg);
+ TEST_ASSERT(ret == 0, "vm ioctl %lu failed, rc: %i errno: %i (%s)",
+ cmd, ret, errno, strerror(errno));
+}
+
+/* VM Dump
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * indent - Left margin indent amount
+ *
+ * Output Args:
+ * stream - Output FILE stream
+ *
+ * Return: None
+ *
+ * Dumps the current state of the VM given by vm, to the FILE stream
+ * given by stream.
+ */
+void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+{
+ struct userspace_mem_region *region;
+ struct vcpu *vcpu;
+
+ fprintf(stream, "%*smode: 0x%x\n", indent, "", vm->mode);
+ fprintf(stream, "%*sfd: %i\n", indent, "", vm->fd);
+ fprintf(stream, "%*spage_size: 0x%x\n", indent, "", vm->page_size);
+ fprintf(stream, "%*sMem Regions:\n", indent, "");
+ for (region = vm->userspace_mem_region_head; region;
+ region = region->next) {
+ fprintf(stream, "%*sguest_phys: 0x%lx size: 0x%lx "
+ "host_virt: %p\n", indent + 2, "",
+ (uint64_t) region->region.guest_phys_addr,
+ (uint64_t) region->region.memory_size,
+ region->host_mem);
+ fprintf(stream, "%*sunused_phy_pages: ", indent + 2, "");
+ sparsebit_dump(stream, region->unused_phy_pages, 0);
+ }
+ fprintf(stream, "%*sMapped Virtual Pages:\n", indent, "");
+ sparsebit_dump(stream, vm->vpages_mapped, indent + 2);
+ fprintf(stream, "%*spgd_created: %u\n", indent, "",
+ vm->pgd_created);
+ if (vm->pgd_created) {
+ fprintf(stream, "%*sVirtual Translation Tables:\n",
+ indent + 2, "");
+ virt_dump(stream, vm, indent + 4);
+ }
+ fprintf(stream, "%*sVCPUs:\n", indent, "");
+ for (vcpu = vm->vcpu_head; vcpu; vcpu = vcpu->next)
+ vcpu_dump(stream, vm, vcpu->id, indent + 2);
+}
+
+/* VM VCPU Dump
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ * indent - Left margin indent amount
+ *
+ * Output Args:
+ * stream - Output FILE stream
+ *
+ * Return: None
+ *
+ * Dumps the current state of the VCPU specified by vcpuid, within the VM
+ * given by vm, to the FILE stream given by stream.
+ */
+void vcpu_dump(FILE *stream, struct kvm_vm *vm,
+ uint32_t vcpuid, uint8_t indent)
+{
+ struct kvm_regs regs;
+ struct kvm_sregs sregs;
+
+ fprintf(stream, "%*scpuid: %u\n", indent, "", vcpuid);
+
+ fprintf(stream, "%*sregs:\n", indent + 2, "");
+ vcpu_regs_get(vm, vcpuid, &regs);
+ regs_dump(stream, &regs, indent + 4);
+
+ fprintf(stream, "%*ssregs:\n", indent + 2, "");
+ vcpu_sregs_get(vm, vcpuid, &sregs);
+ sregs_dump(stream, &sregs, indent + 4);
+}
+
+/* Known KVM exit reasons */
+static struct exit_reason {
+ unsigned int reason;
+ const char *name;
+} exit_reasons_known[] = {
+ {KVM_EXIT_UNKNOWN, "UNKNOWN"},
+ {KVM_EXIT_EXCEPTION, "EXCEPTION"},
+ {KVM_EXIT_IO, "IO"},
+ {KVM_EXIT_HYPERCALL, "HYPERCALL"},
+ {KVM_EXIT_DEBUG, "DEBUG"},
+ {KVM_EXIT_HLT, "HLT"},
+ {KVM_EXIT_MMIO, "MMIO"},
+ {KVM_EXIT_IRQ_WINDOW_OPEN, "IRQ_WINDOW_OPEN"},
+ {KVM_EXIT_SHUTDOWN, "SHUTDOWN"},
+ {KVM_EXIT_FAIL_ENTRY, "FAIL_ENTRY"},
+ {KVM_EXIT_INTR, "INTR"},
+ {KVM_EXIT_SET_TPR, "SET_TPR"},
+ {KVM_EXIT_TPR_ACCESS, "TPR_ACCESS"},
+ {KVM_EXIT_S390_SIEIC, "S390_SIEIC"},
+ {KVM_EXIT_S390_RESET, "S390_RESET"},
+ {KVM_EXIT_DCR, "DCR"},
+ {KVM_EXIT_NMI, "NMI"},
+ {KVM_EXIT_INTERNAL_ERROR, "INTERNAL_ERROR"},
+ {KVM_EXIT_OSI, "OSI"},
+ {KVM_EXIT_PAPR_HCALL, "PAPR_HCALL"},
+#ifdef KVM_EXIT_MEMORY_NOT_PRESENT
+ {KVM_EXIT_MEMORY_NOT_PRESENT, "MEMORY_NOT_PRESENT"},
+#endif
+};
+
+/* Exit Reason String
+ *
+ * Input Args:
+ * exit_reason - Exit reason
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Constant string pointer describing the exit reason.
+ *
+ * Locates and returns a constant string that describes the KVM exit
+ * reason given by exit_reason. If no such string is found, a constant
+ * string of "Unknown" is returned.
+ */
+const char *exit_reason_str(unsigned int exit_reason)
+{
+ unsigned int n1;
+
+ for (n1 = 0; n1 < ARRAY_SIZE(exit_reasons_known); n1++) {
+ if (exit_reason == exit_reasons_known[n1].reason)
+ return exit_reasons_known[n1].name;
+ }
+
+ return "Unknown";
+}
+
+/* Physical Page Allocate
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * paddr_min - Physical address minimum
+ * memslot - Memory region to allocate page from
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Starting physical address
+ *
+ * Within the VM specified by vm, locates an available physical page
+ * at or above paddr_min. If found, the page is marked as in use
+ * and its address is returned. A TEST_ASSERT failure occurs if no
+ * page is available at or above paddr_min.
+ */
+vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm,
+ vm_paddr_t paddr_min, uint32_t memslot)
+{
+ struct userspace_mem_region *region;
+ sparsebit_idx_t pg;
+
+ TEST_ASSERT((paddr_min % vm->page_size) == 0, "Min physical address "
+ "not divisible by page size.\n"
+ " paddr_min: 0x%lx page_size: 0x%x",
+ paddr_min, vm->page_size);
+
+ /* Locate memory region. */
+ region = memslot2region(vm, memslot);
+
+ /* Locate next available physical page at or above paddr_min. */
+ pg = paddr_min >> vm->page_shift;
+
+ if (!sparsebit_is_set(region->unused_phy_pages, pg)) {
+ pg = sparsebit_next_set(region->unused_phy_pages, pg);
+ if (pg == 0) {
+ fprintf(stderr, "No guest physical page available, "
+ "paddr_min: 0x%lx page_size: 0x%x memslot: %u",
+ paddr_min, vm->page_size, memslot);
+ fputs("---- vm dump ----\n", stderr);
+ vm_dump(stderr, vm, 2);
+ abort();
+ }
+ }
+
+ /* Specify page as in use and return its address. */
+ sparsebit_clear(region->unused_phy_pages, pg);
+
+ return pg * vm->page_size;
+}
+
+/* Address Guest Virtual to Host Virtual
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * gva - VM virtual address
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Equivalent host virtual address
+ */
+void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+ return addr_gpa2hva(vm, addr_gva2gpa(vm, gva));
+}
+
+void guest_args_read(struct kvm_vm *vm, uint32_t vcpu_id,
+ struct guest_args *args)
+{
+ struct kvm_run *run = vcpu_state(vm, vcpu_id);
+ struct kvm_regs regs;
+
+ memset(&regs, 0, sizeof(regs));
+ vcpu_regs_get(vm, vcpu_id, &regs);
+
+ args->port = run->io.port;
+ args->arg0 = regs.rdi;
+ args->arg1 = regs.rsi;
+}
diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h
new file mode 100644
index 000000000..542ed606b
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/kvm_util_internal.h
@@ -0,0 +1,72 @@
+/*
+ * tools/testing/selftests/kvm/lib/kvm_util.c
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+
+#ifndef KVM_UTIL_INTERNAL_H
+#define KVM_UTIL_INTERNAL_H 1
+
+#include "sparsebit.h"
+
+#ifndef BITS_PER_BYTE
+#define BITS_PER_BYTE 8
+#endif
+
+#ifndef BITS_PER_LONG
+#define BITS_PER_LONG (BITS_PER_BYTE * sizeof(long))
+#endif
+
+#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
+#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_LONG)
+
+/* Concrete definition of struct kvm_vm. */
+struct userspace_mem_region {
+ struct userspace_mem_region *next, *prev;
+ struct kvm_userspace_memory_region region;
+ struct sparsebit *unused_phy_pages;
+ int fd;
+ off_t offset;
+ void *host_mem;
+ void *mmap_start;
+ size_t mmap_size;
+};
+
+struct vcpu {
+ struct vcpu *next, *prev;
+ uint32_t id;
+ int fd;
+ struct kvm_run *state;
+};
+
+struct kvm_vm {
+ int mode;
+ int kvm_fd;
+ int fd;
+ unsigned int page_size;
+ unsigned int page_shift;
+ uint64_t max_gfn;
+ struct vcpu *vcpu_head;
+ struct userspace_mem_region *userspace_mem_region_head;
+ struct sparsebit *vpages_valid;
+ struct sparsebit *vpages_mapped;
+
+ bool has_irqchip;
+ bool pgd_created;
+ vm_paddr_t pgd;
+ vm_vaddr_t gdt;
+ vm_vaddr_t tss;
+};
+
+struct vcpu *vcpu_find(struct kvm_vm *vm,
+ uint32_t vcpuid);
+void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_memslot);
+void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
+void regs_dump(FILE *stream, struct kvm_regs *regs,
+ uint8_t indent);
+void sregs_dump(FILE *stream, struct kvm_sregs *sregs,
+ uint8_t indent);
+
+#endif
diff --git a/tools/testing/selftests/kvm/lib/sparsebit.c b/tools/testing/selftests/kvm/lib/sparsebit.c
new file mode 100644
index 000000000..b132bc95d
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/sparsebit.c
@@ -0,0 +1,2087 @@
+/*
+ * Sparse bit array
+ *
+ * Copyright (C) 2018, Google LLC.
+ * Copyright (C) 2018, Red Hat, Inc. (code style cleanup and fuzzing driver)
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * This library provides functions to support a memory efficient bit array,
+ * with an index size of 2^64. A sparsebit array is allocated through
+ * the use sparsebit_alloc() and free'd via sparsebit_free(),
+ * such as in the following:
+ *
+ * struct sparsebit *s;
+ * s = sparsebit_alloc();
+ * sparsebit_free(&s);
+ *
+ * The struct sparsebit type resolves down to a struct sparsebit.
+ * Note that, sparsebit_free() takes a pointer to the sparsebit
+ * structure. This is so that sparsebit_free() is able to poison
+ * the pointer (e.g. set it to NULL) to the struct sparsebit before
+ * returning to the caller.
+ *
+ * Between the return of sparsebit_alloc() and the call of
+ * sparsebit_free(), there are multiple query and modifying operations
+ * that can be performed on the allocated sparsebit array. All of
+ * these operations take as a parameter the value returned from
+ * sparsebit_alloc() and most also take a bit index. Frequently
+ * used routines include:
+ *
+ * ---- Query Operations
+ * sparsebit_is_set(s, idx)
+ * sparsebit_is_clear(s, idx)
+ * sparsebit_any_set(s)
+ * sparsebit_first_set(s)
+ * sparsebit_next_set(s, prev_idx)
+ *
+ * ---- Modifying Operations
+ * sparsebit_set(s, idx)
+ * sparsebit_clear(s, idx)
+ * sparsebit_set_num(s, idx, num);
+ * sparsebit_clear_num(s, idx, num);
+ *
+ * A common operation, is to itterate over all the bits set in a test
+ * sparsebit array. This can be done via code with the following structure:
+ *
+ * sparsebit_idx_t idx;
+ * if (sparsebit_any_set(s)) {
+ * idx = sparsebit_first_set(s);
+ * do {
+ * ...
+ * idx = sparsebit_next_set(s, idx);
+ * } while (idx != 0);
+ * }
+ *
+ * The index of the first bit set needs to be obtained via
+ * sparsebit_first_set(), because sparsebit_next_set(), needs
+ * the index of the previously set. The sparsebit_idx_t type is
+ * unsigned, so there is no previous index before 0 that is available.
+ * Also, the call to sparsebit_first_set() is not made unless there
+ * is at least 1 bit in the array set. This is because sparsebit_first_set()
+ * aborts if sparsebit_first_set() is called with no bits set.
+ * It is the callers responsibility to assure that the
+ * sparsebit array has at least a single bit set before calling
+ * sparsebit_first_set().
+ *
+ * ==== Implementation Overview ====
+ * For the most part the internal implementation of sparsebit is
+ * opaque to the caller. One important implementation detail that the
+ * caller may need to be aware of is the spatial complexity of the
+ * implementation. This implementation of a sparsebit array is not
+ * only sparse, in that it uses memory proportional to the number of bits
+ * set. It is also efficient in memory usage when most of the bits are
+ * set.
+ *
+ * At a high-level the state of the bit settings are maintained through
+ * the use of a binary-search tree, where each node contains at least
+ * the following members:
+ *
+ * typedef uint64_t sparsebit_idx_t;
+ * typedef uint64_t sparsebit_num_t;
+ *
+ * sparsebit_idx_t idx;
+ * uint32_t mask;
+ * sparsebit_num_t num_after;
+ *
+ * The idx member contains the bit index of the first bit described by this
+ * node, while the mask member stores the setting of the first 32-bits.
+ * The setting of the bit at idx + n, where 0 <= n < 32, is located in the
+ * mask member at 1 << n.
+ *
+ * Nodes are sorted by idx and the bits described by two nodes will never
+ * overlap. The idx member is always aligned to the mask size, i.e. a
+ * multiple of 32.
+ *
+ * Beyond a typical implementation, the nodes in this implementation also
+ * contains a member named num_after. The num_after member holds the
+ * number of bits immediately after the mask bits that are contiguously set.
+ * The use of the num_after member allows this implementation to efficiently
+ * represent cases where most bits are set. For example, the case of all
+ * but the last two bits set, is represented by the following two nodes:
+ *
+ * node 0 - idx: 0x0 mask: 0xffffffff num_after: 0xffffffffffffffc0
+ * node 1 - idx: 0xffffffffffffffe0 mask: 0x3fffffff num_after: 0
+ *
+ * ==== Invariants ====
+ * This implementation usses the following invariants:
+ *
+ * + Node are only used to represent bits that are set.
+ * Nodes with a mask of 0 and num_after of 0 are not allowed.
+ *
+ * + Sum of bits set in all the nodes is equal to the value of
+ * the struct sparsebit_pvt num_set member.
+ *
+ * + The setting of at least one bit is always described in a nodes
+ * mask (mask >= 1).
+ *
+ * + A node with all mask bits set only occurs when the last bit
+ * described by the previous node is not equal to this nodes
+ * starting index - 1. All such occurences of this condition are
+ * avoided by moving the setting of the nodes mask bits into
+ * the previous nodes num_after setting.
+ *
+ * + Node starting index is evenly divisible by the number of bits
+ * within a nodes mask member.
+ *
+ * + Nodes never represent a range of bits that wrap around the
+ * highest supported index.
+ *
+ * (idx + MASK_BITS + num_after - 1) <= ((sparsebit_idx_t) 0) - 1)
+ *
+ * As a consequence of the above, the num_after member of a node
+ * will always be <=:
+ *
+ * maximum_index - nodes_starting_index - number_of_mask_bits
+ *
+ * + Nodes within the binary search tree are sorted based on each
+ * nodes starting index.
+ *
+ * + The range of bits described by any two nodes do not overlap. The
+ * range of bits described by a single node is:
+ *
+ * start: node->idx
+ * end (inclusive): node->idx + MASK_BITS + node->num_after - 1;
+ *
+ * Note, at times these invariants are temporarily violated for a
+ * specific portion of the code. For example, when setting a mask
+ * bit, there is a small delay between when the mask bit is set and the
+ * value in the struct sparsebit_pvt num_set member is updated. Other
+ * temporary violations occur when node_split() is called with a specified
+ * index and assures that a node where its mask represents the bit
+ * at the specified index exists. At times to do this node_split()
+ * must split an existing node into two nodes or create a node that
+ * has no bits set. Such temporary violations must be corrected before
+ * returning to the caller. These corrections are typically performed
+ * by the local function node_reduce().
+ */
+
+#include "test_util.h"
+#include "sparsebit.h"
+#include <limits.h>
+#include <assert.h>
+
+#define DUMP_LINE_MAX 100 /* Does not include indent amount */
+
+typedef uint32_t mask_t;
+#define MASK_BITS (sizeof(mask_t) * CHAR_BIT)
+
+struct node {
+ struct node *parent;
+ struct node *left;
+ struct node *right;
+ sparsebit_idx_t idx; /* index of least-significant bit in mask */
+ sparsebit_num_t num_after; /* num contiguously set after mask */
+ mask_t mask;
+};
+
+struct sparsebit {
+ /*
+ * Points to root node of the binary search
+ * tree. Equal to NULL when no bits are set in
+ * the entire sparsebit array.
+ */
+ struct node *root;
+
+ /*
+ * A redundant count of the total number of bits set. Used for
+ * diagnostic purposes and to change the time complexity of
+ * sparsebit_num_set() from O(n) to O(1).
+ * Note: Due to overflow, a value of 0 means none or all set.
+ */
+ sparsebit_num_t num_set;
+};
+
+/* Returns the number of set bits described by the settings
+ * of the node pointed to by nodep.
+ */
+static sparsebit_num_t node_num_set(struct node *nodep)
+{
+ return nodep->num_after + __builtin_popcount(nodep->mask);
+}
+
+/* Returns a pointer to the node that describes the
+ * lowest bit index.
+ */
+static struct node *node_first(struct sparsebit *s)
+{
+ struct node *nodep;
+
+ for (nodep = s->root; nodep && nodep->left; nodep = nodep->left)
+ ;
+
+ return nodep;
+}
+
+/* Returns a pointer to the node that describes the
+ * lowest bit index > the index of the node pointed to by np.
+ * Returns NULL if no node with a higher index exists.
+ */
+static struct node *node_next(struct sparsebit *s, struct node *np)
+{
+ struct node *nodep = np;
+
+ /*
+ * If current node has a right child, next node is the left-most
+ * of the right child.
+ */
+ if (nodep->right) {
+ for (nodep = nodep->right; nodep->left; nodep = nodep->left)
+ ;
+ return nodep;
+ }
+
+ /*
+ * No right child. Go up until node is left child of a parent.
+ * That parent is then the next node.
+ */
+ while (nodep->parent && nodep == nodep->parent->right)
+ nodep = nodep->parent;
+
+ return nodep->parent;
+}
+
+/* Searches for and returns a pointer to the node that describes the
+ * highest index < the index of the node pointed to by np.
+ * Returns NULL if no node with a lower index exists.
+ */
+static struct node *node_prev(struct sparsebit *s, struct node *np)
+{
+ struct node *nodep = np;
+
+ /*
+ * If current node has a left child, next node is the right-most
+ * of the left child.
+ */
+ if (nodep->left) {
+ for (nodep = nodep->left; nodep->right; nodep = nodep->right)
+ ;
+ return (struct node *) nodep;
+ }
+
+ /*
+ * No left child. Go up until node is right child of a parent.
+ * That parent is then the next node.
+ */
+ while (nodep->parent && nodep == nodep->parent->left)
+ nodep = nodep->parent;
+
+ return (struct node *) nodep->parent;
+}
+
+
+/* Allocates space to hold a copy of the node sub-tree pointed to by
+ * subtree and duplicates the bit settings to the newly allocated nodes.
+ * Returns the newly allocated copy of subtree.
+ */
+static struct node *node_copy_subtree(struct node *subtree)
+{
+ struct node *root;
+
+ /* Duplicate the node at the root of the subtree */
+ root = calloc(1, sizeof(*root));
+ if (!root) {
+ perror("calloc");
+ abort();
+ }
+
+ root->idx = subtree->idx;
+ root->mask = subtree->mask;
+ root->num_after = subtree->num_after;
+
+ /* As needed, recursively duplicate the left and right subtrees */
+ if (subtree->left) {
+ root->left = node_copy_subtree(subtree->left);
+ root->left->parent = root;
+ }
+
+ if (subtree->right) {
+ root->right = node_copy_subtree(subtree->right);
+ root->right->parent = root;
+ }
+
+ return root;
+}
+
+/* Searches for and returns a pointer to the node that describes the setting
+ * of the bit given by idx. A node describes the setting of a bit if its
+ * index is within the bits described by the mask bits or the number of
+ * contiguous bits set after the mask. Returns NULL if there is no such node.
+ */
+static struct node *node_find(struct sparsebit *s, sparsebit_idx_t idx)
+{
+ struct node *nodep;
+
+ /* Find the node that describes the setting of the bit at idx */
+ for (nodep = s->root; nodep;
+ nodep = nodep->idx > idx ? nodep->left : nodep->right) {
+ if (idx >= nodep->idx &&
+ idx <= nodep->idx + MASK_BITS + nodep->num_after - 1)
+ break;
+ }
+
+ return nodep;
+}
+
+/* Entry Requirements:
+ * + A node that describes the setting of idx is not already present.
+ *
+ * Adds a new node to describe the setting of the bit at the index given
+ * by idx. Returns a pointer to the newly added node.
+ *
+ * TODO(lhuemill): Degenerate cases causes the tree to get unbalanced.
+ */
+static struct node *node_add(struct sparsebit *s, sparsebit_idx_t idx)
+{
+ struct node *nodep, *parentp, *prev;
+
+ /* Allocate and initialize the new node. */
+ nodep = calloc(1, sizeof(*nodep));
+ if (!nodep) {
+ perror("calloc");
+ abort();
+ }
+
+ nodep->idx = idx & -MASK_BITS;
+
+ /* If no nodes, set it up as the root node. */
+ if (!s->root) {
+ s->root = nodep;
+ return nodep;
+ }
+
+ /*
+ * Find the parent where the new node should be attached
+ * and add the node there.
+ */
+ parentp = s->root;
+ while (true) {
+ if (idx < parentp->idx) {
+ if (!parentp->left) {
+ parentp->left = nodep;
+ nodep->parent = parentp;
+ break;
+ }
+ parentp = parentp->left;
+ } else {
+ assert(idx > parentp->idx + MASK_BITS + parentp->num_after - 1);
+ if (!parentp->right) {
+ parentp->right = nodep;
+ nodep->parent = parentp;
+ break;
+ }
+ parentp = parentp->right;
+ }
+ }
+
+ /*
+ * Does num_after bits of previous node overlap with the mask
+ * of the new node? If so set the bits in the new nodes mask
+ * and reduce the previous nodes num_after.
+ */
+ prev = node_prev(s, nodep);
+ while (prev && prev->idx + MASK_BITS + prev->num_after - 1 >= nodep->idx) {
+ unsigned int n1 = (prev->idx + MASK_BITS + prev->num_after - 1)
+ - nodep->idx;
+ assert(prev->num_after > 0);
+ assert(n1 < MASK_BITS);
+ assert(!(nodep->mask & (1 << n1)));
+ nodep->mask |= (1 << n1);
+ prev->num_after--;
+ }
+
+ return nodep;
+}
+
+/* Returns whether all the bits in the sparsebit array are set. */
+bool sparsebit_all_set(struct sparsebit *s)
+{
+ /*
+ * If any nodes there must be at least one bit set. Only case
+ * where a bit is set and total num set is 0, is when all bits
+ * are set.
+ */
+ return s->root && s->num_set == 0;
+}
+
+/* Clears all bits described by the node pointed to by nodep, then
+ * removes the node.
+ */
+static void node_rm(struct sparsebit *s, struct node *nodep)
+{
+ struct node *tmp;
+ sparsebit_num_t num_set;
+
+ num_set = node_num_set(nodep);
+ assert(s->num_set >= num_set || sparsebit_all_set(s));
+ s->num_set -= node_num_set(nodep);
+
+ /* Have both left and right child */
+ if (nodep->left && nodep->right) {
+ /*
+ * Move left children to the leftmost leaf node
+ * of the right child.
+ */
+ for (tmp = nodep->right; tmp->left; tmp = tmp->left)
+ ;
+ tmp->left = nodep->left;
+ nodep->left = NULL;
+ tmp->left->parent = tmp;
+ }
+
+ /* Left only child */
+ if (nodep->left) {
+ if (!nodep->parent) {
+ s->root = nodep->left;
+ nodep->left->parent = NULL;
+ } else {
+ nodep->left->parent = nodep->parent;
+ if (nodep == nodep->parent->left)
+ nodep->parent->left = nodep->left;
+ else {
+ assert(nodep == nodep->parent->right);
+ nodep->parent->right = nodep->left;
+ }
+ }
+
+ nodep->parent = nodep->left = nodep->right = NULL;
+ free(nodep);
+
+ return;
+ }
+
+
+ /* Right only child */
+ if (nodep->right) {
+ if (!nodep->parent) {
+ s->root = nodep->right;
+ nodep->right->parent = NULL;
+ } else {
+ nodep->right->parent = nodep->parent;
+ if (nodep == nodep->parent->left)
+ nodep->parent->left = nodep->right;
+ else {
+ assert(nodep == nodep->parent->right);
+ nodep->parent->right = nodep->right;
+ }
+ }
+
+ nodep->parent = nodep->left = nodep->right = NULL;
+ free(nodep);
+
+ return;
+ }
+
+ /* Leaf Node */
+ if (!nodep->parent) {
+ s->root = NULL;
+ } else {
+ if (nodep->parent->left == nodep)
+ nodep->parent->left = NULL;
+ else {
+ assert(nodep == nodep->parent->right);
+ nodep->parent->right = NULL;
+ }
+ }
+
+ nodep->parent = nodep->left = nodep->right = NULL;
+ free(nodep);
+
+ return;
+}
+
+/* Splits the node containing the bit at idx so that there is a node
+ * that starts at the specified index. If no such node exists, a new
+ * node at the specified index is created. Returns the new node.
+ *
+ * idx must start of a mask boundary.
+ */
+static struct node *node_split(struct sparsebit *s, sparsebit_idx_t idx)
+{
+ struct node *nodep1, *nodep2;
+ sparsebit_idx_t offset;
+ sparsebit_num_t orig_num_after;
+
+ assert(!(idx % MASK_BITS));
+
+ /*
+ * Is there a node that describes the setting of idx?
+ * If not, add it.
+ */
+ nodep1 = node_find(s, idx);
+ if (!nodep1)
+ return node_add(s, idx);
+
+ /*
+ * All done if the starting index of the node is where the
+ * split should occur.
+ */
+ if (nodep1->idx == idx)
+ return nodep1;
+
+ /*
+ * Split point not at start of mask, so it must be part of
+ * bits described by num_after.
+ */
+
+ /*
+ * Calculate offset within num_after for where the split is
+ * to occur.
+ */
+ offset = idx - (nodep1->idx + MASK_BITS);
+ orig_num_after = nodep1->num_after;
+
+ /*
+ * Add a new node to describe the bits starting at
+ * the split point.
+ */
+ nodep1->num_after = offset;
+ nodep2 = node_add(s, idx);
+
+ /* Move bits after the split point into the new node */
+ nodep2->num_after = orig_num_after - offset;
+ if (nodep2->num_after >= MASK_BITS) {
+ nodep2->mask = ~(mask_t) 0;
+ nodep2->num_after -= MASK_BITS;
+ } else {
+ nodep2->mask = (1 << nodep2->num_after) - 1;
+ nodep2->num_after = 0;
+ }
+
+ return nodep2;
+}
+
+/* Iteratively reduces the node pointed to by nodep and its adjacent
+ * nodes into a more compact form. For example, a node with a mask with
+ * all bits set adjacent to a previous node, will get combined into a
+ * single node with an increased num_after setting.
+ *
+ * After each reduction, a further check is made to see if additional
+ * reductions are possible with the new previous and next nodes. Note,
+ * a search for a reduction is only done across the nodes nearest nodep
+ * and those that became part of a reduction. Reductions beyond nodep
+ * and the adjacent nodes that are reduced are not discovered. It is the
+ * responsibility of the caller to pass a nodep that is within one node
+ * of each possible reduction.
+ *
+ * This function does not fix the temporary violation of all invariants.
+ * For example it does not fix the case where the bit settings described
+ * by two or more nodes overlap. Such a violation introduces the potential
+ * complication of a bit setting for a specific index having different settings
+ * in different nodes. This would then introduce the further complication
+ * of which node has the correct setting of the bit and thus such conditions
+ * are not allowed.
+ *
+ * This function is designed to fix invariant violations that are introduced
+ * by node_split() and by changes to the nodes mask or num_after members.
+ * For example, when setting a bit within a nodes mask, the function that
+ * sets the bit doesn't have to worry about whether the setting of that
+ * bit caused the mask to have leading only or trailing only bits set.
+ * Instead, the function can call node_reduce(), with nodep equal to the
+ * node address that it set a mask bit in, and node_reduce() will notice
+ * the cases of leading or trailing only bits and that there is an
+ * adjacent node that the bit settings could be merged into.
+ *
+ * This implementation specifically detects and corrects violation of the
+ * following invariants:
+ *
+ * + Node are only used to represent bits that are set.
+ * Nodes with a mask of 0 and num_after of 0 are not allowed.
+ *
+ * + The setting of at least one bit is always described in a nodes
+ * mask (mask >= 1).
+ *
+ * + A node with all mask bits set only occurs when the last bit
+ * described by the previous node is not equal to this nodes
+ * starting index - 1. All such occurences of this condition are
+ * avoided by moving the setting of the nodes mask bits into
+ * the previous nodes num_after setting.
+ */
+static void node_reduce(struct sparsebit *s, struct node *nodep)
+{
+ bool reduction_performed;
+
+ do {
+ reduction_performed = false;
+ struct node *prev, *next, *tmp;
+
+ /* 1) Potential reductions within the current node. */
+
+ /* Nodes with all bits cleared may be removed. */
+ if (nodep->mask == 0 && nodep->num_after == 0) {
+ /*
+ * About to remove the node pointed to by
+ * nodep, which normally would cause a problem
+ * for the next pass through the reduction loop,
+ * because the node at the starting point no longer
+ * exists. This potential problem is handled
+ * by first remembering the location of the next
+ * or previous nodes. Doesn't matter which, because
+ * once the node at nodep is removed, there will be
+ * no other nodes between prev and next.
+ *
+ * Note, the checks performed on nodep against both
+ * both prev and next both check for an adjacent
+ * node that can be reduced into a single node. As
+ * such, after removing the node at nodep, doesn't
+ * matter whether the nodep for the next pass
+ * through the loop is equal to the previous pass
+ * prev or next node. Either way, on the next pass
+ * the one not selected will become either the
+ * prev or next node.
+ */
+ tmp = node_next(s, nodep);
+ if (!tmp)
+ tmp = node_prev(s, nodep);
+
+ node_rm(s, nodep);
+ nodep = NULL;
+
+ nodep = tmp;
+ reduction_performed = true;
+ continue;
+ }
+
+ /*
+ * When the mask is 0, can reduce the amount of num_after
+ * bits by moving the initial num_after bits into the mask.
+ */
+ if (nodep->mask == 0) {
+ assert(nodep->num_after != 0);
+ assert(nodep->idx + MASK_BITS > nodep->idx);
+
+ nodep->idx += MASK_BITS;
+
+ if (nodep->num_after >= MASK_BITS) {
+ nodep->mask = ~0;
+ nodep->num_after -= MASK_BITS;
+ } else {
+ nodep->mask = (1u << nodep->num_after) - 1;
+ nodep->num_after = 0;
+ }
+
+ reduction_performed = true;
+ continue;
+ }
+
+ /*
+ * 2) Potential reductions between the current and
+ * previous nodes.
+ */
+ prev = node_prev(s, nodep);
+ if (prev) {
+ sparsebit_idx_t prev_highest_bit;
+
+ /* Nodes with no bits set can be removed. */
+ if (prev->mask == 0 && prev->num_after == 0) {
+ node_rm(s, prev);
+
+ reduction_performed = true;
+ continue;
+ }
+
+ /*
+ * All mask bits set and previous node has
+ * adjacent index.
+ */
+ if (nodep->mask + 1 == 0 &&
+ prev->idx + MASK_BITS == nodep->idx) {
+ prev->num_after += MASK_BITS + nodep->num_after;
+ nodep->mask = 0;
+ nodep->num_after = 0;
+
+ reduction_performed = true;
+ continue;
+ }
+
+ /*
+ * Is node adjacent to previous node and the node
+ * contains a single contiguous range of bits
+ * starting from the beginning of the mask?
+ */
+ prev_highest_bit = prev->idx + MASK_BITS - 1 + prev->num_after;
+ if (prev_highest_bit + 1 == nodep->idx &&
+ (nodep->mask | (nodep->mask >> 1)) == nodep->mask) {
+ /*
+ * How many contiguous bits are there?
+ * Is equal to the total number of set
+ * bits, due to an earlier check that
+ * there is a single contiguous range of
+ * set bits.
+ */
+ unsigned int num_contiguous
+ = __builtin_popcount(nodep->mask);
+ assert((num_contiguous > 0) &&
+ ((1ULL << num_contiguous) - 1) == nodep->mask);
+
+ prev->num_after += num_contiguous;
+ nodep->mask = 0;
+
+ /*
+ * For predictable performance, handle special
+ * case where all mask bits are set and there
+ * is a non-zero num_after setting. This code
+ * is functionally correct without the following
+ * conditionalized statements, but without them
+ * the value of num_after is only reduced by
+ * the number of mask bits per pass. There are
+ * cases where num_after can be close to 2^64.
+ * Without this code it could take nearly
+ * (2^64) / 32 passes to perform the full
+ * reduction.
+ */
+ if (num_contiguous == MASK_BITS) {
+ prev->num_after += nodep->num_after;
+ nodep->num_after = 0;
+ }
+
+ reduction_performed = true;
+ continue;
+ }
+ }
+
+ /*
+ * 3) Potential reductions between the current and
+ * next nodes.
+ */
+ next = node_next(s, nodep);
+ if (next) {
+ /* Nodes with no bits set can be removed. */
+ if (next->mask == 0 && next->num_after == 0) {
+ node_rm(s, next);
+ reduction_performed = true;
+ continue;
+ }
+
+ /*
+ * Is next node index adjacent to current node
+ * and has a mask with all bits set?
+ */
+ if (next->idx == nodep->idx + MASK_BITS + nodep->num_after &&
+ next->mask == ~(mask_t) 0) {
+ nodep->num_after += MASK_BITS;
+ next->mask = 0;
+ nodep->num_after += next->num_after;
+ next->num_after = 0;
+
+ node_rm(s, next);
+ next = NULL;
+
+ reduction_performed = true;
+ continue;
+ }
+ }
+ } while (nodep && reduction_performed);
+}
+
+/* Returns whether the bit at the index given by idx, within the
+ * sparsebit array is set or not.
+ */
+bool sparsebit_is_set(struct sparsebit *s, sparsebit_idx_t idx)
+{
+ struct node *nodep;
+
+ /* Find the node that describes the setting of the bit at idx */
+ for (nodep = s->root; nodep;
+ nodep = nodep->idx > idx ? nodep->left : nodep->right)
+ if (idx >= nodep->idx &&
+ idx <= nodep->idx + MASK_BITS + nodep->num_after - 1)
+ goto have_node;
+
+ return false;
+
+have_node:
+ /* Bit is set if it is any of the bits described by num_after */
+ if (nodep->num_after && idx >= nodep->idx + MASK_BITS)
+ return true;
+
+ /* Is the corresponding mask bit set */
+ assert(idx >= nodep->idx && idx - nodep->idx < MASK_BITS);
+ return !!(nodep->mask & (1 << (idx - nodep->idx)));
+}
+
+/* Within the sparsebit array pointed to by s, sets the bit
+ * at the index given by idx.
+ */
+static void bit_set(struct sparsebit *s, sparsebit_idx_t idx)
+{
+ struct node *nodep;
+
+ /* Skip bits that are already set */
+ if (sparsebit_is_set(s, idx))
+ return;
+
+ /*
+ * Get a node where the bit at idx is described by the mask.
+ * The node_split will also create a node, if there isn't
+ * already a node that describes the setting of bit.
+ */
+ nodep = node_split(s, idx & -MASK_BITS);
+
+ /* Set the bit within the nodes mask */
+ assert(idx >= nodep->idx && idx <= nodep->idx + MASK_BITS - 1);
+ assert(!(nodep->mask & (1 << (idx - nodep->idx))));
+ nodep->mask |= 1 << (idx - nodep->idx);
+ s->num_set++;
+
+ node_reduce(s, nodep);
+}
+
+/* Within the sparsebit array pointed to by s, clears the bit
+ * at the index given by idx.
+ */
+static void bit_clear(struct sparsebit *s, sparsebit_idx_t idx)
+{
+ struct node *nodep;
+
+ /* Skip bits that are already cleared */
+ if (!sparsebit_is_set(s, idx))
+ return;
+
+ /* Is there a node that describes the setting of this bit? */
+ nodep = node_find(s, idx);
+ if (!nodep)
+ return;
+
+ /*
+ * If a num_after bit, split the node, so that the bit is
+ * part of a node mask.
+ */
+ if (idx >= nodep->idx + MASK_BITS)
+ nodep = node_split(s, idx & -MASK_BITS);
+
+ /*
+ * After node_split above, bit at idx should be within the mask.
+ * Clear that bit.
+ */
+ assert(idx >= nodep->idx && idx <= nodep->idx + MASK_BITS - 1);
+ assert(nodep->mask & (1 << (idx - nodep->idx)));
+ nodep->mask &= ~(1 << (idx - nodep->idx));
+ assert(s->num_set > 0 || sparsebit_all_set(s));
+ s->num_set--;
+
+ node_reduce(s, nodep);
+}
+
+/* Recursively dumps to the FILE stream given by stream the contents
+ * of the sub-tree of nodes pointed to by nodep. Each line of output
+ * is prefixed by the number of spaces given by indent. On each
+ * recursion, the indent amount is increased by 2. This causes nodes
+ * at each level deeper into the binary search tree to be displayed
+ * with a greater indent.
+ */
+static void dump_nodes(FILE *stream, struct node *nodep,
+ unsigned int indent)
+{
+ char *node_type;
+
+ /* Dump contents of node */
+ if (!nodep->parent)
+ node_type = "root";
+ else if (nodep == nodep->parent->left)
+ node_type = "left";
+ else {
+ assert(nodep == nodep->parent->right);
+ node_type = "right";
+ }
+ fprintf(stream, "%*s---- %s nodep: %p\n", indent, "", node_type, nodep);
+ fprintf(stream, "%*s parent: %p left: %p right: %p\n", indent, "",
+ nodep->parent, nodep->left, nodep->right);
+ fprintf(stream, "%*s idx: 0x%lx mask: 0x%x num_after: 0x%lx\n",
+ indent, "", nodep->idx, nodep->mask, nodep->num_after);
+
+ /* If present, dump contents of left child nodes */
+ if (nodep->left)
+ dump_nodes(stream, nodep->left, indent + 2);
+
+ /* If present, dump contents of right child nodes */
+ if (nodep->right)
+ dump_nodes(stream, nodep->right, indent + 2);
+}
+
+static inline sparsebit_idx_t node_first_set(struct node *nodep, int start)
+{
+ mask_t leading = (mask_t)1 << start;
+ int n1 = __builtin_ctz(nodep->mask & -leading);
+
+ return nodep->idx + n1;
+}
+
+static inline sparsebit_idx_t node_first_clear(struct node *nodep, int start)
+{
+ mask_t leading = (mask_t)1 << start;
+ int n1 = __builtin_ctz(~nodep->mask & -leading);
+
+ return nodep->idx + n1;
+}
+
+/* Dumps to the FILE stream specified by stream, the implementation dependent
+ * internal state of s. Each line of output is prefixed with the number
+ * of spaces given by indent. The output is completely implementation
+ * dependent and subject to change. Output from this function should only
+ * be used for diagnostic purposes. For example, this function can be
+ * used by test cases after they detect an unexpected condition, as a means
+ * to capture diagnostic information.
+ */
+static void sparsebit_dump_internal(FILE *stream, struct sparsebit *s,
+ unsigned int indent)
+{
+ /* Dump the contents of s */
+ fprintf(stream, "%*sroot: %p\n", indent, "", s->root);
+ fprintf(stream, "%*snum_set: 0x%lx\n", indent, "", s->num_set);
+
+ if (s->root)
+ dump_nodes(stream, s->root, indent);
+}
+
+/* Allocates and returns a new sparsebit array. The initial state
+ * of the newly allocated sparsebit array has all bits cleared.
+ */
+struct sparsebit *sparsebit_alloc(void)
+{
+ struct sparsebit *s;
+
+ /* Allocate top level structure. */
+ s = calloc(1, sizeof(*s));
+ if (!s) {
+ perror("calloc");
+ abort();
+ }
+
+ return s;
+}
+
+/* Frees the implementation dependent data for the sparsebit array
+ * pointed to by s and poisons the pointer to that data.
+ */
+void sparsebit_free(struct sparsebit **sbitp)
+{
+ struct sparsebit *s = *sbitp;
+
+ if (!s)
+ return;
+
+ sparsebit_clear_all(s);
+ free(s);
+ *sbitp = NULL;
+}
+
+/* Makes a copy of the sparsebit array given by s, to the sparsebit
+ * array given by d. Note, d must have already been allocated via
+ * sparsebit_alloc(). It can though already have bits set, which
+ * if different from src will be cleared.
+ */
+void sparsebit_copy(struct sparsebit *d, struct sparsebit *s)
+{
+ /* First clear any bits already set in the destination */
+ sparsebit_clear_all(d);
+
+ if (s->root) {
+ d->root = node_copy_subtree(s->root);
+ d->num_set = s->num_set;
+ }
+}
+
+/* Returns whether num consecutive bits starting at idx are all set. */
+bool sparsebit_is_set_num(struct sparsebit *s,
+ sparsebit_idx_t idx, sparsebit_num_t num)
+{
+ sparsebit_idx_t next_cleared;
+
+ assert(num > 0);
+ assert(idx + num - 1 >= idx);
+
+ /* With num > 0, the first bit must be set. */
+ if (!sparsebit_is_set(s, idx))
+ return false;
+
+ /* Find the next cleared bit */
+ next_cleared = sparsebit_next_clear(s, idx);
+
+ /*
+ * If no cleared bits beyond idx, then there are at least num
+ * set bits. idx + num doesn't wrap. Otherwise check if
+ * there are enough set bits between idx and the next cleared bit.
+ */
+ return next_cleared == 0 || next_cleared - idx >= num;
+}
+
+/* Returns whether the bit at the index given by idx. */
+bool sparsebit_is_clear(struct sparsebit *s,
+ sparsebit_idx_t idx)
+{
+ return !sparsebit_is_set(s, idx);
+}
+
+/* Returns whether num consecutive bits starting at idx are all cleared. */
+bool sparsebit_is_clear_num(struct sparsebit *s,
+ sparsebit_idx_t idx, sparsebit_num_t num)
+{
+ sparsebit_idx_t next_set;
+
+ assert(num > 0);
+ assert(idx + num - 1 >= idx);
+
+ /* With num > 0, the first bit must be cleared. */
+ if (!sparsebit_is_clear(s, idx))
+ return false;
+
+ /* Find the next set bit */
+ next_set = sparsebit_next_set(s, idx);
+
+ /*
+ * If no set bits beyond idx, then there are at least num
+ * cleared bits. idx + num doesn't wrap. Otherwise check if
+ * there are enough cleared bits between idx and the next set bit.
+ */
+ return next_set == 0 || next_set - idx >= num;
+}
+
+/* Returns the total number of bits set. Note: 0 is also returned for
+ * the case of all bits set. This is because with all bits set, there
+ * is 1 additional bit set beyond what can be represented in the return
+ * value. Use sparsebit_any_set(), instead of sparsebit_num_set() > 0,
+ * to determine if the sparsebit array has any bits set.
+ */
+sparsebit_num_t sparsebit_num_set(struct sparsebit *s)
+{
+ return s->num_set;
+}
+
+/* Returns whether any bit is set in the sparsebit array. */
+bool sparsebit_any_set(struct sparsebit *s)
+{
+ /*
+ * Nodes only describe set bits. If any nodes then there
+ * is at least 1 bit set.
+ */
+ if (!s->root)
+ return false;
+
+ /*
+ * Every node should have a non-zero mask. For now will
+ * just assure that the root node has a non-zero mask,
+ * which is a quick check that at least 1 bit is set.
+ */
+ assert(s->root->mask != 0);
+ assert(s->num_set > 0 ||
+ (s->root->num_after == ((sparsebit_num_t) 0) - MASK_BITS &&
+ s->root->mask == ~(mask_t) 0));
+
+ return true;
+}
+
+/* Returns whether all the bits in the sparsebit array are cleared. */
+bool sparsebit_all_clear(struct sparsebit *s)
+{
+ return !sparsebit_any_set(s);
+}
+
+/* Returns whether all the bits in the sparsebit array are set. */
+bool sparsebit_any_clear(struct sparsebit *s)
+{
+ return !sparsebit_all_set(s);
+}
+
+/* Returns the index of the first set bit. Abort if no bits are set.
+ */
+sparsebit_idx_t sparsebit_first_set(struct sparsebit *s)
+{
+ struct node *nodep;
+
+ /* Validate at least 1 bit is set */
+ assert(sparsebit_any_set(s));
+
+ nodep = node_first(s);
+ return node_first_set(nodep, 0);
+}
+
+/* Returns the index of the first cleared bit. Abort if
+ * no bits are cleared.
+ */
+sparsebit_idx_t sparsebit_first_clear(struct sparsebit *s)
+{
+ struct node *nodep1, *nodep2;
+
+ /* Validate at least 1 bit is cleared. */
+ assert(sparsebit_any_clear(s));
+
+ /* If no nodes or first node index > 0 then lowest cleared is 0 */
+ nodep1 = node_first(s);
+ if (!nodep1 || nodep1->idx > 0)
+ return 0;
+
+ /* Does the mask in the first node contain any cleared bits. */
+ if (nodep1->mask != ~(mask_t) 0)
+ return node_first_clear(nodep1, 0);
+
+ /*
+ * All mask bits set in first node. If there isn't a second node
+ * then the first cleared bit is the first bit after the bits
+ * described by the first node.
+ */
+ nodep2 = node_next(s, nodep1);
+ if (!nodep2) {
+ /*
+ * No second node. First cleared bit is first bit beyond
+ * bits described by first node.
+ */
+ assert(nodep1->mask == ~(mask_t) 0);
+ assert(nodep1->idx + MASK_BITS + nodep1->num_after != (sparsebit_idx_t) 0);
+ return nodep1->idx + MASK_BITS + nodep1->num_after;
+ }
+
+ /*
+ * There is a second node.
+ * If it is not adjacent to the first node, then there is a gap
+ * of cleared bits between the nodes, and the first cleared bit
+ * is the first bit within the gap.
+ */
+ if (nodep1->idx + MASK_BITS + nodep1->num_after != nodep2->idx)
+ return nodep1->idx + MASK_BITS + nodep1->num_after;
+
+ /*
+ * Second node is adjacent to the first node.
+ * Because it is adjacent, its mask should be non-zero. If all
+ * its mask bits are set, then with it being adjacent, it should
+ * have had the mask bits moved into the num_after setting of the
+ * previous node.
+ */
+ return node_first_clear(nodep2, 0);
+}
+
+/* Returns index of next bit set within s after the index given by prev.
+ * Returns 0 if there are no bits after prev that are set.
+ */
+sparsebit_idx_t sparsebit_next_set(struct sparsebit *s,
+ sparsebit_idx_t prev)
+{
+ sparsebit_idx_t lowest_possible = prev + 1;
+ sparsebit_idx_t start;
+ struct node *nodep;
+
+ /* A bit after the highest index can't be set. */
+ if (lowest_possible == 0)
+ return 0;
+
+ /*
+ * Find the leftmost 'candidate' overlapping or to the right
+ * of lowest_possible.
+ */
+ struct node *candidate = NULL;
+
+ /* True iff lowest_possible is within candidate */
+ bool contains = false;
+
+ /*
+ * Find node that describes setting of bit at lowest_possible.
+ * If such a node doesn't exist, find the node with the lowest
+ * starting index that is > lowest_possible.
+ */
+ for (nodep = s->root; nodep;) {
+ if ((nodep->idx + MASK_BITS + nodep->num_after - 1)
+ >= lowest_possible) {
+ candidate = nodep;
+ if (candidate->idx <= lowest_possible) {
+ contains = true;
+ break;
+ }
+ nodep = nodep->left;
+ } else {
+ nodep = nodep->right;
+ }
+ }
+ if (!candidate)
+ return 0;
+
+ assert(candidate->mask != 0);
+
+ /* Does the candidate node describe the setting of lowest_possible? */
+ if (!contains) {
+ /*
+ * Candidate doesn't describe setting of bit at lowest_possible.
+ * Candidate points to the first node with a starting index
+ * > lowest_possible.
+ */
+ assert(candidate->idx > lowest_possible);
+
+ return node_first_set(candidate, 0);
+ }
+
+ /*
+ * Candidate describes setting of bit at lowest_possible.
+ * Note: although the node describes the setting of the bit
+ * at lowest_possible, its possible that its setting and the
+ * setting of all latter bits described by this node are 0.
+ * For now, just handle the cases where this node describes
+ * a bit at or after an index of lowest_possible that is set.
+ */
+ start = lowest_possible - candidate->idx;
+
+ if (start < MASK_BITS && candidate->mask >= (1 << start))
+ return node_first_set(candidate, start);
+
+ if (candidate->num_after) {
+ sparsebit_idx_t first_num_after_idx = candidate->idx + MASK_BITS;
+
+ return lowest_possible < first_num_after_idx
+ ? first_num_after_idx : lowest_possible;
+ }
+
+ /*
+ * Although candidate node describes setting of bit at
+ * the index of lowest_possible, all bits at that index and
+ * latter that are described by candidate are cleared. With
+ * this, the next bit is the first bit in the next node, if
+ * such a node exists. If a next node doesn't exist, then
+ * there is no next set bit.
+ */
+ candidate = node_next(s, candidate);
+ if (!candidate)
+ return 0;
+
+ return node_first_set(candidate, 0);
+}
+
+/* Returns index of next bit cleared within s after the index given by prev.
+ * Returns 0 if there are no bits after prev that are cleared.
+ */
+sparsebit_idx_t sparsebit_next_clear(struct sparsebit *s,
+ sparsebit_idx_t prev)
+{
+ sparsebit_idx_t lowest_possible = prev + 1;
+ sparsebit_idx_t idx;
+ struct node *nodep1, *nodep2;
+
+ /* A bit after the highest index can't be set. */
+ if (lowest_possible == 0)
+ return 0;
+
+ /*
+ * Does a node describing the setting of lowest_possible exist?
+ * If not, the bit at lowest_possible is cleared.
+ */
+ nodep1 = node_find(s, lowest_possible);
+ if (!nodep1)
+ return lowest_possible;
+
+ /* Does a mask bit in node 1 describe the next cleared bit. */
+ for (idx = lowest_possible - nodep1->idx; idx < MASK_BITS; idx++)
+ if (!(nodep1->mask & (1 << idx)))
+ return nodep1->idx + idx;
+
+ /*
+ * Next cleared bit is not described by node 1. If there
+ * isn't a next node, then next cleared bit is described
+ * by bit after the bits described by the first node.
+ */
+ nodep2 = node_next(s, nodep1);
+ if (!nodep2)
+ return nodep1->idx + MASK_BITS + nodep1->num_after;
+
+ /*
+ * There is a second node.
+ * If it is not adjacent to the first node, then there is a gap
+ * of cleared bits between the nodes, and the next cleared bit
+ * is the first bit within the gap.
+ */
+ if (nodep1->idx + MASK_BITS + nodep1->num_after != nodep2->idx)
+ return nodep1->idx + MASK_BITS + nodep1->num_after;
+
+ /*
+ * Second node is adjacent to the first node.
+ * Because it is adjacent, its mask should be non-zero. If all
+ * its mask bits are set, then with it being adjacent, it should
+ * have had the mask bits moved into the num_after setting of the
+ * previous node.
+ */
+ return node_first_clear(nodep2, 0);
+}
+
+/* Starting with the index 1 greater than the index given by start, finds
+ * and returns the index of the first sequence of num consecutively set
+ * bits. Returns a value of 0 of no such sequence exists.
+ */
+sparsebit_idx_t sparsebit_next_set_num(struct sparsebit *s,
+ sparsebit_idx_t start, sparsebit_num_t num)
+{
+ sparsebit_idx_t idx;
+
+ assert(num >= 1);
+
+ for (idx = sparsebit_next_set(s, start);
+ idx != 0 && idx + num - 1 >= idx;
+ idx = sparsebit_next_set(s, idx)) {
+ assert(sparsebit_is_set(s, idx));
+
+ /*
+ * Does the sequence of bits starting at idx consist of
+ * num set bits?
+ */
+ if (sparsebit_is_set_num(s, idx, num))
+ return idx;
+
+ /*
+ * Sequence of set bits at idx isn't large enough.
+ * Skip this entire sequence of set bits.
+ */
+ idx = sparsebit_next_clear(s, idx);
+ if (idx == 0)
+ return 0;
+ }
+
+ return 0;
+}
+
+/* Starting with the index 1 greater than the index given by start, finds
+ * and returns the index of the first sequence of num consecutively cleared
+ * bits. Returns a value of 0 of no such sequence exists.
+ */
+sparsebit_idx_t sparsebit_next_clear_num(struct sparsebit *s,
+ sparsebit_idx_t start, sparsebit_num_t num)
+{
+ sparsebit_idx_t idx;
+
+ assert(num >= 1);
+
+ for (idx = sparsebit_next_clear(s, start);
+ idx != 0 && idx + num - 1 >= idx;
+ idx = sparsebit_next_clear(s, idx)) {
+ assert(sparsebit_is_clear(s, idx));
+
+ /*
+ * Does the sequence of bits starting at idx consist of
+ * num cleared bits?
+ */
+ if (sparsebit_is_clear_num(s, idx, num))
+ return idx;
+
+ /*
+ * Sequence of cleared bits at idx isn't large enough.
+ * Skip this entire sequence of cleared bits.
+ */
+ idx = sparsebit_next_set(s, idx);
+ if (idx == 0)
+ return 0;
+ }
+
+ return 0;
+}
+
+/* Sets the bits * in the inclusive range idx through idx + num - 1. */
+void sparsebit_set_num(struct sparsebit *s,
+ sparsebit_idx_t start, sparsebit_num_t num)
+{
+ struct node *nodep, *next;
+ unsigned int n1;
+ sparsebit_idx_t idx;
+ sparsebit_num_t n;
+ sparsebit_idx_t middle_start, middle_end;
+
+ assert(num > 0);
+ assert(start + num - 1 >= start);
+
+ /*
+ * Leading - bits before first mask boundary.
+ *
+ * TODO(lhuemill): With some effort it may be possible to
+ * replace the following loop with a sequential sequence
+ * of statements. High level sequence would be:
+ *
+ * 1. Use node_split() to force node that describes setting
+ * of idx to be within the mask portion of a node.
+ * 2. Form mask of bits to be set.
+ * 3. Determine number of mask bits already set in the node
+ * and store in a local variable named num_already_set.
+ * 4. Set the appropriate mask bits within the node.
+ * 5. Increment struct sparsebit_pvt num_set member
+ * by the number of bits that were actually set.
+ * Exclude from the counts bits that were already set.
+ * 6. Before returning to the caller, use node_reduce() to
+ * handle the multiple corner cases that this method
+ * introduces.
+ */
+ for (idx = start, n = num; n > 0 && idx % MASK_BITS != 0; idx++, n--)
+ bit_set(s, idx);
+
+ /* Middle - bits spanning one or more entire mask */
+ middle_start = idx;
+ middle_end = middle_start + (n & -MASK_BITS) - 1;
+ if (n >= MASK_BITS) {
+ nodep = node_split(s, middle_start);
+
+ /*
+ * As needed, split just after end of middle bits.
+ * No split needed if end of middle bits is at highest
+ * supported bit index.
+ */
+ if (middle_end + 1 > middle_end)
+ (void) node_split(s, middle_end + 1);
+
+ /* Delete nodes that only describe bits within the middle. */
+ for (next = node_next(s, nodep);
+ next && (next->idx < middle_end);
+ next = node_next(s, nodep)) {
+ assert(next->idx + MASK_BITS + next->num_after - 1 <= middle_end);
+ node_rm(s, next);
+ next = NULL;
+ }
+
+ /* As needed set each of the mask bits */
+ for (n1 = 0; n1 < MASK_BITS; n1++) {
+ if (!(nodep->mask & (1 << n1))) {
+ nodep->mask |= 1 << n1;
+ s->num_set++;
+ }
+ }
+
+ s->num_set -= nodep->num_after;
+ nodep->num_after = middle_end - middle_start + 1 - MASK_BITS;
+ s->num_set += nodep->num_after;
+
+ node_reduce(s, nodep);
+ }
+ idx = middle_end + 1;
+ n -= middle_end - middle_start + 1;
+
+ /* Trailing - bits at and beyond last mask boundary */
+ assert(n < MASK_BITS);
+ for (; n > 0; idx++, n--)
+ bit_set(s, idx);
+}
+
+/* Clears the bits * in the inclusive range idx through idx + num - 1. */
+void sparsebit_clear_num(struct sparsebit *s,
+ sparsebit_idx_t start, sparsebit_num_t num)
+{
+ struct node *nodep, *next;
+ unsigned int n1;
+ sparsebit_idx_t idx;
+ sparsebit_num_t n;
+ sparsebit_idx_t middle_start, middle_end;
+
+ assert(num > 0);
+ assert(start + num - 1 >= start);
+
+ /* Leading - bits before first mask boundary */
+ for (idx = start, n = num; n > 0 && idx % MASK_BITS != 0; idx++, n--)
+ bit_clear(s, idx);
+
+ /* Middle - bits spanning one or more entire mask */
+ middle_start = idx;
+ middle_end = middle_start + (n & -MASK_BITS) - 1;
+ if (n >= MASK_BITS) {
+ nodep = node_split(s, middle_start);
+
+ /*
+ * As needed, split just after end of middle bits.
+ * No split needed if end of middle bits is at highest
+ * supported bit index.
+ */
+ if (middle_end + 1 > middle_end)
+ (void) node_split(s, middle_end + 1);
+
+ /* Delete nodes that only describe bits within the middle. */
+ for (next = node_next(s, nodep);
+ next && (next->idx < middle_end);
+ next = node_next(s, nodep)) {
+ assert(next->idx + MASK_BITS + next->num_after - 1 <= middle_end);
+ node_rm(s, next);
+ next = NULL;
+ }
+
+ /* As needed clear each of the mask bits */
+ for (n1 = 0; n1 < MASK_BITS; n1++) {
+ if (nodep->mask & (1 << n1)) {
+ nodep->mask &= ~(1 << n1);
+ s->num_set--;
+ }
+ }
+
+ /* Clear any bits described by num_after */
+ s->num_set -= nodep->num_after;
+ nodep->num_after = 0;
+
+ /*
+ * Delete the node that describes the beginning of
+ * the middle bits and perform any allowed reductions
+ * with the nodes prev or next of nodep.
+ */
+ node_reduce(s, nodep);
+ nodep = NULL;
+ }
+ idx = middle_end + 1;
+ n -= middle_end - middle_start + 1;
+
+ /* Trailing - bits at and beyond last mask boundary */
+ assert(n < MASK_BITS);
+ for (; n > 0; idx++, n--)
+ bit_clear(s, idx);
+}
+
+/* Sets the bit at the index given by idx. */
+void sparsebit_set(struct sparsebit *s, sparsebit_idx_t idx)
+{
+ sparsebit_set_num(s, idx, 1);
+}
+
+/* Clears the bit at the index given by idx. */
+void sparsebit_clear(struct sparsebit *s, sparsebit_idx_t idx)
+{
+ sparsebit_clear_num(s, idx, 1);
+}
+
+/* Sets the bits in the entire addressable range of the sparsebit array. */
+void sparsebit_set_all(struct sparsebit *s)
+{
+ sparsebit_set(s, 0);
+ sparsebit_set_num(s, 1, ~(sparsebit_idx_t) 0);
+ assert(sparsebit_all_set(s));
+}
+
+/* Clears the bits in the entire addressable range of the sparsebit array. */
+void sparsebit_clear_all(struct sparsebit *s)
+{
+ sparsebit_clear(s, 0);
+ sparsebit_clear_num(s, 1, ~(sparsebit_idx_t) 0);
+ assert(!sparsebit_any_set(s));
+}
+
+static size_t display_range(FILE *stream, sparsebit_idx_t low,
+ sparsebit_idx_t high, bool prepend_comma_space)
+{
+ char *fmt_str;
+ size_t sz;
+
+ /* Determine the printf format string */
+ if (low == high)
+ fmt_str = prepend_comma_space ? ", 0x%lx" : "0x%lx";
+ else
+ fmt_str = prepend_comma_space ? ", 0x%lx:0x%lx" : "0x%lx:0x%lx";
+
+ /*
+ * When stream is NULL, just determine the size of what would
+ * have been printed, else print the range.
+ */
+ if (!stream)
+ sz = snprintf(NULL, 0, fmt_str, low, high);
+ else
+ sz = fprintf(stream, fmt_str, low, high);
+
+ return sz;
+}
+
+
+/* Dumps to the FILE stream given by stream, the bit settings
+ * of s. Each line of output is prefixed with the number of
+ * spaces given by indent. The length of each line is implementation
+ * dependent and does not depend on the indent amount. The following
+ * is an example output of a sparsebit array that has bits:
+ *
+ * 0x5, 0x8, 0xa:0xe, 0x12
+ *
+ * This corresponds to a sparsebit whose bits 5, 8, 10, 11, 12, 13, 14, 18
+ * are set. Note that a ':', instead of a '-' is used to specify a range of
+ * contiguous bits. This is done because '-' is used to specify command-line
+ * options, and sometimes ranges are specified as command-line arguments.
+ */
+void sparsebit_dump(FILE *stream, struct sparsebit *s,
+ unsigned int indent)
+{
+ size_t current_line_len = 0;
+ size_t sz;
+ struct node *nodep;
+
+ if (!sparsebit_any_set(s))
+ return;
+
+ /* Display initial indent */
+ fprintf(stream, "%*s", indent, "");
+
+ /* For each node */
+ for (nodep = node_first(s); nodep; nodep = node_next(s, nodep)) {
+ unsigned int n1;
+ sparsebit_idx_t low, high;
+
+ /* For each group of bits in the mask */
+ for (n1 = 0; n1 < MASK_BITS; n1++) {
+ if (nodep->mask & (1 << n1)) {
+ low = high = nodep->idx + n1;
+
+ for (; n1 < MASK_BITS; n1++) {
+ if (nodep->mask & (1 << n1))
+ high = nodep->idx + n1;
+ else
+ break;
+ }
+
+ if ((n1 == MASK_BITS) && nodep->num_after)
+ high += nodep->num_after;
+
+ /*
+ * How much room will it take to display
+ * this range.
+ */
+ sz = display_range(NULL, low, high,
+ current_line_len != 0);
+
+ /*
+ * If there is not enough room, display
+ * a newline plus the indent of the next
+ * line.
+ */
+ if (current_line_len + sz > DUMP_LINE_MAX) {
+ fputs("\n", stream);
+ fprintf(stream, "%*s", indent, "");
+ current_line_len = 0;
+ }
+
+ /* Display the range */
+ sz = display_range(stream, low, high,
+ current_line_len != 0);
+ current_line_len += sz;
+ }
+ }
+
+ /*
+ * If num_after and most significant-bit of mask is not
+ * set, then still need to display a range for the bits
+ * described by num_after.
+ */
+ if (!(nodep->mask & (1 << (MASK_BITS - 1))) && nodep->num_after) {
+ low = nodep->idx + MASK_BITS;
+ high = nodep->idx + MASK_BITS + nodep->num_after - 1;
+
+ /*
+ * How much room will it take to display
+ * this range.
+ */
+ sz = display_range(NULL, low, high,
+ current_line_len != 0);
+
+ /*
+ * If there is not enough room, display
+ * a newline plus the indent of the next
+ * line.
+ */
+ if (current_line_len + sz > DUMP_LINE_MAX) {
+ fputs("\n", stream);
+ fprintf(stream, "%*s", indent, "");
+ current_line_len = 0;
+ }
+
+ /* Display the range */
+ sz = display_range(stream, low, high,
+ current_line_len != 0);
+ current_line_len += sz;
+ }
+ }
+ fputs("\n", stream);
+}
+
+/* Validates the internal state of the sparsebit array given by
+ * s. On error, diagnostic information is printed to stderr and
+ * abort is called.
+ */
+void sparsebit_validate_internal(struct sparsebit *s)
+{
+ bool error_detected = false;
+ struct node *nodep, *prev = NULL;
+ sparsebit_num_t total_bits_set = 0;
+ unsigned int n1;
+
+ /* For each node */
+ for (nodep = node_first(s); nodep;
+ prev = nodep, nodep = node_next(s, nodep)) {
+
+ /*
+ * Increase total bits set by the number of bits set
+ * in this node.
+ */
+ for (n1 = 0; n1 < MASK_BITS; n1++)
+ if (nodep->mask & (1 << n1))
+ total_bits_set++;
+
+ total_bits_set += nodep->num_after;
+
+ /*
+ * Arbitrary choice as to whether a mask of 0 is allowed
+ * or not. For diagnostic purposes it is beneficial to
+ * have only one valid means to represent a set of bits.
+ * To support this an arbitrary choice has been made
+ * to not allow a mask of zero.
+ */
+ if (nodep->mask == 0) {
+ fprintf(stderr, "Node mask of zero, "
+ "nodep: %p nodep->mask: 0x%x",
+ nodep, nodep->mask);
+ error_detected = true;
+ break;
+ }
+
+ /*
+ * Validate num_after is not greater than the max index
+ * - the number of mask bits. The num_after member
+ * uses 0-based indexing and thus has no value that
+ * represents all bits set. This limitation is handled
+ * by requiring a non-zero mask. With a non-zero mask,
+ * MASK_BITS worth of bits are described by the mask,
+ * which makes the largest needed num_after equal to:
+ *
+ * (~(sparsebit_num_t) 0) - MASK_BITS + 1
+ */
+ if (nodep->num_after
+ > (~(sparsebit_num_t) 0) - MASK_BITS + 1) {
+ fprintf(stderr, "num_after too large, "
+ "nodep: %p nodep->num_after: 0x%lx",
+ nodep, nodep->num_after);
+ error_detected = true;
+ break;
+ }
+
+ /* Validate node index is divisible by the mask size */
+ if (nodep->idx % MASK_BITS) {
+ fprintf(stderr, "Node index not divisible by "
+ "mask size,\n"
+ " nodep: %p nodep->idx: 0x%lx "
+ "MASK_BITS: %lu\n",
+ nodep, nodep->idx, MASK_BITS);
+ error_detected = true;
+ break;
+ }
+
+ /*
+ * Validate bits described by node don't wrap beyond the
+ * highest supported index.
+ */
+ if ((nodep->idx + MASK_BITS + nodep->num_after - 1) < nodep->idx) {
+ fprintf(stderr, "Bits described by node wrap "
+ "beyond highest supported index,\n"
+ " nodep: %p nodep->idx: 0x%lx\n"
+ " MASK_BITS: %lu nodep->num_after: 0x%lx",
+ nodep, nodep->idx, MASK_BITS, nodep->num_after);
+ error_detected = true;
+ break;
+ }
+
+ /* Check parent pointers. */
+ if (nodep->left) {
+ if (nodep->left->parent != nodep) {
+ fprintf(stderr, "Left child parent pointer "
+ "doesn't point to this node,\n"
+ " nodep: %p nodep->left: %p "
+ "nodep->left->parent: %p",
+ nodep, nodep->left,
+ nodep->left->parent);
+ error_detected = true;
+ break;
+ }
+ }
+
+ if (nodep->right) {
+ if (nodep->right->parent != nodep) {
+ fprintf(stderr, "Right child parent pointer "
+ "doesn't point to this node,\n"
+ " nodep: %p nodep->right: %p "
+ "nodep->right->parent: %p",
+ nodep, nodep->right,
+ nodep->right->parent);
+ error_detected = true;
+ break;
+ }
+ }
+
+ if (!nodep->parent) {
+ if (s->root != nodep) {
+ fprintf(stderr, "Unexpected root node, "
+ "s->root: %p nodep: %p",
+ s->root, nodep);
+ error_detected = true;
+ break;
+ }
+ }
+
+ if (prev) {
+ /*
+ * Is index of previous node before index of
+ * current node?
+ */
+ if (prev->idx >= nodep->idx) {
+ fprintf(stderr, "Previous node index "
+ ">= current node index,\n"
+ " prev: %p prev->idx: 0x%lx\n"
+ " nodep: %p nodep->idx: 0x%lx",
+ prev, prev->idx, nodep, nodep->idx);
+ error_detected = true;
+ break;
+ }
+
+ /*
+ * Nodes occur in asscending order, based on each
+ * nodes starting index.
+ */
+ if ((prev->idx + MASK_BITS + prev->num_after - 1)
+ >= nodep->idx) {
+ fprintf(stderr, "Previous node bit range "
+ "overlap with current node bit range,\n"
+ " prev: %p prev->idx: 0x%lx "
+ "prev->num_after: 0x%lx\n"
+ " nodep: %p nodep->idx: 0x%lx "
+ "nodep->num_after: 0x%lx\n"
+ " MASK_BITS: %lu",
+ prev, prev->idx, prev->num_after,
+ nodep, nodep->idx, nodep->num_after,
+ MASK_BITS);
+ error_detected = true;
+ break;
+ }
+
+ /*
+ * When the node has all mask bits set, it shouldn't
+ * be adjacent to the last bit described by the
+ * previous node.
+ */
+ if (nodep->mask == ~(mask_t) 0 &&
+ prev->idx + MASK_BITS + prev->num_after == nodep->idx) {
+ fprintf(stderr, "Current node has mask with "
+ "all bits set and is adjacent to the "
+ "previous node,\n"
+ " prev: %p prev->idx: 0x%lx "
+ "prev->num_after: 0x%lx\n"
+ " nodep: %p nodep->idx: 0x%lx "
+ "nodep->num_after: 0x%lx\n"
+ " MASK_BITS: %lu",
+ prev, prev->idx, prev->num_after,
+ nodep, nodep->idx, nodep->num_after,
+ MASK_BITS);
+
+ error_detected = true;
+ break;
+ }
+ }
+ }
+
+ if (!error_detected) {
+ /*
+ * Is sum of bits set in each node equal to the count
+ * of total bits set.
+ */
+ if (s->num_set != total_bits_set) {
+ fprintf(stderr, "Number of bits set missmatch,\n"
+ " s->num_set: 0x%lx total_bits_set: 0x%lx",
+ s->num_set, total_bits_set);
+
+ error_detected = true;
+ }
+ }
+
+ if (error_detected) {
+ fputs(" dump_internal:\n", stderr);
+ sparsebit_dump_internal(stderr, s, 4);
+ abort();
+ }
+}
+
+
+#ifdef FUZZ
+/* A simple but effective fuzzing driver. Look for bugs with the help
+ * of some invariants and of a trivial representation of sparsebit.
+ * Just use 512 bytes of /dev/zero and /dev/urandom as inputs, and let
+ * afl-fuzz do the magic. :)
+ */
+
+#include <stdlib.h>
+#include <assert.h>
+
+struct range {
+ sparsebit_idx_t first, last;
+ bool set;
+};
+
+struct sparsebit *s;
+struct range ranges[1000];
+int num_ranges;
+
+static bool get_value(sparsebit_idx_t idx)
+{
+ int i;
+
+ for (i = num_ranges; --i >= 0; )
+ if (ranges[i].first <= idx && idx <= ranges[i].last)
+ return ranges[i].set;
+
+ return false;
+}
+
+static void operate(int code, sparsebit_idx_t first, sparsebit_idx_t last)
+{
+ sparsebit_num_t num;
+ sparsebit_idx_t next;
+
+ if (first < last) {
+ num = last - first + 1;
+ } else {
+ num = first - last + 1;
+ first = last;
+ last = first + num - 1;
+ }
+
+ switch (code) {
+ case 0:
+ sparsebit_set(s, first);
+ assert(sparsebit_is_set(s, first));
+ assert(!sparsebit_is_clear(s, first));
+ assert(sparsebit_any_set(s));
+ assert(!sparsebit_all_clear(s));
+ if (get_value(first))
+ return;
+ if (num_ranges == 1000)
+ exit(0);
+ ranges[num_ranges++] = (struct range)
+ { .first = first, .last = first, .set = true };
+ break;
+ case 1:
+ sparsebit_clear(s, first);
+ assert(!sparsebit_is_set(s, first));
+ assert(sparsebit_is_clear(s, first));
+ assert(sparsebit_any_clear(s));
+ assert(!sparsebit_all_set(s));
+ if (!get_value(first))
+ return;
+ if (num_ranges == 1000)
+ exit(0);
+ ranges[num_ranges++] = (struct range)
+ { .first = first, .last = first, .set = false };
+ break;
+ case 2:
+ assert(sparsebit_is_set(s, first) == get_value(first));
+ assert(sparsebit_is_clear(s, first) == !get_value(first));
+ break;
+ case 3:
+ if (sparsebit_any_set(s))
+ assert(get_value(sparsebit_first_set(s)));
+ if (sparsebit_any_clear(s))
+ assert(!get_value(sparsebit_first_clear(s)));
+ sparsebit_set_all(s);
+ assert(!sparsebit_any_clear(s));
+ assert(sparsebit_all_set(s));
+ num_ranges = 0;
+ ranges[num_ranges++] = (struct range)
+ { .first = 0, .last = ~(sparsebit_idx_t)0, .set = true };
+ break;
+ case 4:
+ if (sparsebit_any_set(s))
+ assert(get_value(sparsebit_first_set(s)));
+ if (sparsebit_any_clear(s))
+ assert(!get_value(sparsebit_first_clear(s)));
+ sparsebit_clear_all(s);
+ assert(!sparsebit_any_set(s));
+ assert(sparsebit_all_clear(s));
+ num_ranges = 0;
+ break;
+ case 5:
+ next = sparsebit_next_set(s, first);
+ assert(next == 0 || next > first);
+ assert(next == 0 || get_value(next));
+ break;
+ case 6:
+ next = sparsebit_next_clear(s, first);
+ assert(next == 0 || next > first);
+ assert(next == 0 || !get_value(next));
+ break;
+ case 7:
+ next = sparsebit_next_clear(s, first);
+ if (sparsebit_is_set_num(s, first, num)) {
+ assert(next == 0 || next > last);
+ if (first)
+ next = sparsebit_next_set(s, first - 1);
+ else if (sparsebit_any_set(s))
+ next = sparsebit_first_set(s);
+ else
+ return;
+ assert(next == first);
+ } else {
+ assert(sparsebit_is_clear(s, first) || next <= last);
+ }
+ break;
+ case 8:
+ next = sparsebit_next_set(s, first);
+ if (sparsebit_is_clear_num(s, first, num)) {
+ assert(next == 0 || next > last);
+ if (first)
+ next = sparsebit_next_clear(s, first - 1);
+ else if (sparsebit_any_clear(s))
+ next = sparsebit_first_clear(s);
+ else
+ return;
+ assert(next == first);
+ } else {
+ assert(sparsebit_is_set(s, first) || next <= last);
+ }
+ break;
+ case 9:
+ sparsebit_set_num(s, first, num);
+ assert(sparsebit_is_set_num(s, first, num));
+ assert(!sparsebit_is_clear_num(s, first, num));
+ assert(sparsebit_any_set(s));
+ assert(!sparsebit_all_clear(s));
+ if (num_ranges == 1000)
+ exit(0);
+ ranges[num_ranges++] = (struct range)
+ { .first = first, .last = last, .set = true };
+ break;
+ case 10:
+ sparsebit_clear_num(s, first, num);
+ assert(!sparsebit_is_set_num(s, first, num));
+ assert(sparsebit_is_clear_num(s, first, num));
+ assert(sparsebit_any_clear(s));
+ assert(!sparsebit_all_set(s));
+ if (num_ranges == 1000)
+ exit(0);
+ ranges[num_ranges++] = (struct range)
+ { .first = first, .last = last, .set = false };
+ break;
+ case 11:
+ sparsebit_validate_internal(s);
+ break;
+ default:
+ break;
+ }
+}
+
+unsigned char get8(void)
+{
+ int ch;
+
+ ch = getchar();
+ if (ch == EOF)
+ exit(0);
+ return ch;
+}
+
+uint64_t get64(void)
+{
+ uint64_t x;
+
+ x = get8();
+ x = (x << 8) | get8();
+ x = (x << 8) | get8();
+ x = (x << 8) | get8();
+ x = (x << 8) | get8();
+ x = (x << 8) | get8();
+ x = (x << 8) | get8();
+ return (x << 8) | get8();
+}
+
+int main(void)
+{
+ s = sparsebit_alloc();
+ for (;;) {
+ uint8_t op = get8() & 0xf;
+ uint64_t first = get64();
+ uint64_t last = get64();
+
+ operate(op, first, last);
+ }
+}
+#endif
diff --git a/tools/testing/selftests/kvm/lib/vmx.c b/tools/testing/selftests/kvm/lib/vmx.c
new file mode 100644
index 000000000..b987c3c97
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/vmx.c
@@ -0,0 +1,283 @@
+/*
+ * tools/testing/selftests/kvm/lib/x86.c
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_name */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "x86.h"
+#include "vmx.h"
+
+/* Allocate memory regions for nested VMX tests.
+ *
+ * Input Args:
+ * vm - The VM to allocate guest-virtual addresses in.
+ *
+ * Output Args:
+ * p_vmx_gva - The guest virtual address for the struct vmx_pages.
+ *
+ * Return:
+ * Pointer to structure with the addresses of the VMX areas.
+ */
+struct vmx_pages *
+vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva)
+{
+ vm_vaddr_t vmx_gva = vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ struct vmx_pages *vmx = addr_gva2hva(vm, vmx_gva);
+
+ /* Setup of a region of guest memory for the vmxon region. */
+ vmx->vmxon = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ vmx->vmxon_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmxon);
+ vmx->vmxon_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmxon);
+
+ /* Setup of a region of guest memory for a vmcs. */
+ vmx->vmcs = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ vmx->vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmcs);
+ vmx->vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmcs);
+
+ /* Setup of a region of guest memory for the MSR bitmap. */
+ vmx->msr = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ vmx->msr_hva = addr_gva2hva(vm, (uintptr_t)vmx->msr);
+ vmx->msr_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->msr);
+ memset(vmx->msr_hva, 0, getpagesize());
+
+ /* Setup of a region of guest memory for the shadow VMCS. */
+ vmx->shadow_vmcs = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ vmx->shadow_vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->shadow_vmcs);
+ vmx->shadow_vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->shadow_vmcs);
+
+ /* Setup of a region of guest memory for the VMREAD and VMWRITE bitmaps. */
+ vmx->vmread = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ vmx->vmread_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmread);
+ vmx->vmread_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmread);
+ memset(vmx->vmread_hva, 0, getpagesize());
+
+ vmx->vmwrite = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ vmx->vmwrite_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmwrite);
+ vmx->vmwrite_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmwrite);
+ memset(vmx->vmwrite_hva, 0, getpagesize());
+
+ *p_vmx_gva = vmx_gva;
+ return vmx;
+}
+
+bool prepare_for_vmx_operation(struct vmx_pages *vmx)
+{
+ uint64_t feature_control;
+ uint64_t required;
+ unsigned long cr0;
+ unsigned long cr4;
+
+ /*
+ * Ensure bits in CR0 and CR4 are valid in VMX operation:
+ * - Bit X is 1 in _FIXED0: bit X is fixed to 1 in CRx.
+ * - Bit X is 0 in _FIXED1: bit X is fixed to 0 in CRx.
+ */
+ __asm__ __volatile__("mov %%cr0, %0" : "=r"(cr0) : : "memory");
+ cr0 &= rdmsr(MSR_IA32_VMX_CR0_FIXED1);
+ cr0 |= rdmsr(MSR_IA32_VMX_CR0_FIXED0);
+ __asm__ __volatile__("mov %0, %%cr0" : : "r"(cr0) : "memory");
+
+ __asm__ __volatile__("mov %%cr4, %0" : "=r"(cr4) : : "memory");
+ cr4 &= rdmsr(MSR_IA32_VMX_CR4_FIXED1);
+ cr4 |= rdmsr(MSR_IA32_VMX_CR4_FIXED0);
+ /* Enable VMX operation */
+ cr4 |= X86_CR4_VMXE;
+ __asm__ __volatile__("mov %0, %%cr4" : : "r"(cr4) : "memory");
+
+ /*
+ * Configure IA32_FEATURE_CONTROL MSR to allow VMXON:
+ * Bit 0: Lock bit. If clear, VMXON causes a #GP.
+ * Bit 2: Enables VMXON outside of SMX operation. If clear, VMXON
+ * outside of SMX causes a #GP.
+ */
+ required = FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
+ required |= FEATURE_CONTROL_LOCKED;
+ feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL);
+ if ((feature_control & required) != required)
+ wrmsr(MSR_IA32_FEATURE_CONTROL, feature_control | required);
+
+ /* Enter VMX root operation. */
+ *(uint32_t *)(vmx->vmxon) = vmcs_revision();
+ if (vmxon(vmx->vmxon_gpa))
+ return false;
+
+ /* Load a VMCS. */
+ *(uint32_t *)(vmx->vmcs) = vmcs_revision();
+ if (vmclear(vmx->vmcs_gpa))
+ return false;
+
+ if (vmptrld(vmx->vmcs_gpa))
+ return false;
+
+ /* Setup shadow VMCS, do not load it yet. */
+ *(uint32_t *)(vmx->shadow_vmcs) = vmcs_revision() | 0x80000000ul;
+ if (vmclear(vmx->shadow_vmcs_gpa))
+ return false;
+
+ return true;
+}
+
+/*
+ * Initialize the control fields to the most basic settings possible.
+ */
+static inline void init_vmcs_control_fields(struct vmx_pages *vmx)
+{
+ vmwrite(VIRTUAL_PROCESSOR_ID, 0);
+ vmwrite(POSTED_INTR_NV, 0);
+
+ vmwrite(PIN_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PINBASED_CTLS));
+ if (!vmwrite(SECONDARY_VM_EXEC_CONTROL, 0))
+ vmwrite(CPU_BASED_VM_EXEC_CONTROL,
+ rdmsr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS) | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS);
+ else
+ vmwrite(CPU_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS));
+ vmwrite(EXCEPTION_BITMAP, 0);
+ vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0);
+ vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, -1); /* Never match */
+ vmwrite(CR3_TARGET_COUNT, 0);
+ vmwrite(VM_EXIT_CONTROLS, rdmsr(MSR_IA32_VMX_EXIT_CTLS) |
+ VM_EXIT_HOST_ADDR_SPACE_SIZE); /* 64-bit host */
+ vmwrite(VM_EXIT_MSR_STORE_COUNT, 0);
+ vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0);
+ vmwrite(VM_ENTRY_CONTROLS, rdmsr(MSR_IA32_VMX_ENTRY_CTLS) |
+ VM_ENTRY_IA32E_MODE); /* 64-bit guest */
+ vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0);
+ vmwrite(VM_ENTRY_INTR_INFO_FIELD, 0);
+ vmwrite(TPR_THRESHOLD, 0);
+
+ vmwrite(CR0_GUEST_HOST_MASK, 0);
+ vmwrite(CR4_GUEST_HOST_MASK, 0);
+ vmwrite(CR0_READ_SHADOW, get_cr0());
+ vmwrite(CR4_READ_SHADOW, get_cr4());
+
+ vmwrite(MSR_BITMAP, vmx->msr_gpa);
+ vmwrite(VMREAD_BITMAP, vmx->vmread_gpa);
+ vmwrite(VMWRITE_BITMAP, vmx->vmwrite_gpa);
+}
+
+/*
+ * Initialize the host state fields based on the current host state, with
+ * the exception of HOST_RSP and HOST_RIP, which should be set by vmlaunch
+ * or vmresume.
+ */
+static inline void init_vmcs_host_state(void)
+{
+ uint32_t exit_controls = vmreadz(VM_EXIT_CONTROLS);
+
+ vmwrite(HOST_ES_SELECTOR, get_es());
+ vmwrite(HOST_CS_SELECTOR, get_cs());
+ vmwrite(HOST_SS_SELECTOR, get_ss());
+ vmwrite(HOST_DS_SELECTOR, get_ds());
+ vmwrite(HOST_FS_SELECTOR, get_fs());
+ vmwrite(HOST_GS_SELECTOR, get_gs());
+ vmwrite(HOST_TR_SELECTOR, get_tr());
+
+ if (exit_controls & VM_EXIT_LOAD_IA32_PAT)
+ vmwrite(HOST_IA32_PAT, rdmsr(MSR_IA32_CR_PAT));
+ if (exit_controls & VM_EXIT_LOAD_IA32_EFER)
+ vmwrite(HOST_IA32_EFER, rdmsr(MSR_EFER));
+ if (exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
+ vmwrite(HOST_IA32_PERF_GLOBAL_CTRL,
+ rdmsr(MSR_CORE_PERF_GLOBAL_CTRL));
+
+ vmwrite(HOST_IA32_SYSENTER_CS, rdmsr(MSR_IA32_SYSENTER_CS));
+
+ vmwrite(HOST_CR0, get_cr0());
+ vmwrite(HOST_CR3, get_cr3());
+ vmwrite(HOST_CR4, get_cr4());
+ vmwrite(HOST_FS_BASE, rdmsr(MSR_FS_BASE));
+ vmwrite(HOST_GS_BASE, rdmsr(MSR_GS_BASE));
+ vmwrite(HOST_TR_BASE,
+ get_desc64_base((struct desc64 *)(get_gdt_base() + get_tr())));
+ vmwrite(HOST_GDTR_BASE, get_gdt_base());
+ vmwrite(HOST_IDTR_BASE, get_idt_base());
+ vmwrite(HOST_IA32_SYSENTER_ESP, rdmsr(MSR_IA32_SYSENTER_ESP));
+ vmwrite(HOST_IA32_SYSENTER_EIP, rdmsr(MSR_IA32_SYSENTER_EIP));
+}
+
+/*
+ * Initialize the guest state fields essentially as a clone of
+ * the host state fields. Some host state fields have fixed
+ * values, and we set the corresponding guest state fields accordingly.
+ */
+static inline void init_vmcs_guest_state(void *rip, void *rsp)
+{
+ vmwrite(GUEST_ES_SELECTOR, vmreadz(HOST_ES_SELECTOR));
+ vmwrite(GUEST_CS_SELECTOR, vmreadz(HOST_CS_SELECTOR));
+ vmwrite(GUEST_SS_SELECTOR, vmreadz(HOST_SS_SELECTOR));
+ vmwrite(GUEST_DS_SELECTOR, vmreadz(HOST_DS_SELECTOR));
+ vmwrite(GUEST_FS_SELECTOR, vmreadz(HOST_FS_SELECTOR));
+ vmwrite(GUEST_GS_SELECTOR, vmreadz(HOST_GS_SELECTOR));
+ vmwrite(GUEST_LDTR_SELECTOR, 0);
+ vmwrite(GUEST_TR_SELECTOR, vmreadz(HOST_TR_SELECTOR));
+ vmwrite(GUEST_INTR_STATUS, 0);
+ vmwrite(GUEST_PML_INDEX, 0);
+
+ vmwrite(VMCS_LINK_POINTER, -1ll);
+ vmwrite(GUEST_IA32_DEBUGCTL, 0);
+ vmwrite(GUEST_IA32_PAT, vmreadz(HOST_IA32_PAT));
+ vmwrite(GUEST_IA32_EFER, vmreadz(HOST_IA32_EFER));
+ vmwrite(GUEST_IA32_PERF_GLOBAL_CTRL,
+ vmreadz(HOST_IA32_PERF_GLOBAL_CTRL));
+
+ vmwrite(GUEST_ES_LIMIT, -1);
+ vmwrite(GUEST_CS_LIMIT, -1);
+ vmwrite(GUEST_SS_LIMIT, -1);
+ vmwrite(GUEST_DS_LIMIT, -1);
+ vmwrite(GUEST_FS_LIMIT, -1);
+ vmwrite(GUEST_GS_LIMIT, -1);
+ vmwrite(GUEST_LDTR_LIMIT, -1);
+ vmwrite(GUEST_TR_LIMIT, 0x67);
+ vmwrite(GUEST_GDTR_LIMIT, 0xffff);
+ vmwrite(GUEST_IDTR_LIMIT, 0xffff);
+ vmwrite(GUEST_ES_AR_BYTES,
+ vmreadz(GUEST_ES_SELECTOR) == 0 ? 0x10000 : 0xc093);
+ vmwrite(GUEST_CS_AR_BYTES, 0xa09b);
+ vmwrite(GUEST_SS_AR_BYTES, 0xc093);
+ vmwrite(GUEST_DS_AR_BYTES,
+ vmreadz(GUEST_DS_SELECTOR) == 0 ? 0x10000 : 0xc093);
+ vmwrite(GUEST_FS_AR_BYTES,
+ vmreadz(GUEST_FS_SELECTOR) == 0 ? 0x10000 : 0xc093);
+ vmwrite(GUEST_GS_AR_BYTES,
+ vmreadz(GUEST_GS_SELECTOR) == 0 ? 0x10000 : 0xc093);
+ vmwrite(GUEST_LDTR_AR_BYTES, 0x10000);
+ vmwrite(GUEST_TR_AR_BYTES, 0x8b);
+ vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0);
+ vmwrite(GUEST_ACTIVITY_STATE, 0);
+ vmwrite(GUEST_SYSENTER_CS, vmreadz(HOST_IA32_SYSENTER_CS));
+ vmwrite(VMX_PREEMPTION_TIMER_VALUE, 0);
+
+ vmwrite(GUEST_CR0, vmreadz(HOST_CR0));
+ vmwrite(GUEST_CR3, vmreadz(HOST_CR3));
+ vmwrite(GUEST_CR4, vmreadz(HOST_CR4));
+ vmwrite(GUEST_ES_BASE, 0);
+ vmwrite(GUEST_CS_BASE, 0);
+ vmwrite(GUEST_SS_BASE, 0);
+ vmwrite(GUEST_DS_BASE, 0);
+ vmwrite(GUEST_FS_BASE, vmreadz(HOST_FS_BASE));
+ vmwrite(GUEST_GS_BASE, vmreadz(HOST_GS_BASE));
+ vmwrite(GUEST_LDTR_BASE, 0);
+ vmwrite(GUEST_TR_BASE, vmreadz(HOST_TR_BASE));
+ vmwrite(GUEST_GDTR_BASE, vmreadz(HOST_GDTR_BASE));
+ vmwrite(GUEST_IDTR_BASE, vmreadz(HOST_IDTR_BASE));
+ vmwrite(GUEST_DR7, 0x400);
+ vmwrite(GUEST_RSP, (uint64_t)rsp);
+ vmwrite(GUEST_RIP, (uint64_t)rip);
+ vmwrite(GUEST_RFLAGS, 2);
+ vmwrite(GUEST_PENDING_DBG_EXCEPTIONS, 0);
+ vmwrite(GUEST_SYSENTER_ESP, vmreadz(HOST_IA32_SYSENTER_ESP));
+ vmwrite(GUEST_SYSENTER_EIP, vmreadz(HOST_IA32_SYSENTER_EIP));
+}
+
+void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp)
+{
+ init_vmcs_control_fields(vmx);
+ init_vmcs_host_state();
+ init_vmcs_guest_state(guest_rip, guest_rsp);
+}
diff --git a/tools/testing/selftests/kvm/lib/x86.c b/tools/testing/selftests/kvm/lib/x86.c
new file mode 100644
index 000000000..800fe3606
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/x86.c
@@ -0,0 +1,893 @@
+/*
+ * tools/testing/selftests/kvm/lib/x86.c
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_name */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "kvm_util_internal.h"
+#include "x86.h"
+
+/* Minimum physical address used for virtual translation tables. */
+#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
+
+/* Virtual translation table structure declarations */
+struct pageMapL4Entry {
+ uint64_t present:1;
+ uint64_t writable:1;
+ uint64_t user:1;
+ uint64_t write_through:1;
+ uint64_t cache_disable:1;
+ uint64_t accessed:1;
+ uint64_t ignored_06:1;
+ uint64_t page_size:1;
+ uint64_t ignored_11_08:4;
+ uint64_t address:40;
+ uint64_t ignored_62_52:11;
+ uint64_t execute_disable:1;
+};
+
+struct pageDirectoryPointerEntry {
+ uint64_t present:1;
+ uint64_t writable:1;
+ uint64_t user:1;
+ uint64_t write_through:1;
+ uint64_t cache_disable:1;
+ uint64_t accessed:1;
+ uint64_t ignored_06:1;
+ uint64_t page_size:1;
+ uint64_t ignored_11_08:4;
+ uint64_t address:40;
+ uint64_t ignored_62_52:11;
+ uint64_t execute_disable:1;
+};
+
+struct pageDirectoryEntry {
+ uint64_t present:1;
+ uint64_t writable:1;
+ uint64_t user:1;
+ uint64_t write_through:1;
+ uint64_t cache_disable:1;
+ uint64_t accessed:1;
+ uint64_t ignored_06:1;
+ uint64_t page_size:1;
+ uint64_t ignored_11_08:4;
+ uint64_t address:40;
+ uint64_t ignored_62_52:11;
+ uint64_t execute_disable:1;
+};
+
+struct pageTableEntry {
+ uint64_t present:1;
+ uint64_t writable:1;
+ uint64_t user:1;
+ uint64_t write_through:1;
+ uint64_t cache_disable:1;
+ uint64_t accessed:1;
+ uint64_t dirty:1;
+ uint64_t reserved_07:1;
+ uint64_t global:1;
+ uint64_t ignored_11_09:3;
+ uint64_t address:40;
+ uint64_t ignored_62_52:11;
+ uint64_t execute_disable:1;
+};
+
+/* Register Dump
+ *
+ * Input Args:
+ * indent - Left margin indent amount
+ * regs - register
+ *
+ * Output Args:
+ * stream - Output FILE stream
+ *
+ * Return: None
+ *
+ * Dumps the state of the registers given by regs, to the FILE stream
+ * given by steam.
+ */
+void regs_dump(FILE *stream, struct kvm_regs *regs,
+ uint8_t indent)
+{
+ fprintf(stream, "%*srax: 0x%.16llx rbx: 0x%.16llx "
+ "rcx: 0x%.16llx rdx: 0x%.16llx\n",
+ indent, "",
+ regs->rax, regs->rbx, regs->rcx, regs->rdx);
+ fprintf(stream, "%*srsi: 0x%.16llx rdi: 0x%.16llx "
+ "rsp: 0x%.16llx rbp: 0x%.16llx\n",
+ indent, "",
+ regs->rsi, regs->rdi, regs->rsp, regs->rbp);
+ fprintf(stream, "%*sr8: 0x%.16llx r9: 0x%.16llx "
+ "r10: 0x%.16llx r11: 0x%.16llx\n",
+ indent, "",
+ regs->r8, regs->r9, regs->r10, regs->r11);
+ fprintf(stream, "%*sr12: 0x%.16llx r13: 0x%.16llx "
+ "r14: 0x%.16llx r15: 0x%.16llx\n",
+ indent, "",
+ regs->r12, regs->r13, regs->r14, regs->r15);
+ fprintf(stream, "%*srip: 0x%.16llx rfl: 0x%.16llx\n",
+ indent, "",
+ regs->rip, regs->rflags);
+}
+
+/* Segment Dump
+ *
+ * Input Args:
+ * indent - Left margin indent amount
+ * segment - KVM segment
+ *
+ * Output Args:
+ * stream - Output FILE stream
+ *
+ * Return: None
+ *
+ * Dumps the state of the KVM segment given by segment, to the FILE stream
+ * given by steam.
+ */
+static void segment_dump(FILE *stream, struct kvm_segment *segment,
+ uint8_t indent)
+{
+ fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.8x "
+ "selector: 0x%.4x type: 0x%.2x\n",
+ indent, "", segment->base, segment->limit,
+ segment->selector, segment->type);
+ fprintf(stream, "%*spresent: 0x%.2x dpl: 0x%.2x "
+ "db: 0x%.2x s: 0x%.2x l: 0x%.2x\n",
+ indent, "", segment->present, segment->dpl,
+ segment->db, segment->s, segment->l);
+ fprintf(stream, "%*sg: 0x%.2x avl: 0x%.2x "
+ "unusable: 0x%.2x padding: 0x%.2x\n",
+ indent, "", segment->g, segment->avl,
+ segment->unusable, segment->padding);
+}
+
+/* dtable Dump
+ *
+ * Input Args:
+ * indent - Left margin indent amount
+ * dtable - KVM dtable
+ *
+ * Output Args:
+ * stream - Output FILE stream
+ *
+ * Return: None
+ *
+ * Dumps the state of the KVM dtable given by dtable, to the FILE stream
+ * given by steam.
+ */
+static void dtable_dump(FILE *stream, struct kvm_dtable *dtable,
+ uint8_t indent)
+{
+ fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.4x "
+ "padding: 0x%.4x 0x%.4x 0x%.4x\n",
+ indent, "", dtable->base, dtable->limit,
+ dtable->padding[0], dtable->padding[1], dtable->padding[2]);
+}
+
+/* System Register Dump
+ *
+ * Input Args:
+ * indent - Left margin indent amount
+ * sregs - System registers
+ *
+ * Output Args:
+ * stream - Output FILE stream
+ *
+ * Return: None
+ *
+ * Dumps the state of the system registers given by sregs, to the FILE stream
+ * given by steam.
+ */
+void sregs_dump(FILE *stream, struct kvm_sregs *sregs,
+ uint8_t indent)
+{
+ unsigned int i;
+
+ fprintf(stream, "%*scs:\n", indent, "");
+ segment_dump(stream, &sregs->cs, indent + 2);
+ fprintf(stream, "%*sds:\n", indent, "");
+ segment_dump(stream, &sregs->ds, indent + 2);
+ fprintf(stream, "%*ses:\n", indent, "");
+ segment_dump(stream, &sregs->es, indent + 2);
+ fprintf(stream, "%*sfs:\n", indent, "");
+ segment_dump(stream, &sregs->fs, indent + 2);
+ fprintf(stream, "%*sgs:\n", indent, "");
+ segment_dump(stream, &sregs->gs, indent + 2);
+ fprintf(stream, "%*sss:\n", indent, "");
+ segment_dump(stream, &sregs->ss, indent + 2);
+ fprintf(stream, "%*str:\n", indent, "");
+ segment_dump(stream, &sregs->tr, indent + 2);
+ fprintf(stream, "%*sldt:\n", indent, "");
+ segment_dump(stream, &sregs->ldt, indent + 2);
+
+ fprintf(stream, "%*sgdt:\n", indent, "");
+ dtable_dump(stream, &sregs->gdt, indent + 2);
+ fprintf(stream, "%*sidt:\n", indent, "");
+ dtable_dump(stream, &sregs->idt, indent + 2);
+
+ fprintf(stream, "%*scr0: 0x%.16llx cr2: 0x%.16llx "
+ "cr3: 0x%.16llx cr4: 0x%.16llx\n",
+ indent, "",
+ sregs->cr0, sregs->cr2, sregs->cr3, sregs->cr4);
+ fprintf(stream, "%*scr8: 0x%.16llx efer: 0x%.16llx "
+ "apic_base: 0x%.16llx\n",
+ indent, "",
+ sregs->cr8, sregs->efer, sregs->apic_base);
+
+ fprintf(stream, "%*sinterrupt_bitmap:\n", indent, "");
+ for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++) {
+ fprintf(stream, "%*s%.16llx\n", indent + 2, "",
+ sregs->interrupt_bitmap[i]);
+ }
+}
+
+void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot)
+{
+ int rc;
+
+ TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use "
+ "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+
+ /* If needed, create page map l4 table. */
+ if (!vm->pgd_created) {
+ vm_paddr_t paddr = vm_phy_page_alloc(vm,
+ KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
+ vm->pgd = paddr;
+ vm->pgd_created = true;
+ }
+}
+
+/* VM Virtual Page Map
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vaddr - VM Virtual Address
+ * paddr - VM Physical Address
+ * pgd_memslot - Memory region slot for new virtual translation tables
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Within the VM given by vm, creates a virtual translation for the page
+ * starting at vaddr to the page starting at paddr.
+ */
+void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+ uint32_t pgd_memslot)
+{
+ uint16_t index[4];
+ struct pageMapL4Entry *pml4e;
+
+ TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use "
+ "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+
+ TEST_ASSERT((vaddr % vm->page_size) == 0,
+ "Virtual address not on page boundary,\n"
+ " vaddr: 0x%lx vm->page_size: 0x%x",
+ vaddr, vm->page_size);
+ TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
+ (vaddr >> vm->page_shift)),
+ "Invalid virtual address, vaddr: 0x%lx",
+ vaddr);
+ TEST_ASSERT((paddr % vm->page_size) == 0,
+ "Physical address not on page boundary,\n"
+ " paddr: 0x%lx vm->page_size: 0x%x",
+ paddr, vm->page_size);
+ TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
+ "Physical address beyond beyond maximum supported,\n"
+ " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+ paddr, vm->max_gfn, vm->page_size);
+
+ index[0] = (vaddr >> 12) & 0x1ffu;
+ index[1] = (vaddr >> 21) & 0x1ffu;
+ index[2] = (vaddr >> 30) & 0x1ffu;
+ index[3] = (vaddr >> 39) & 0x1ffu;
+
+ /* Allocate page directory pointer table if not present. */
+ pml4e = addr_gpa2hva(vm, vm->pgd);
+ if (!pml4e[index[3]].present) {
+ pml4e[index[3]].address = vm_phy_page_alloc(vm,
+ KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
+ >> vm->page_shift;
+ pml4e[index[3]].writable = true;
+ pml4e[index[3]].present = true;
+ }
+
+ /* Allocate page directory table if not present. */
+ struct pageDirectoryPointerEntry *pdpe;
+ pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
+ if (!pdpe[index[2]].present) {
+ pdpe[index[2]].address = vm_phy_page_alloc(vm,
+ KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
+ >> vm->page_shift;
+ pdpe[index[2]].writable = true;
+ pdpe[index[2]].present = true;
+ }
+
+ /* Allocate page table if not present. */
+ struct pageDirectoryEntry *pde;
+ pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
+ if (!pde[index[1]].present) {
+ pde[index[1]].address = vm_phy_page_alloc(vm,
+ KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
+ >> vm->page_shift;
+ pde[index[1]].writable = true;
+ pde[index[1]].present = true;
+ }
+
+ /* Fill in page table entry. */
+ struct pageTableEntry *pte;
+ pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);
+ pte[index[0]].address = paddr >> vm->page_shift;
+ pte[index[0]].writable = true;
+ pte[index[0]].present = 1;
+}
+
+/* Virtual Translation Tables Dump
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * indent - Left margin indent amount
+ *
+ * Output Args:
+ * stream - Output FILE stream
+ *
+ * Return: None
+ *
+ * Dumps to the FILE stream given by stream, the contents of all the
+ * virtual translation tables for the VM given by vm.
+ */
+void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+{
+ struct pageMapL4Entry *pml4e, *pml4e_start;
+ struct pageDirectoryPointerEntry *pdpe, *pdpe_start;
+ struct pageDirectoryEntry *pde, *pde_start;
+ struct pageTableEntry *pte, *pte_start;
+
+ if (!vm->pgd_created)
+ return;
+
+ fprintf(stream, "%*s "
+ " no\n", indent, "");
+ fprintf(stream, "%*s index hvaddr gpaddr "
+ "addr w exec dirty\n",
+ indent, "");
+ pml4e_start = (struct pageMapL4Entry *) addr_gpa2hva(vm,
+ vm->pgd);
+ for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) {
+ pml4e = &pml4e_start[n1];
+ if (!pml4e->present)
+ continue;
+ fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10lx %u "
+ " %u\n",
+ indent, "",
+ pml4e - pml4e_start, pml4e,
+ addr_hva2gpa(vm, pml4e), (uint64_t) pml4e->address,
+ pml4e->writable, pml4e->execute_disable);
+
+ pdpe_start = addr_gpa2hva(vm, pml4e->address
+ * vm->page_size);
+ for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) {
+ pdpe = &pdpe_start[n2];
+ if (!pdpe->present)
+ continue;
+ fprintf(stream, "%*spdpe 0x%-3zx %p 0x%-12lx 0x%-10lx "
+ "%u %u\n",
+ indent, "",
+ pdpe - pdpe_start, pdpe,
+ addr_hva2gpa(vm, pdpe),
+ (uint64_t) pdpe->address, pdpe->writable,
+ pdpe->execute_disable);
+
+ pde_start = addr_gpa2hva(vm,
+ pdpe->address * vm->page_size);
+ for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) {
+ pde = &pde_start[n3];
+ if (!pde->present)
+ continue;
+ fprintf(stream, "%*spde 0x%-3zx %p "
+ "0x%-12lx 0x%-10lx %u %u\n",
+ indent, "", pde - pde_start, pde,
+ addr_hva2gpa(vm, pde),
+ (uint64_t) pde->address, pde->writable,
+ pde->execute_disable);
+
+ pte_start = addr_gpa2hva(vm,
+ pde->address * vm->page_size);
+ for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) {
+ pte = &pte_start[n4];
+ if (!pte->present)
+ continue;
+ fprintf(stream, "%*spte 0x%-3zx %p "
+ "0x%-12lx 0x%-10lx %u %u "
+ " %u 0x%-10lx\n",
+ indent, "",
+ pte - pte_start, pte,
+ addr_hva2gpa(vm, pte),
+ (uint64_t) pte->address,
+ pte->writable,
+ pte->execute_disable,
+ pte->dirty,
+ ((uint64_t) n1 << 27)
+ | ((uint64_t) n2 << 18)
+ | ((uint64_t) n3 << 9)
+ | ((uint64_t) n4));
+ }
+ }
+ }
+ }
+}
+
+/* Set Unusable Segment
+ *
+ * Input Args: None
+ *
+ * Output Args:
+ * segp - Pointer to segment register
+ *
+ * Return: None
+ *
+ * Sets the segment register pointed to by segp to an unusable state.
+ */
+static void kvm_seg_set_unusable(struct kvm_segment *segp)
+{
+ memset(segp, 0, sizeof(*segp));
+ segp->unusable = true;
+}
+
+static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp)
+{
+ void *gdt = addr_gva2hva(vm, vm->gdt);
+ struct desc64 *desc = gdt + (segp->selector >> 3) * 8;
+
+ desc->limit0 = segp->limit & 0xFFFF;
+ desc->base0 = segp->base & 0xFFFF;
+ desc->base1 = segp->base >> 16;
+ desc->type = segp->type;
+ desc->s = segp->s;
+ desc->dpl = segp->dpl;
+ desc->p = segp->present;
+ desc->limit1 = segp->limit >> 16;
+ desc->avl = segp->avl;
+ desc->l = segp->l;
+ desc->db = segp->db;
+ desc->g = segp->g;
+ desc->base2 = segp->base >> 24;
+ if (!segp->s)
+ desc->base3 = segp->base >> 32;
+}
+
+
+/* Set Long Mode Flat Kernel Code Segment
+ *
+ * Input Args:
+ * vm - VM whose GDT is being filled, or NULL to only write segp
+ * selector - selector value
+ *
+ * Output Args:
+ * segp - Pointer to KVM segment
+ *
+ * Return: None
+ *
+ * Sets up the KVM segment pointed to by segp, to be a code segment
+ * with the selector value given by selector.
+ */
+static void kvm_seg_set_kernel_code_64bit(struct kvm_vm *vm, uint16_t selector,
+ struct kvm_segment *segp)
+{
+ memset(segp, 0, sizeof(*segp));
+ segp->selector = selector;
+ segp->limit = 0xFFFFFFFFu;
+ segp->s = 0x1; /* kTypeCodeData */
+ segp->type = 0x08 | 0x01 | 0x02; /* kFlagCode | kFlagCodeAccessed
+ * | kFlagCodeReadable
+ */
+ segp->g = true;
+ segp->l = true;
+ segp->present = 1;
+ if (vm)
+ kvm_seg_fill_gdt_64bit(vm, segp);
+}
+
+/* Set Long Mode Flat Kernel Data Segment
+ *
+ * Input Args:
+ * vm - VM whose GDT is being filled, or NULL to only write segp
+ * selector - selector value
+ *
+ * Output Args:
+ * segp - Pointer to KVM segment
+ *
+ * Return: None
+ *
+ * Sets up the KVM segment pointed to by segp, to be a data segment
+ * with the selector value given by selector.
+ */
+static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector,
+ struct kvm_segment *segp)
+{
+ memset(segp, 0, sizeof(*segp));
+ segp->selector = selector;
+ segp->limit = 0xFFFFFFFFu;
+ segp->s = 0x1; /* kTypeCodeData */
+ segp->type = 0x00 | 0x01 | 0x02; /* kFlagData | kFlagDataAccessed
+ * | kFlagDataWritable
+ */
+ segp->g = true;
+ segp->present = true;
+ if (vm)
+ kvm_seg_fill_gdt_64bit(vm, segp);
+}
+
+/* Address Guest Virtual to Guest Physical
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * gpa - VM virtual address
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Equivalent VM physical address
+ *
+ * Translates the VM virtual address given by gva to a VM physical
+ * address and then locates the memory region containing the VM
+ * physical address, within the VM given by vm. When found, the host
+ * virtual address providing the memory to the vm physical address is returned.
+ * A TEST_ASSERT failure occurs if no region containing translated
+ * VM virtual address exists.
+ */
+vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+ uint16_t index[4];
+ struct pageMapL4Entry *pml4e;
+ struct pageDirectoryPointerEntry *pdpe;
+ struct pageDirectoryEntry *pde;
+ struct pageTableEntry *pte;
+ void *hva;
+
+ TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use "
+ "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+
+ index[0] = (gva >> 12) & 0x1ffu;
+ index[1] = (gva >> 21) & 0x1ffu;
+ index[2] = (gva >> 30) & 0x1ffu;
+ index[3] = (gva >> 39) & 0x1ffu;
+
+ if (!vm->pgd_created)
+ goto unmapped_gva;
+ pml4e = addr_gpa2hva(vm, vm->pgd);
+ if (!pml4e[index[3]].present)
+ goto unmapped_gva;
+
+ pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
+ if (!pdpe[index[2]].present)
+ goto unmapped_gva;
+
+ pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
+ if (!pde[index[1]].present)
+ goto unmapped_gva;
+
+ pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);
+ if (!pte[index[0]].present)
+ goto unmapped_gva;
+
+ return (pte[index[0]].address * vm->page_size) + (gva & 0xfffu);
+
+unmapped_gva:
+ TEST_ASSERT(false, "No mapping for vm virtual address, "
+ "gva: 0x%lx", gva);
+}
+
+static void kvm_setup_gdt(struct kvm_vm *vm, struct kvm_dtable *dt, int gdt_memslot,
+ int pgd_memslot)
+{
+ if (!vm->gdt)
+ vm->gdt = vm_vaddr_alloc(vm, getpagesize(),
+ KVM_UTIL_MIN_VADDR, gdt_memslot, pgd_memslot);
+
+ dt->base = vm->gdt;
+ dt->limit = getpagesize();
+}
+
+static void kvm_setup_tss_64bit(struct kvm_vm *vm, struct kvm_segment *segp,
+ int selector, int gdt_memslot,
+ int pgd_memslot)
+{
+ if (!vm->tss)
+ vm->tss = vm_vaddr_alloc(vm, getpagesize(),
+ KVM_UTIL_MIN_VADDR, gdt_memslot, pgd_memslot);
+
+ memset(segp, 0, sizeof(*segp));
+ segp->base = vm->tss;
+ segp->limit = 0x67;
+ segp->selector = selector;
+ segp->type = 0xb;
+ segp->present = 1;
+ kvm_seg_fill_gdt_64bit(vm, segp);
+}
+
+void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_memslot)
+{
+ struct kvm_sregs sregs;
+
+ /* Set mode specific system register values. */
+ vcpu_sregs_get(vm, vcpuid, &sregs);
+
+ sregs.idt.limit = 0;
+
+ kvm_setup_gdt(vm, &sregs.gdt, gdt_memslot, pgd_memslot);
+
+ switch (vm->mode) {
+ case VM_MODE_FLAT48PG:
+ sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG;
+ sregs.cr4 |= X86_CR4_PAE;
+ sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX);
+
+ kvm_seg_set_unusable(&sregs.ldt);
+ kvm_seg_set_kernel_code_64bit(vm, 0x8, &sregs.cs);
+ kvm_seg_set_kernel_data_64bit(vm, 0x10, &sregs.ds);
+ kvm_seg_set_kernel_data_64bit(vm, 0x10, &sregs.es);
+ kvm_setup_tss_64bit(vm, &sregs.tr, 0x18, gdt_memslot, pgd_memslot);
+ break;
+
+ default:
+ TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", vm->mode);
+ }
+
+ sregs.cr3 = vm->pgd;
+ vcpu_sregs_set(vm, vcpuid, &sregs);
+}
+/* Adds a vCPU with reasonable defaults (i.e., a stack)
+ *
+ * Input Args:
+ * vcpuid - The id of the VCPU to add to the VM.
+ * guest_code - The vCPU's entry point
+ */
+void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
+{
+ struct kvm_mp_state mp_state;
+ struct kvm_regs regs;
+ vm_vaddr_t stack_vaddr;
+ stack_vaddr = vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(),
+ DEFAULT_GUEST_STACK_VADDR_MIN, 0, 0);
+
+ /* Create VCPU */
+ vm_vcpu_add(vm, vcpuid, 0, 0);
+
+ /* Setup guest general purpose registers */
+ vcpu_regs_get(vm, vcpuid, &regs);
+ regs.rflags = regs.rflags | 0x2;
+ regs.rsp = stack_vaddr + (DEFAULT_STACK_PGS * getpagesize());
+ regs.rip = (unsigned long) guest_code;
+ vcpu_regs_set(vm, vcpuid, &regs);
+
+ /* Setup the MP state */
+ mp_state.mp_state = 0;
+ vcpu_set_mp_state(vm, vcpuid, &mp_state);
+}
+
+/* VM VCPU CPUID Set
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU id
+ * cpuid - The CPUID values to set.
+ *
+ * Output Args: None
+ *
+ * Return: void
+ *
+ * Set the VCPU's CPUID.
+ */
+void vcpu_set_cpuid(struct kvm_vm *vm,
+ uint32_t vcpuid, struct kvm_cpuid2 *cpuid)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ int rc;
+
+ TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+ rc = ioctl(vcpu->fd, KVM_SET_CPUID2, cpuid);
+ TEST_ASSERT(rc == 0, "KVM_SET_CPUID2 failed, rc: %i errno: %i",
+ rc, errno);
+
+}
+/* Create a VM with reasonable defaults
+ *
+ * Input Args:
+ * vcpuid - The id of the single VCPU to add to the VM.
+ * extra_mem_pages - The size of extra memories to add (this will
+ * decide how much extra space we will need to
+ * setup the page tables using mem slot 0)
+ * guest_code - The vCPU's entry point
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Pointer to opaque structure that describes the created VM.
+ */
+struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
+ void *guest_code)
+{
+ struct kvm_vm *vm;
+ /*
+ * For x86 the maximum page table size for a memory region
+ * will be when only 4K pages are used. In that case the
+ * total extra size for page tables (for extra N pages) will
+ * be: N/512+N/512^2+N/512^3+... which is definitely smaller
+ * than N/512*2.
+ */
+ uint64_t extra_pg_pages = extra_mem_pages / 512 * 2;
+
+ /* Create VM */
+ vm = vm_create(VM_MODE_FLAT48PG,
+ DEFAULT_GUEST_PHY_PAGES + extra_pg_pages,
+ O_RDWR);
+
+ /* Setup guest code */
+ kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
+
+ /* Setup IRQ Chip */
+ vm_create_irqchip(vm);
+
+ /* Add the first vCPU. */
+ vm_vcpu_add_default(vm, vcpuid, guest_code);
+
+ return vm;
+}
+
+struct kvm_x86_state {
+ struct kvm_vcpu_events events;
+ struct kvm_mp_state mp_state;
+ struct kvm_regs regs;
+ struct kvm_xsave xsave;
+ struct kvm_xcrs xcrs;
+ struct kvm_sregs sregs;
+ struct kvm_debugregs debugregs;
+ union {
+ struct kvm_nested_state nested;
+ char nested_[16384];
+ };
+ struct kvm_msrs msrs;
+};
+
+static int kvm_get_num_msrs(struct kvm_vm *vm)
+{
+ struct kvm_msr_list nmsrs;
+ int r;
+
+ nmsrs.nmsrs = 0;
+ r = ioctl(vm->kvm_fd, KVM_GET_MSR_INDEX_LIST, &nmsrs);
+ TEST_ASSERT(r == -1 && errno == E2BIG, "Unexpected result from KVM_GET_MSR_INDEX_LIST probe, r: %i",
+ r);
+
+ return nmsrs.nmsrs;
+}
+
+struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ struct kvm_msr_list *list;
+ struct kvm_x86_state *state;
+ int nmsrs, r, i;
+ static int nested_size = -1;
+
+ if (nested_size == -1) {
+ nested_size = kvm_check_cap(KVM_CAP_NESTED_STATE);
+ TEST_ASSERT(nested_size <= sizeof(state->nested_),
+ "Nested state size too big, %i > %zi",
+ nested_size, sizeof(state->nested_));
+ }
+
+ nmsrs = kvm_get_num_msrs(vm);
+ list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0]));
+ list->nmsrs = nmsrs;
+ r = ioctl(vm->kvm_fd, KVM_GET_MSR_INDEX_LIST, list);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MSR_INDEX_LIST, r: %i",
+ r);
+
+ state = malloc(sizeof(*state) + nmsrs * sizeof(state->msrs.entries[0]));
+ r = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, &state->events);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_VCPU_EVENTS, r: %i",
+ r);
+
+ r = ioctl(vcpu->fd, KVM_GET_MP_STATE, &state->mp_state);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MP_STATE, r: %i",
+ r);
+
+ r = ioctl(vcpu->fd, KVM_GET_REGS, &state->regs);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_REGS, r: %i",
+ r);
+
+ r = ioctl(vcpu->fd, KVM_GET_XSAVE, &state->xsave);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XSAVE, r: %i",
+ r);
+
+ if (kvm_check_cap(KVM_CAP_XCRS)) {
+ r = ioctl(vcpu->fd, KVM_GET_XCRS, &state->xcrs);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XCRS, r: %i",
+ r);
+ }
+
+ r = ioctl(vcpu->fd, KVM_GET_SREGS, &state->sregs);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_SREGS, r: %i",
+ r);
+
+ if (nested_size) {
+ state->nested.size = sizeof(state->nested_);
+ r = ioctl(vcpu->fd, KVM_GET_NESTED_STATE, &state->nested);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_NESTED_STATE, r: %i",
+ r);
+ TEST_ASSERT(state->nested.size <= nested_size,
+ "Nested state size too big, %i (KVM_CHECK_CAP gave %i)",
+ state->nested.size, nested_size);
+ } else
+ state->nested.size = 0;
+
+ state->msrs.nmsrs = nmsrs;
+ for (i = 0; i < nmsrs; i++)
+ state->msrs.entries[i].index = list->indices[i];
+ r = ioctl(vcpu->fd, KVM_GET_MSRS, &state->msrs);
+ TEST_ASSERT(r == nmsrs, "Unexpected result from KVM_GET_MSRS, r: %i (failed at %x)",
+ r, r == nmsrs ? -1 : list->indices[r]);
+
+ r = ioctl(vcpu->fd, KVM_GET_DEBUGREGS, &state->debugregs);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_DEBUGREGS, r: %i",
+ r);
+
+ free(list);
+ return state;
+}
+
+void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *state)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ int r;
+
+ if (state->nested.size) {
+ r = ioctl(vcpu->fd, KVM_SET_NESTED_STATE, &state->nested);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_NESTED_STATE, r: %i",
+ r);
+ }
+
+ r = ioctl(vcpu->fd, KVM_SET_XSAVE, &state->xsave);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XSAVE, r: %i",
+ r);
+
+ if (kvm_check_cap(KVM_CAP_XCRS)) {
+ r = ioctl(vcpu->fd, KVM_SET_XCRS, &state->xcrs);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XCRS, r: %i",
+ r);
+ }
+
+ r = ioctl(vcpu->fd, KVM_SET_SREGS, &state->sregs);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_SREGS, r: %i",
+ r);
+
+ r = ioctl(vcpu->fd, KVM_SET_MSRS, &state->msrs);
+ TEST_ASSERT(r == state->msrs.nmsrs, "Unexpected result from KVM_SET_MSRS, r: %i (failed at %x)",
+ r, r == state->msrs.nmsrs ? -1 : state->msrs.entries[r].index);
+
+ r = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, &state->events);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_VCPU_EVENTS, r: %i",
+ r);
+
+ r = ioctl(vcpu->fd, KVM_SET_MP_STATE, &state->mp_state);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_MP_STATE, r: %i",
+ r);
+
+ r = ioctl(vcpu->fd, KVM_SET_DEBUGREGS, &state->debugregs);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_DEBUGREGS, r: %i",
+ r);
+
+ r = ioctl(vcpu->fd, KVM_SET_REGS, &state->regs);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_REGS, r: %i",
+ r);
+}
diff --git a/tools/testing/selftests/kvm/platform_info_test.c b/tools/testing/selftests/kvm/platform_info_test.c
new file mode 100644
index 000000000..65db510dd
--- /dev/null
+++ b/tools/testing/selftests/kvm/platform_info_test.c
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test for x86 KVM_CAP_MSR_PLATFORM_INFO
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Verifies expected behavior of controlling guest access to
+ * MSR_PLATFORM_INFO.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "x86.h"
+
+#define VCPU_ID 0
+#define MSR_PLATFORM_INFO_MAX_TURBO_RATIO 0xff00
+
+static void guest_code(void)
+{
+ uint64_t msr_platform_info;
+
+ for (;;) {
+ msr_platform_info = rdmsr(MSR_PLATFORM_INFO);
+ GUEST_SYNC(msr_platform_info);
+ asm volatile ("inc %r11");
+ }
+}
+
+static void set_msr_platform_info_enabled(struct kvm_vm *vm, bool enable)
+{
+ struct kvm_enable_cap cap = {};
+
+ cap.cap = KVM_CAP_MSR_PLATFORM_INFO;
+ cap.flags = 0;
+ cap.args[0] = (int)enable;
+ vm_enable_cap(vm, &cap);
+}
+
+static void test_msr_platform_info_enabled(struct kvm_vm *vm)
+{
+ struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+ struct guest_args args;
+
+ set_msr_platform_info_enabled(vm, true);
+ vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Exit_reason other than KVM_EXIT_IO: %u (%s),\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+ guest_args_read(vm, VCPU_ID, &args);
+ TEST_ASSERT(args.port == GUEST_PORT_SYNC,
+ "Received IO from port other than PORT_HOST_SYNC: %u\n",
+ run->io.port);
+ TEST_ASSERT((args.arg1 & MSR_PLATFORM_INFO_MAX_TURBO_RATIO) ==
+ MSR_PLATFORM_INFO_MAX_TURBO_RATIO,
+ "Expected MSR_PLATFORM_INFO to have max turbo ratio mask: %i.",
+ MSR_PLATFORM_INFO_MAX_TURBO_RATIO);
+}
+
+static void test_msr_platform_info_disabled(struct kvm_vm *vm)
+{
+ struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+
+ set_msr_platform_info_enabled(vm, false);
+ vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN,
+ "Exit_reason other than KVM_EXIT_SHUTDOWN: %u (%s)\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vm *vm;
+ struct kvm_run *state;
+ int rv;
+ uint64_t msr_platform_info;
+
+ /* Tell stdout not to buffer its content */
+ setbuf(stdout, NULL);
+
+ rv = kvm_check_cap(KVM_CAP_MSR_PLATFORM_INFO);
+ if (!rv) {
+ fprintf(stderr,
+ "KVM_CAP_MSR_PLATFORM_INFO not supported, skip test\n");
+ exit(KSFT_SKIP);
+ }
+
+ vm = vm_create_default(VCPU_ID, 0, guest_code);
+
+ msr_platform_info = vcpu_get_msr(vm, VCPU_ID, MSR_PLATFORM_INFO);
+ vcpu_set_msr(vm, VCPU_ID, MSR_PLATFORM_INFO,
+ msr_platform_info | MSR_PLATFORM_INFO_MAX_TURBO_RATIO);
+ test_msr_platform_info_enabled(vm);
+ test_msr_platform_info_disabled(vm);
+ vcpu_set_msr(vm, VCPU_ID, MSR_PLATFORM_INFO, msr_platform_info);
+
+ kvm_vm_free(vm);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/set_sregs_test.c b/tools/testing/selftests/kvm/set_sregs_test.c
new file mode 100644
index 000000000..881419d57
--- /dev/null
+++ b/tools/testing/selftests/kvm/set_sregs_test.c
@@ -0,0 +1,54 @@
+/*
+ * KVM_SET_SREGS tests
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * This is a regression test for the bug fixed by the following commit:
+ * d3802286fa0f ("kvm: x86: Disallow illegal IA32_APIC_BASE MSR values")
+ *
+ * That bug allowed a user-mode program that called the KVM_SET_SREGS
+ * ioctl to put a VCPU's local APIC into an invalid state.
+ *
+ */
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "x86.h"
+
+#define VCPU_ID 5
+
+int main(int argc, char *argv[])
+{
+ struct kvm_sregs sregs;
+ struct kvm_vm *vm;
+ int rc;
+
+ /* Tell stdout not to buffer its content */
+ setbuf(stdout, NULL);
+
+ /* Create VM */
+ vm = vm_create_default(VCPU_ID, 0, NULL);
+
+ vcpu_sregs_get(vm, VCPU_ID, &sregs);
+ sregs.apic_base = 1 << 10;
+ rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs);
+ TEST_ASSERT(rc, "Set IA32_APIC_BASE to %llx (invalid)",
+ sregs.apic_base);
+ sregs.apic_base = 1 << 11;
+ rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs);
+ TEST_ASSERT(!rc, "Couldn't set IA32_APIC_BASE to %llx (valid)",
+ sregs.apic_base);
+
+ kvm_vm_free(vm);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/state_test.c b/tools/testing/selftests/kvm/state_test.c
new file mode 100644
index 000000000..900e3e9df
--- /dev/null
+++ b/tools/testing/selftests/kvm/state_test.c
@@ -0,0 +1,196 @@
+/*
+ * KVM_GET/SET_* tests
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Tests for vCPU state save/restore, including nested guest state.
+ */
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "x86.h"
+#include "vmx.h"
+
+#define VCPU_ID 5
+
+static bool have_nested_state;
+
+void l2_guest_code(void)
+{
+ GUEST_SYNC(5);
+
+ /* Exit to L1 */
+ vmcall();
+
+ /* L1 has now set up a shadow VMCS for us. */
+ GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
+ GUEST_SYNC(9);
+ GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
+ GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0fffee));
+ GUEST_SYNC(10);
+ GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0fffee);
+ GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0ffffee));
+ GUEST_SYNC(11);
+
+ /* Done, exit to L1 and never come back. */
+ vmcall();
+}
+
+void l1_guest_code(struct vmx_pages *vmx_pages)
+{
+#define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+ GUEST_ASSERT(vmx_pages->vmcs_gpa);
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
+
+ GUEST_SYNC(3);
+ GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
+
+ prepare_vmcs(vmx_pages, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ GUEST_SYNC(4);
+ GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+ /* Check that the launched state is preserved. */
+ GUEST_ASSERT(vmlaunch());
+
+ GUEST_ASSERT(!vmresume());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+ GUEST_SYNC(6);
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+ GUEST_ASSERT(!vmresume());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+ vmwrite(GUEST_RIP, vmreadz(GUEST_RIP) + 3);
+
+ vmwrite(SECONDARY_VM_EXEC_CONTROL, SECONDARY_EXEC_SHADOW_VMCS);
+ vmwrite(VMCS_LINK_POINTER, vmx_pages->shadow_vmcs_gpa);
+
+ GUEST_ASSERT(!vmptrld(vmx_pages->shadow_vmcs_gpa));
+ GUEST_ASSERT(vmlaunch());
+ GUEST_SYNC(7);
+ GUEST_ASSERT(vmlaunch());
+ GUEST_ASSERT(vmresume());
+
+ vmwrite(GUEST_RIP, 0xc0ffee);
+ GUEST_SYNC(8);
+ GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
+
+ GUEST_ASSERT(!vmptrld(vmx_pages->vmcs_gpa));
+ GUEST_ASSERT(!vmresume());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+ GUEST_ASSERT(!vmptrld(vmx_pages->shadow_vmcs_gpa));
+ GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee);
+ GUEST_ASSERT(vmlaunch());
+ GUEST_ASSERT(vmresume());
+ GUEST_SYNC(12);
+ GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee);
+ GUEST_ASSERT(vmlaunch());
+ GUEST_ASSERT(vmresume());
+}
+
+void guest_code(struct vmx_pages *vmx_pages)
+{
+ GUEST_SYNC(1);
+ GUEST_SYNC(2);
+
+ if (vmx_pages)
+ l1_guest_code(vmx_pages);
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ struct vmx_pages *vmx_pages = NULL;
+ vm_vaddr_t vmx_pages_gva = 0;
+
+ struct kvm_regs regs1, regs2;
+ struct kvm_vm *vm;
+ struct kvm_run *run;
+ struct kvm_x86_state *state;
+ int stage;
+
+ struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1);
+
+ /* Create VM */
+ vm = vm_create_default(VCPU_ID, 0, guest_code);
+ vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+ run = vcpu_state(vm, VCPU_ID);
+
+ vcpu_regs_get(vm, VCPU_ID, &regs1);
+
+ if (kvm_check_cap(KVM_CAP_NESTED_STATE)) {
+ vmx_pages = vcpu_alloc_vmx(vm, &vmx_pages_gva);
+ vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
+ } else {
+ printf("will skip nested state checks\n");
+ vcpu_args_set(vm, VCPU_ID, 1, 0);
+ }
+
+ for (stage = 1;; stage++) {
+ _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Unexpected exit reason: %u (%s),\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+
+ memset(&regs1, 0, sizeof(regs1));
+ vcpu_regs_get(vm, VCPU_ID, &regs1);
+ switch (run->io.port) {
+ case GUEST_PORT_ABORT:
+ TEST_ASSERT(false, "%s at %s:%d", (const char *) regs1.rdi,
+ __FILE__, regs1.rsi);
+ /* NOT REACHED */
+ case GUEST_PORT_SYNC:
+ break;
+ case GUEST_PORT_DONE:
+ goto done;
+ default:
+ TEST_ASSERT(false, "Unknown port 0x%x.", run->io.port);
+ }
+
+ /* PORT_SYNC is handled here. */
+ TEST_ASSERT(!strcmp((const char *)regs1.rdi, "hello") &&
+ regs1.rsi == stage, "Unexpected register values vmexit #%lx, got %lx",
+ stage, (ulong) regs1.rsi);
+
+ state = vcpu_save_state(vm, VCPU_ID);
+ kvm_vm_release(vm);
+
+ /* Restore state in a new VM. */
+ kvm_vm_restart(vm, O_RDWR);
+ vm_vcpu_add(vm, VCPU_ID, 0, 0);
+ vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+ vcpu_load_state(vm, VCPU_ID, state);
+ run = vcpu_state(vm, VCPU_ID);
+ free(state);
+
+ memset(&regs2, 0, sizeof(regs2));
+ vcpu_regs_get(vm, VCPU_ID, &regs2);
+ TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
+ "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
+ (ulong) regs2.rdi, (ulong) regs2.rsi);
+ }
+
+done:
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/sync_regs_test.c b/tools/testing/selftests/kvm/sync_regs_test.c
new file mode 100644
index 000000000..213343e5d
--- /dev/null
+++ b/tools/testing/selftests/kvm/sync_regs_test.c
@@ -0,0 +1,237 @@
+/*
+ * Test for x86 KVM_CAP_SYNC_REGS
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Verifies expected behavior of x86 KVM_CAP_SYNC_REGS functionality,
+ * including requesting an invalid register set, updates to/from values
+ * in kvm_run.s.regs when kvm_valid_regs and kvm_dirty_regs are toggled.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "x86.h"
+
+#define VCPU_ID 5
+
+void guest_code(void)
+{
+ for (;;) {
+ GUEST_SYNC(0);
+ asm volatile ("inc %r11");
+ }
+}
+
+static void compare_regs(struct kvm_regs *left, struct kvm_regs *right)
+{
+#define REG_COMPARE(reg) \
+ TEST_ASSERT(left->reg == right->reg, \
+ "Register " #reg \
+ " values did not match: 0x%llx, 0x%llx\n", \
+ left->reg, right->reg)
+ REG_COMPARE(rax);
+ REG_COMPARE(rbx);
+ REG_COMPARE(rcx);
+ REG_COMPARE(rdx);
+ REG_COMPARE(rsi);
+ REG_COMPARE(rdi);
+ REG_COMPARE(rsp);
+ REG_COMPARE(rbp);
+ REG_COMPARE(r8);
+ REG_COMPARE(r9);
+ REG_COMPARE(r10);
+ REG_COMPARE(r11);
+ REG_COMPARE(r12);
+ REG_COMPARE(r13);
+ REG_COMPARE(r14);
+ REG_COMPARE(r15);
+ REG_COMPARE(rip);
+ REG_COMPARE(rflags);
+#undef REG_COMPARE
+}
+
+static void compare_sregs(struct kvm_sregs *left, struct kvm_sregs *right)
+{
+}
+
+static void compare_vcpu_events(struct kvm_vcpu_events *left,
+ struct kvm_vcpu_events *right)
+{
+}
+
+#define TEST_SYNC_FIELDS (KVM_SYNC_X86_REGS|KVM_SYNC_X86_SREGS|KVM_SYNC_X86_EVENTS)
+#define INVALID_SYNC_FIELD 0x80000000
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vm *vm;
+ struct kvm_run *run;
+ struct kvm_regs regs;
+ struct kvm_sregs sregs;
+ struct kvm_vcpu_events events;
+ int rv, cap;
+
+ /* Tell stdout not to buffer its content */
+ setbuf(stdout, NULL);
+
+ cap = kvm_check_cap(KVM_CAP_SYNC_REGS);
+ if ((cap & TEST_SYNC_FIELDS) != TEST_SYNC_FIELDS) {
+ fprintf(stderr, "KVM_CAP_SYNC_REGS not supported, skipping test\n");
+ exit(KSFT_SKIP);
+ }
+ if ((cap & INVALID_SYNC_FIELD) != 0) {
+ fprintf(stderr, "The \"invalid\" field is not invalid, skipping test\n");
+ exit(KSFT_SKIP);
+ }
+
+ /* Create VM */
+ vm = vm_create_default(VCPU_ID, 0, guest_code);
+
+ run = vcpu_state(vm, VCPU_ID);
+
+ /* Request reading invalid register set from VCPU. */
+ run->kvm_valid_regs = INVALID_SYNC_FIELD;
+ rv = _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(rv < 0 && errno == EINVAL,
+ "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n",
+ rv);
+ vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0;
+
+ run->kvm_valid_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
+ rv = _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(rv < 0 && errno == EINVAL,
+ "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n",
+ rv);
+ vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0;
+
+ /* Request setting invalid register set into VCPU. */
+ run->kvm_dirty_regs = INVALID_SYNC_FIELD;
+ rv = _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(rv < 0 && errno == EINVAL,
+ "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n",
+ rv);
+ vcpu_state(vm, VCPU_ID)->kvm_dirty_regs = 0;
+
+ run->kvm_dirty_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
+ rv = _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(rv < 0 && errno == EINVAL,
+ "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n",
+ rv);
+ vcpu_state(vm, VCPU_ID)->kvm_dirty_regs = 0;
+
+ /* Request and verify all valid register sets. */
+ /* TODO: BUILD TIME CHECK: TEST_ASSERT(KVM_SYNC_X86_NUM_FIELDS != 3); */
+ run->kvm_valid_regs = TEST_SYNC_FIELDS;
+ rv = _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Unexpected exit reason: %u (%s),\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+
+ vcpu_regs_get(vm, VCPU_ID, &regs);
+ compare_regs(&regs, &run->s.regs.regs);
+
+ vcpu_sregs_get(vm, VCPU_ID, &sregs);
+ compare_sregs(&sregs, &run->s.regs.sregs);
+
+ vcpu_events_get(vm, VCPU_ID, &events);
+ compare_vcpu_events(&events, &run->s.regs.events);
+
+ /* Set and verify various register values. */
+ run->s.regs.regs.r11 = 0xBAD1DEA;
+ run->s.regs.sregs.apic_base = 1 << 11;
+ /* TODO run->s.regs.events.XYZ = ABC; */
+
+ run->kvm_valid_regs = TEST_SYNC_FIELDS;
+ run->kvm_dirty_regs = KVM_SYNC_X86_REGS | KVM_SYNC_X86_SREGS;
+ rv = _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Unexpected exit reason: %u (%s),\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+ TEST_ASSERT(run->s.regs.regs.r11 == 0xBAD1DEA + 1,
+ "r11 sync regs value incorrect 0x%llx.",
+ run->s.regs.regs.r11);
+ TEST_ASSERT(run->s.regs.sregs.apic_base == 1 << 11,
+ "apic_base sync regs value incorrect 0x%llx.",
+ run->s.regs.sregs.apic_base);
+
+ vcpu_regs_get(vm, VCPU_ID, &regs);
+ compare_regs(&regs, &run->s.regs.regs);
+
+ vcpu_sregs_get(vm, VCPU_ID, &sregs);
+ compare_sregs(&sregs, &run->s.regs.sregs);
+
+ vcpu_events_get(vm, VCPU_ID, &events);
+ compare_vcpu_events(&events, &run->s.regs.events);
+
+ /* Clear kvm_dirty_regs bits, verify new s.regs values are
+ * overwritten with existing guest values.
+ */
+ run->kvm_valid_regs = TEST_SYNC_FIELDS;
+ run->kvm_dirty_regs = 0;
+ run->s.regs.regs.r11 = 0xDEADBEEF;
+ rv = _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Unexpected exit reason: %u (%s),\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+ TEST_ASSERT(run->s.regs.regs.r11 != 0xDEADBEEF,
+ "r11 sync regs value incorrect 0x%llx.",
+ run->s.regs.regs.r11);
+
+ /* Clear kvm_valid_regs bits and kvm_dirty_bits.
+ * Verify s.regs values are not overwritten with existing guest values
+ * and that guest values are not overwritten with kvm_sync_regs values.
+ */
+ run->kvm_valid_regs = 0;
+ run->kvm_dirty_regs = 0;
+ run->s.regs.regs.r11 = 0xAAAA;
+ regs.r11 = 0xBAC0;
+ vcpu_regs_set(vm, VCPU_ID, &regs);
+ rv = _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Unexpected exit reason: %u (%s),\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+ TEST_ASSERT(run->s.regs.regs.r11 == 0xAAAA,
+ "r11 sync regs value incorrect 0x%llx.",
+ run->s.regs.regs.r11);
+ vcpu_regs_get(vm, VCPU_ID, &regs);
+ TEST_ASSERT(regs.r11 == 0xBAC0 + 1,
+ "r11 guest value incorrect 0x%llx.",
+ regs.r11);
+
+ /* Clear kvm_valid_regs bits. Verify s.regs values are not overwritten
+ * with existing guest values but that guest values are overwritten
+ * with kvm_sync_regs values.
+ */
+ run->kvm_valid_regs = 0;
+ run->kvm_dirty_regs = TEST_SYNC_FIELDS;
+ run->s.regs.regs.r11 = 0xBBBB;
+ rv = _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Unexpected exit reason: %u (%s),\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+ TEST_ASSERT(run->s.regs.regs.r11 == 0xBBBB,
+ "r11 sync regs value incorrect 0x%llx.",
+ run->s.regs.regs.r11);
+ vcpu_regs_get(vm, VCPU_ID, &regs);
+ TEST_ASSERT(regs.r11 == 0xBBBB + 1,
+ "r11 guest value incorrect 0x%llx.",
+ regs.r11);
+
+ kvm_vm_free(vm);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c
new file mode 100644
index 000000000..49bcc68b0
--- /dev/null
+++ b/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c
@@ -0,0 +1,175 @@
+/*
+ * gtests/tests/vmx_tsc_adjust_test.c
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ *
+ * IA32_TSC_ADJUST test
+ *
+ * According to the SDM, "if an execution of WRMSR to the
+ * IA32_TIME_STAMP_COUNTER MSR adds (or subtracts) value X from the TSC,
+ * the logical processor also adds (or subtracts) value X from the
+ * IA32_TSC_ADJUST MSR.
+ *
+ * Note that when L1 doesn't intercept writes to IA32_TSC, a
+ * WRMSR(IA32_TSC) from L2 sets L1's TSC value, not L2's perceived TSC
+ * value.
+ *
+ * This test verifies that this unusual case is handled correctly.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "x86.h"
+#include "vmx.h"
+
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "../kselftest.h"
+
+#ifndef MSR_IA32_TSC_ADJUST
+#define MSR_IA32_TSC_ADJUST 0x3b
+#endif
+
+#define PAGE_SIZE 4096
+#define VCPU_ID 5
+
+#define TSC_ADJUST_VALUE (1ll << 32)
+#define TSC_OFFSET_VALUE -(1ll << 48)
+
+enum {
+ PORT_ABORT = 0x1000,
+ PORT_REPORT,
+ PORT_DONE,
+};
+
+enum {
+ VMXON_PAGE = 0,
+ VMCS_PAGE,
+ MSR_BITMAP_PAGE,
+
+ NUM_VMX_PAGES,
+};
+
+struct kvm_single_msr {
+ struct kvm_msrs header;
+ struct kvm_msr_entry entry;
+} __attribute__((packed));
+
+/* The virtual machine object. */
+static struct kvm_vm *vm;
+
+static void check_ia32_tsc_adjust(int64_t max)
+{
+ int64_t adjust;
+
+ adjust = rdmsr(MSR_IA32_TSC_ADJUST);
+ GUEST_SYNC(adjust);
+ GUEST_ASSERT(adjust <= max);
+}
+
+static void l2_guest_code(void)
+{
+ uint64_t l1_tsc = rdtsc() - TSC_OFFSET_VALUE;
+
+ wrmsr(MSR_IA32_TSC, l1_tsc - TSC_ADJUST_VALUE);
+ check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE);
+
+ /* Exit to L1 */
+ __asm__ __volatile__("vmcall");
+}
+
+static void l1_guest_code(struct vmx_pages *vmx_pages)
+{
+#define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ uint32_t control;
+ uintptr_t save_cr3;
+
+ GUEST_ASSERT(rdtsc() < TSC_ADJUST_VALUE);
+ wrmsr(MSR_IA32_TSC, rdtsc() - TSC_ADJUST_VALUE);
+ check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE);
+
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+
+ /* Prepare the VMCS for L2 execution. */
+ prepare_vmcs(vmx_pages, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+ control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
+ control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETING;
+ vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
+ vmwrite(TSC_OFFSET, TSC_OFFSET_VALUE);
+
+ /* Jump into L2. First, test failure to load guest CR3. */
+ save_cr3 = vmreadz(GUEST_CR3);
+ vmwrite(GUEST_CR3, -1ull);
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) ==
+ (EXIT_REASON_FAILED_VMENTRY | EXIT_REASON_INVALID_STATE));
+ check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE);
+ vmwrite(GUEST_CR3, save_cr3);
+
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+ check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE);
+
+ GUEST_DONE();
+}
+
+void report(int64_t val)
+{
+ printf("IA32_TSC_ADJUST is %ld (%lld * TSC_ADJUST_VALUE + %lld).\n",
+ val, val / TSC_ADJUST_VALUE, val % TSC_ADJUST_VALUE);
+}
+
+int main(int argc, char *argv[])
+{
+ struct vmx_pages *vmx_pages;
+ vm_vaddr_t vmx_pages_gva;
+ struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1);
+
+ if (!(entry->ecx & CPUID_VMX)) {
+ fprintf(stderr, "nested VMX not enabled, skipping test\n");
+ exit(KSFT_SKIP);
+ }
+
+ vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
+ vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+
+ /* Allocate VMX pages and shared descriptors (vmx_pages). */
+ vmx_pages = vcpu_alloc_vmx(vm, &vmx_pages_gva);
+ vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
+
+ for (;;) {
+ volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+ struct guest_args args;
+
+ vcpu_run(vm, VCPU_ID);
+ guest_args_read(vm, VCPU_ID, &args);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+
+ switch (args.port) {
+ case GUEST_PORT_ABORT:
+ TEST_ASSERT(false, "%s", (const char *) args.arg0);
+ /* NOT REACHED */
+ case GUEST_PORT_SYNC:
+ report(args.arg1);
+ break;
+ case GUEST_PORT_DONE:
+ goto done;
+ default:
+ TEST_ASSERT(false, "Unknown port 0x%x.", args.port);
+ }
+ }
+
+ kvm_vm_free(vm);
+done:
+ return 0;
+}
diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
new file mode 100644
index 000000000..9700281be
--- /dev/null
+++ b/tools/testing/selftests/lib.mk
@@ -0,0 +1,169 @@
+# This mimics the top-level Makefile. We do it explicitly here so that this
+# Makefile can operate with or without the kbuild infrastructure.
+ifneq ($(LLVM),)
+CC := clang
+else
+CC := $(CROSS_COMPILE)gcc
+endif
+
+ifeq (0,$(MAKELEVEL))
+OUTPUT := $(shell pwd)
+endif
+
+# The following are built by lib.mk common compile rules.
+# TEST_CUSTOM_PROGS should be used by tests that require
+# custom build rule and prevent common build rule use.
+# TEST_PROGS are for test shell scripts.
+# TEST_CUSTOM_PROGS and TEST_PROGS will be run by common run_tests
+# and install targets. Common clean doesn't touch them.
+TEST_GEN_PROGS := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS))
+TEST_GEN_PROGS_EXTENDED := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS_EXTENDED))
+TEST_GEN_FILES := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_FILES))
+
+ifdef KSFT_KHDR_INSTALL
+top_srcdir ?= ../../../..
+include $(top_srcdir)/scripts/subarch.include
+ARCH ?= $(SUBARCH)
+
+.PHONY: khdr
+.NOTPARALLEL:
+khdr:
+ make ARCH=$(ARCH) -C $(top_srcdir) headers_install
+
+all: khdr $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES)
+else
+all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES)
+endif
+
+.ONESHELL:
+define RUN_TEST_PRINT_RESULT
+ TEST_HDR_MSG="selftests: "`basename $$PWD`:" $$BASENAME_TEST"; \
+ echo $$TEST_HDR_MSG; \
+ echo "========================================"; \
+ if [ ! -x $$TEST ]; then \
+ echo "$$TEST_HDR_MSG: Warning: file $$BASENAME_TEST is not executable, correct this.";\
+ echo "not ok 1..$$test_num $$TEST_HDR_MSG [FAIL]"; \
+ else \
+ cd `dirname $$TEST` > /dev/null; \
+ if [ "X$(summary)" != "X" ]; then \
+ (./$$BASENAME_TEST > /tmp/$$BASENAME_TEST 2>&1 && \
+ echo "ok 1..$$test_num $$TEST_HDR_MSG [PASS]") || \
+ (if [ $$? -eq $$skip ]; then \
+ echo "not ok 1..$$test_num $$TEST_HDR_MSG [SKIP]"; \
+ else echo "not ok 1..$$test_num $$TEST_HDR_MSG [FAIL]"; \
+ fi;) \
+ else \
+ (./$$BASENAME_TEST && \
+ echo "ok 1..$$test_num $$TEST_HDR_MSG [PASS]") || \
+ (if [ $$? -eq $$skip ]; then \
+ echo "not ok 1..$$test_num $$TEST_HDR_MSG [SKIP]"; \
+ else echo "not ok 1..$$test_num $$TEST_HDR_MSG [FAIL]"; \
+ fi;) \
+ fi; \
+ cd - > /dev/null; \
+ fi;
+endef
+
+define RUN_TESTS
+ @export KSFT_TAP_LEVEL=`echo 1`; \
+ test_num=`echo 0`; \
+ skip=`echo 4`; \
+ echo "TAP version 13"; \
+ for TEST in $(1); do \
+ BASENAME_TEST=`basename $$TEST`; \
+ test_num=`echo $$test_num+1 | bc`; \
+ $(call RUN_TEST_PRINT_RESULT,$(TEST),$(BASENAME_TEST),$(test_num),$(skip)) \
+ done;
+endef
+
+run_tests: all
+ifneq ($(KBUILD_SRC),)
+ @if [ "X$(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES)" != "X" ]; then
+ @rsync -aq $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(OUTPUT)
+ fi
+ @if [ "X$(TEST_PROGS)" != "X" ]; then
+ $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(OUTPUT)/$(TEST_PROGS))
+ else
+ $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS))
+ fi
+else
+ $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS))
+endif
+
+define INSTALL_SINGLE_RULE
+ $(if $(INSTALL_LIST),@mkdir -p $(INSTALL_PATH))
+ $(if $(INSTALL_LIST),@echo rsync -a $(INSTALL_LIST) $(INSTALL_PATH)/)
+ $(if $(INSTALL_LIST),@rsync -a $(INSTALL_LIST) $(INSTALL_PATH)/)
+endef
+
+define INSTALL_RULE
+ $(eval INSTALL_LIST = $(TEST_PROGS)) $(INSTALL_SINGLE_RULE)
+ $(eval INSTALL_LIST = $(TEST_PROGS_EXTENDED)) $(INSTALL_SINGLE_RULE)
+ $(eval INSTALL_LIST = $(TEST_FILES)) $(INSTALL_SINGLE_RULE)
+ $(eval INSTALL_LIST = $(TEST_GEN_PROGS)) $(INSTALL_SINGLE_RULE)
+ $(eval INSTALL_LIST = $(TEST_CUSTOM_PROGS)) $(INSTALL_SINGLE_RULE)
+ $(eval INSTALL_LIST = $(TEST_GEN_PROGS_EXTENDED)) $(INSTALL_SINGLE_RULE)
+ $(eval INSTALL_LIST = $(TEST_GEN_FILES)) $(INSTALL_SINGLE_RULE)
+endef
+
+install: all
+ifdef INSTALL_PATH
+ $(INSTALL_RULE)
+else
+ $(error Error: set INSTALL_PATH to use install)
+endif
+
+define EMIT_TESTS
+ @test_num=`echo 0`; \
+ for TEST in $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS); do \
+ BASENAME_TEST=`basename $$TEST`; \
+ test_num=`echo $$test_num+1 | bc`; \
+ TEST_HDR_MSG="selftests: "`basename $$PWD`:" $$BASENAME_TEST"; \
+ echo "echo $$TEST_HDR_MSG"; \
+ if [ ! -x $$TEST ]; then \
+ echo "echo \"$$TEST_HDR_MSG: Warning: file $$BASENAME_TEST is not executable, correct this.\""; \
+ echo "echo \"not ok 1..$$test_num $$TEST_HDR_MSG [FAIL]\""; \
+ else
+ echo "(./$$BASENAME_TEST >> \$$OUTPUT 2>&1 && echo \"ok 1..$$test_num $$TEST_HDR_MSG [PASS]\") || (if [ \$$? -eq \$$skip ]; then echo \"not ok 1..$$test_num $$TEST_HDR_MSG [SKIP]\"; else echo \"not ok 1..$$test_num $$TEST_HDR_MSG [FAIL]\"; fi;)"; \
+ fi; \
+ done;
+endef
+
+emit_tests:
+ $(EMIT_TESTS)
+
+# define if isn't already. It is undefined in make O= case.
+ifeq ($(RM),)
+RM := rm -f
+endif
+
+define CLEAN
+ $(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN)
+endef
+
+clean:
+ $(CLEAN)
+
+# When make O= with kselftest target from main level
+# the following aren't defined.
+#
+ifneq ($(KBUILD_SRC),)
+LINK.c = $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH)
+COMPILE.S = $(CC) $(ASFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c
+LINK.S = $(CC) $(ASFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH)
+endif
+
+# Selftest makefiles can override those targets by setting
+# OVERRIDE_TARGETS = 1.
+ifeq ($(OVERRIDE_TARGETS),)
+$(OUTPUT)/%:%.c
+ $(LINK.c) $^ $(LDLIBS) -o $@
+
+$(OUTPUT)/%.o:%.S
+ $(COMPILE.S) $^ -o $@
+
+$(OUTPUT)/%:%.S
+ $(LINK.S) $^ $(LDLIBS) -o $@
+endif
+
+.PHONY: run_tests all clean install emit_tests
diff --git a/tools/testing/selftests/lib/Makefile b/tools/testing/selftests/lib/Makefile
new file mode 100644
index 000000000..70d5711e3
--- /dev/null
+++ b/tools/testing/selftests/lib/Makefile
@@ -0,0 +1,8 @@
+# Makefile for lib/ function selftests
+
+# No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
+all:
+
+TEST_PROGS := printf.sh bitmap.sh prime_numbers.sh
+
+include ../lib.mk
diff --git a/tools/testing/selftests/lib/bitmap.sh b/tools/testing/selftests/lib/bitmap.sh
new file mode 100755
index 000000000..5a90006d1
--- /dev/null
+++ b/tools/testing/selftests/lib/bitmap.sh
@@ -0,0 +1,19 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+# Runs bitmap infrastructure tests using test_bitmap kernel module
+if ! /sbin/modprobe -q -n test_bitmap; then
+ echo "bitmap: module test_bitmap is not found [SKIP]"
+ exit $ksft_skip
+fi
+
+if /sbin/modprobe -q test_bitmap; then
+ /sbin/modprobe -q -r test_bitmap
+ echo "bitmap: ok"
+else
+ echo "bitmap: [FAIL]"
+ exit 1
+fi
diff --git a/tools/testing/selftests/lib/config b/tools/testing/selftests/lib/config
new file mode 100644
index 000000000..126933bcc
--- /dev/null
+++ b/tools/testing/selftests/lib/config
@@ -0,0 +1,3 @@
+CONFIG_TEST_PRINTF=m
+CONFIG_TEST_BITMAP=m
+CONFIG_PRIME_NUMBERS=m
diff --git a/tools/testing/selftests/lib/prime_numbers.sh b/tools/testing/selftests/lib/prime_numbers.sh
new file mode 100755
index 000000000..78e7483c8
--- /dev/null
+++ b/tools/testing/selftests/lib/prime_numbers.sh
@@ -0,0 +1,19 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Checks fast/slow prime_number generation for inconsistencies
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+if ! /sbin/modprobe -q -n prime_numbers; then
+ echo "prime_numbers: module prime_numbers is not found [SKIP]"
+ exit $ksft_skip
+fi
+
+if /sbin/modprobe -q prime_numbers selftest=65536; then
+ /sbin/modprobe -q -r prime_numbers
+ echo "prime_numbers: ok"
+else
+ echo "prime_numbers: [FAIL]"
+ exit 1
+fi
diff --git a/tools/testing/selftests/lib/printf.sh b/tools/testing/selftests/lib/printf.sh
new file mode 100755
index 000000000..45a23e2d6
--- /dev/null
+++ b/tools/testing/selftests/lib/printf.sh
@@ -0,0 +1,19 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Runs printf infrastructure using test_printf kernel module
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+if ! /sbin/modprobe -q -n test_printf; then
+ echo "printf: module test_printf is not found [SKIP]"
+ exit $ksft_skip
+fi
+
+if /sbin/modprobe -q test_printf; then
+ /sbin/modprobe -q -r test_printf
+ echo "printf: ok"
+else
+ echo "printf: [FAIL]"
+ exit 1
+fi
diff --git a/tools/testing/selftests/locking/Makefile b/tools/testing/selftests/locking/Makefile
new file mode 100644
index 000000000..6e7761ab3
--- /dev/null
+++ b/tools/testing/selftests/locking/Makefile
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for locking/ww_mutx selftests
+
+# No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
+all:
+
+TEST_PROGS := ww_mutex.sh
+
+include ../lib.mk
diff --git a/tools/testing/selftests/locking/ww_mutex.sh b/tools/testing/selftests/locking/ww_mutex.sh
new file mode 100755
index 000000000..91e4ac756
--- /dev/null
+++ b/tools/testing/selftests/locking/ww_mutex.sh
@@ -0,0 +1,19 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+# Runs API tests for struct ww_mutex (Wait/Wound mutexes)
+if ! /sbin/modprobe -q -n test-ww_mutex; then
+ echo "ww_mutex: module test-ww_mutex is not found [SKIP]"
+ exit $ksft_skip
+fi
+
+if /sbin/modprobe -q test-ww_mutex; then
+ /sbin/modprobe -q -r test-ww_mutex
+ echo "locking/ww_mutex: ok"
+else
+ echo "locking/ww_mutex: [FAIL]"
+ exit 1
+fi
diff --git a/tools/testing/selftests/media_tests/.gitignore b/tools/testing/selftests/media_tests/.gitignore
new file mode 100644
index 000000000..8745eba39
--- /dev/null
+++ b/tools/testing/selftests/media_tests/.gitignore
@@ -0,0 +1,3 @@
+media_device_test
+media_device_open
+video_device_test
diff --git a/tools/testing/selftests/media_tests/Makefile b/tools/testing/selftests/media_tests/Makefile
new file mode 100644
index 000000000..60826d7d3
--- /dev/null
+++ b/tools/testing/selftests/media_tests/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+CFLAGS += -I../ -I../../../../usr/include/
+TEST_GEN_PROGS := media_device_test media_device_open video_device_test
+
+include ../lib.mk
diff --git a/tools/testing/selftests/media_tests/bind_unbind_sample.sh b/tools/testing/selftests/media_tests/bind_unbind_sample.sh
new file mode 100755
index 000000000..0101c1ec4
--- /dev/null
+++ b/tools/testing/selftests/media_tests/bind_unbind_sample.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Find device number in /sys/bus/usb/drivers/drivername
+# Edit this file to update the driver numer and name
+# Example test for uvcvideo driver
+#i=0
+# while :; do
+# i=$((i+1))
+# echo 1-5:1.0 > /sys/bus/usb/drivers/uvcvideo/unbind;
+# echo 1-5:1.0 > /sys/bus/usb/drivers/uvcvideo/bind;
+# clear
+# echo $i
+#done
diff --git a/tools/testing/selftests/media_tests/media_device_open.c b/tools/testing/selftests/media_tests/media_device_open.c
new file mode 100644
index 000000000..93183a37b
--- /dev/null
+++ b/tools/testing/selftests/media_tests/media_device_open.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * media_device_open.c - Media Controller Device Open Test
+ *
+ * Copyright (c) 2016 Shuah Khan <shuahkh@osg.samsung.com>
+ * Copyright (c) 2016 Samsung Electronics Co., Ltd.
+ *
+ */
+
+/*
+ * This file adds a test for Media Controller API.
+ * This test should be run as root and should not be
+ * included in the Kselftest run. This test should be
+ * run when hardware and driver that makes use Media
+ * Controller API are present in the system.
+ *
+ * This test opens user specified Media Device and calls
+ * MEDIA_IOC_DEVICE_INFO ioctl, closes the file, and exits.
+ *
+ * Usage:
+ * sudo ./media_device_open -d /dev/mediaX
+ *
+ * Run this test is a loop and run bind/unbind on the driver.
+*/
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <linux/media.h>
+
+#include "../kselftest.h"
+
+int main(int argc, char **argv)
+{
+ int opt;
+ char media_device[256];
+ int count = 0;
+ struct media_device_info mdi;
+ int ret;
+ int fd;
+
+ if (argc < 2) {
+ printf("Usage: %s [-d </dev/mediaX>]\n", argv[0]);
+ exit(-1);
+ }
+
+ /* Process arguments */
+ while ((opt = getopt(argc, argv, "d:")) != -1) {
+ switch (opt) {
+ case 'd':
+ strncpy(media_device, optarg, sizeof(media_device) - 1);
+ media_device[sizeof(media_device)-1] = '\0';
+ break;
+ default:
+ printf("Usage: %s [-d </dev/mediaX>]\n", argv[0]);
+ exit(-1);
+ }
+ }
+
+ if (getuid() != 0)
+ ksft_exit_skip("Please run the test as root - Exiting.\n");
+
+ /* Open Media device and keep it open */
+ fd = open(media_device, O_RDWR);
+ if (fd == -1) {
+ printf("Media Device open errno %s\n", strerror(errno));
+ exit(-1);
+ }
+
+ ret = ioctl(fd, MEDIA_IOC_DEVICE_INFO, &mdi);
+ if (ret < 0)
+ printf("Media Device Info errno %s\n", strerror(errno));
+ else
+ printf("Media device model %s driver %s\n",
+ mdi.model, mdi.driver);
+}
diff --git a/tools/testing/selftests/media_tests/media_device_test.c b/tools/testing/selftests/media_tests/media_device_test.c
new file mode 100644
index 000000000..4b9953359
--- /dev/null
+++ b/tools/testing/selftests/media_tests/media_device_test.c
@@ -0,0 +1,103 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * media_device_test.c - Media Controller Device ioctl loop Test
+ *
+ * Copyright (c) 2016 Shuah Khan <shuahkh@osg.samsung.com>
+ * Copyright (c) 2016 Samsung Electronics Co., Ltd.
+ *
+ */
+
+/*
+ * This file adds a test for Media Controller API.
+ * This test should be run as root and should not be
+ * included in the Kselftest run. This test should be
+ * run when hardware and driver that makes use Media
+ * Controller API are present in the system.
+ *
+ * This test opens user specified Media Device and calls
+ * MEDIA_IOC_DEVICE_INFO ioctl in a loop once every 10
+ * seconds.
+ *
+ * Usage:
+ * sudo ./media_device_test -d /dev/mediaX
+ *
+ * While test is running, remove the device and
+ * ensure there are no use after free errors and
+ * other Oops in the dmesg. Enable KaSan kernel
+ * config option for use-after-free error detection.
+*/
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <time.h>
+#include <linux/media.h>
+
+#include "../kselftest.h"
+
+int main(int argc, char **argv)
+{
+ int opt;
+ char media_device[256];
+ int count;
+ struct media_device_info mdi;
+ int ret;
+ int fd;
+
+ if (argc < 2) {
+ printf("Usage: %s [-d </dev/mediaX>]\n", argv[0]);
+ exit(-1);
+ }
+
+ /* Process arguments */
+ while ((opt = getopt(argc, argv, "d:")) != -1) {
+ switch (opt) {
+ case 'd':
+ strncpy(media_device, optarg, sizeof(media_device) - 1);
+ media_device[sizeof(media_device)-1] = '\0';
+ break;
+ default:
+ printf("Usage: %s [-d </dev/mediaX>]\n", argv[0]);
+ exit(-1);
+ }
+ }
+
+ if (getuid() != 0)
+ ksft_exit_skip("Please run the test as root - Exiting.\n");
+
+ /* Generate random number of interations */
+ srand((unsigned int) time(NULL));
+ count = rand();
+
+ /* Open Media device and keep it open */
+ fd = open(media_device, O_RDWR);
+ if (fd == -1) {
+ printf("Media Device open errno %s\n", strerror(errno));
+ exit(-1);
+ }
+
+ printf("\nNote:\n"
+ "While test is running, remove the device and\n"
+ "ensure there are no use after free errors and\n"
+ "other Oops in the dmesg. Enable KaSan kernel\n"
+ "config option for use-after-free error detection.\n\n");
+
+ printf("Running test for %d iterations\n", count);
+
+ while (count > 0) {
+ ret = ioctl(fd, MEDIA_IOC_DEVICE_INFO, &mdi);
+ if (ret < 0)
+ printf("Media Device Info errno %s\n", strerror(errno));
+ else
+ printf("Media device model %s driver %s - count %d\n",
+ mdi.model, mdi.driver, count);
+ sleep(10);
+ count--;
+ }
+}
diff --git a/tools/testing/selftests/media_tests/open_loop_test.sh b/tools/testing/selftests/media_tests/open_loop_test.sh
new file mode 100755
index 000000000..d4c0179bb
--- /dev/null
+++ b/tools/testing/selftests/media_tests/open_loop_test.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+ i=0
+file=/dev/media$1
+ while :; do
+ echo $file
+ i=$((i+1))
+ R=$(./media_device_open -d $file);
+ # clear
+ echo -e "Loop $i\n$R"
+ done
diff --git a/tools/testing/selftests/media_tests/regression_test.txt b/tools/testing/selftests/media_tests/regression_test.txt
new file mode 100644
index 000000000..262736768
--- /dev/null
+++ b/tools/testing/selftests/media_tests/regression_test.txt
@@ -0,0 +1,43 @@
+Testing for regressions in Media Controller API register, ioctl, syscall,
+and unregister paths. There have a few problems that result in user-after
+free on media_device, media_devnode, and cdev pointers when the driver is
+unbound while ioctl is in progress.
+
+Test Procedure:
+
+Run bin/unbind loop while ioctls are in progress.
+Run rmmod and modprobe.
+Disconnect the device.
+
+Setup:
+
+Build media_device_test
+cd tools/testing/selftests/media_tests
+make
+
+Regressions test for cdev user-after free error on /dev/mediaX when driver
+is unbound:
+
+Start media_device_test to regression test media devnode dynamic alloc
+and cdev user-after-free fixes. This opens media dev files and sits in
+a loop running media ioctl MEDIA_IOC_DEVICE_INFO command once every 10
+seconds. The idea is when device file goes away, media devnode and cdev
+should stick around until this test exits.
+
+The test for a random number of iterations or until user kills it with a
+sleep 10 in between the ioctl calls.
+
+sudo ./media_device_test -d /dev/mediaX
+
+Regression test for media_devnode unregister race with ioctl_syscall:
+
+Start 6 open_loop_test.sh tests with different /dev/mediaX files. When
+device file goes away after unbind, device file name changes. Start the
+test with possible device names. If we start with /dev/media0 for example,
+after unbind, /dev/media1 or /dev/media2 could get created. The idea is
+keep ioctls going while bind/unbind runs.
+
+Copy bind_unbind_sample.txt and make changes to specify the driver name
+and number to run bind and unbind. Start the bind_unbind.sh
+
+Run dmesg looking for any user-after free errors or mutex lock errors.
diff --git a/tools/testing/selftests/media_tests/video_device_test.c b/tools/testing/selftests/media_tests/video_device_test.c
new file mode 100644
index 000000000..0f6aef2e2
--- /dev/null
+++ b/tools/testing/selftests/media_tests/video_device_test.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * video_device_test - Video Device Test
+ *
+ * Copyright (c) 2016 Shuah Khan <shuahkh@osg.samsung.com>
+ * Copyright (c) 2016 Samsung Electronics Co., Ltd.
+ *
+ */
+
+/*
+ * This file adds a test for Video Device. This test should not be included
+ * in the Kselftest run. This test should be run when hardware and driver
+ * that makes use of V4L2 API is present.
+ *
+ * This test opens user specified Video Device and calls video ioctls in a
+ * loop once every 10 seconds.
+ *
+ * Usage:
+ * sudo ./video_device_test -d /dev/videoX
+ *
+ * While test is running, remove the device or unbind the driver and
+ * ensure there are no use after free errors and other Oops in the
+ * dmesg.
+ * When possible, enable KaSan kernel config option for use-after-free
+ * error detection.
+*/
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <time.h>
+#include <linux/videodev2.h>
+
+int main(int argc, char **argv)
+{
+ int opt;
+ char video_dev[256];
+ int count;
+ struct v4l2_tuner vtuner;
+ struct v4l2_capability vcap;
+ int ret;
+ int fd;
+
+ if (argc < 2) {
+ printf("Usage: %s [-d </dev/videoX>]\n", argv[0]);
+ exit(-1);
+ }
+
+ /* Process arguments */
+ while ((opt = getopt(argc, argv, "d:")) != -1) {
+ switch (opt) {
+ case 'd':
+ strncpy(video_dev, optarg, sizeof(video_dev) - 1);
+ video_dev[sizeof(video_dev)-1] = '\0';
+ break;
+ default:
+ printf("Usage: %s [-d </dev/videoX>]\n", argv[0]);
+ exit(-1);
+ }
+ }
+
+ /* Generate random number of interations */
+ srand((unsigned int) time(NULL));
+ count = rand();
+
+ /* Open Video device and keep it open */
+ fd = open(video_dev, O_RDWR);
+ if (fd == -1) {
+ printf("Video Device open errno %s\n", strerror(errno));
+ exit(-1);
+ }
+
+ printf("\nNote:\n"
+ "While test is running, remove the device or unbind\n"
+ "driver and ensure there are no use after free errors\n"
+ "and other Oops in the dmesg. When possible, enable KaSan\n"
+ "kernel config option for use-after-free error detection.\n\n");
+
+ while (count > 0) {
+ ret = ioctl(fd, VIDIOC_QUERYCAP, &vcap);
+ if (ret < 0)
+ printf("VIDIOC_QUERYCAP errno %s\n", strerror(errno));
+ else
+ printf("Video device driver %s\n", vcap.driver);
+
+ ret = ioctl(fd, VIDIOC_G_TUNER, &vtuner);
+ if (ret < 0)
+ printf("VIDIOC_G_TUNER, errno %s\n", strerror(errno));
+ else
+ printf("type %d rangelow %d rangehigh %d\n",
+ vtuner.type, vtuner.rangelow, vtuner.rangehigh);
+ sleep(10);
+ count--;
+ }
+}
diff --git a/tools/testing/selftests/membarrier/.gitignore b/tools/testing/selftests/membarrier/.gitignore
new file mode 100644
index 000000000..020c44f49
--- /dev/null
+++ b/tools/testing/selftests/membarrier/.gitignore
@@ -0,0 +1 @@
+membarrier_test
diff --git a/tools/testing/selftests/membarrier/Makefile b/tools/testing/selftests/membarrier/Makefile
new file mode 100644
index 000000000..02845532b
--- /dev/null
+++ b/tools/testing/selftests/membarrier/Makefile
@@ -0,0 +1,6 @@
+CFLAGS += -g -I../../../../usr/include/
+
+TEST_GEN_PROGS := membarrier_test
+
+include ../lib.mk
+
diff --git a/tools/testing/selftests/membarrier/membarrier_test.c b/tools/testing/selftests/membarrier/membarrier_test.c
new file mode 100644
index 000000000..6793f8ecc
--- /dev/null
+++ b/tools/testing/selftests/membarrier/membarrier_test.c
@@ -0,0 +1,312 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <linux/membarrier.h>
+#include <syscall.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+
+#include "../kselftest.h"
+
+static int sys_membarrier(int cmd, int flags)
+{
+ return syscall(__NR_membarrier, cmd, flags);
+}
+
+static int test_membarrier_cmd_fail(void)
+{
+ int cmd = -1, flags = 0;
+ const char *test_name = "sys membarrier invalid command";
+
+ if (sys_membarrier(cmd, flags) != -1) {
+ ksft_exit_fail_msg(
+ "%s test: command = %d, flags = %d. Should fail, but passed\n",
+ test_name, cmd, flags);
+ }
+ if (errno != EINVAL) {
+ ksft_exit_fail_msg(
+ "%s test: flags = %d. Should return (%d: \"%s\"), but returned (%d: \"%s\").\n",
+ test_name, flags, EINVAL, strerror(EINVAL),
+ errno, strerror(errno));
+ }
+
+ ksft_test_result_pass(
+ "%s test: command = %d, flags = %d, errno = %d. Failed as expected\n",
+ test_name, cmd, flags, errno);
+ return 0;
+}
+
+static int test_membarrier_flags_fail(void)
+{
+ int cmd = MEMBARRIER_CMD_QUERY, flags = 1;
+ const char *test_name = "sys membarrier MEMBARRIER_CMD_QUERY invalid flags";
+
+ if (sys_membarrier(cmd, flags) != -1) {
+ ksft_exit_fail_msg(
+ "%s test: flags = %d. Should fail, but passed\n",
+ test_name, flags);
+ }
+ if (errno != EINVAL) {
+ ksft_exit_fail_msg(
+ "%s test: flags = %d. Should return (%d: \"%s\"), but returned (%d: \"%s\").\n",
+ test_name, flags, EINVAL, strerror(EINVAL),
+ errno, strerror(errno));
+ }
+
+ ksft_test_result_pass(
+ "%s test: flags = %d, errno = %d. Failed as expected\n",
+ test_name, flags, errno);
+ return 0;
+}
+
+static int test_membarrier_global_success(void)
+{
+ int cmd = MEMBARRIER_CMD_GLOBAL, flags = 0;
+ const char *test_name = "sys membarrier MEMBARRIER_CMD_GLOBAL";
+
+ if (sys_membarrier(cmd, flags) != 0) {
+ ksft_exit_fail_msg(
+ "%s test: flags = %d, errno = %d\n",
+ test_name, flags, errno);
+ }
+
+ ksft_test_result_pass(
+ "%s test: flags = %d\n", test_name, flags);
+ return 0;
+}
+
+static int test_membarrier_private_expedited_fail(void)
+{
+ int cmd = MEMBARRIER_CMD_PRIVATE_EXPEDITED, flags = 0;
+ const char *test_name = "sys membarrier MEMBARRIER_CMD_PRIVATE_EXPEDITED not registered failure";
+
+ if (sys_membarrier(cmd, flags) != -1) {
+ ksft_exit_fail_msg(
+ "%s test: flags = %d. Should fail, but passed\n",
+ test_name, flags);
+ }
+ if (errno != EPERM) {
+ ksft_exit_fail_msg(
+ "%s test: flags = %d. Should return (%d: \"%s\"), but returned (%d: \"%s\").\n",
+ test_name, flags, EPERM, strerror(EPERM),
+ errno, strerror(errno));
+ }
+
+ ksft_test_result_pass(
+ "%s test: flags = %d, errno = %d\n",
+ test_name, flags, errno);
+ return 0;
+}
+
+static int test_membarrier_register_private_expedited_success(void)
+{
+ int cmd = MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED, flags = 0;
+ const char *test_name = "sys membarrier MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED";
+
+ if (sys_membarrier(cmd, flags) != 0) {
+ ksft_exit_fail_msg(
+ "%s test: flags = %d, errno = %d\n",
+ test_name, flags, errno);
+ }
+
+ ksft_test_result_pass(
+ "%s test: flags = %d\n",
+ test_name, flags);
+ return 0;
+}
+
+static int test_membarrier_private_expedited_success(void)
+{
+ int cmd = MEMBARRIER_CMD_PRIVATE_EXPEDITED, flags = 0;
+ const char *test_name = "sys membarrier MEMBARRIER_CMD_PRIVATE_EXPEDITED";
+
+ if (sys_membarrier(cmd, flags) != 0) {
+ ksft_exit_fail_msg(
+ "%s test: flags = %d, errno = %d\n",
+ test_name, flags, errno);
+ }
+
+ ksft_test_result_pass(
+ "%s test: flags = %d\n",
+ test_name, flags);
+ return 0;
+}
+
+static int test_membarrier_private_expedited_sync_core_fail(void)
+{
+ int cmd = MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE, flags = 0;
+ const char *test_name = "sys membarrier MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE not registered failure";
+
+ if (sys_membarrier(cmd, flags) != -1) {
+ ksft_exit_fail_msg(
+ "%s test: flags = %d. Should fail, but passed\n",
+ test_name, flags);
+ }
+ if (errno != EPERM) {
+ ksft_exit_fail_msg(
+ "%s test: flags = %d. Should return (%d: \"%s\"), but returned (%d: \"%s\").\n",
+ test_name, flags, EPERM, strerror(EPERM),
+ errno, strerror(errno));
+ }
+
+ ksft_test_result_pass(
+ "%s test: flags = %d, errno = %d\n",
+ test_name, flags, errno);
+ return 0;
+}
+
+static int test_membarrier_register_private_expedited_sync_core_success(void)
+{
+ int cmd = MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE, flags = 0;
+ const char *test_name = "sys membarrier MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE";
+
+ if (sys_membarrier(cmd, flags) != 0) {
+ ksft_exit_fail_msg(
+ "%s test: flags = %d, errno = %d\n",
+ test_name, flags, errno);
+ }
+
+ ksft_test_result_pass(
+ "%s test: flags = %d\n",
+ test_name, flags);
+ return 0;
+}
+
+static int test_membarrier_private_expedited_sync_core_success(void)
+{
+ int cmd = MEMBARRIER_CMD_PRIVATE_EXPEDITED, flags = 0;
+ const char *test_name = "sys membarrier MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE";
+
+ if (sys_membarrier(cmd, flags) != 0) {
+ ksft_exit_fail_msg(
+ "%s test: flags = %d, errno = %d\n",
+ test_name, flags, errno);
+ }
+
+ ksft_test_result_pass(
+ "%s test: flags = %d\n",
+ test_name, flags);
+ return 0;
+}
+
+static int test_membarrier_register_global_expedited_success(void)
+{
+ int cmd = MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED, flags = 0;
+ const char *test_name = "sys membarrier MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED";
+
+ if (sys_membarrier(cmd, flags) != 0) {
+ ksft_exit_fail_msg(
+ "%s test: flags = %d, errno = %d\n",
+ test_name, flags, errno);
+ }
+
+ ksft_test_result_pass(
+ "%s test: flags = %d\n",
+ test_name, flags);
+ return 0;
+}
+
+static int test_membarrier_global_expedited_success(void)
+{
+ int cmd = MEMBARRIER_CMD_GLOBAL_EXPEDITED, flags = 0;
+ const char *test_name = "sys membarrier MEMBARRIER_CMD_GLOBAL_EXPEDITED";
+
+ if (sys_membarrier(cmd, flags) != 0) {
+ ksft_exit_fail_msg(
+ "%s test: flags = %d, errno = %d\n",
+ test_name, flags, errno);
+ }
+
+ ksft_test_result_pass(
+ "%s test: flags = %d\n",
+ test_name, flags);
+ return 0;
+}
+
+static int test_membarrier(void)
+{
+ int status;
+
+ status = test_membarrier_cmd_fail();
+ if (status)
+ return status;
+ status = test_membarrier_flags_fail();
+ if (status)
+ return status;
+ status = test_membarrier_global_success();
+ if (status)
+ return status;
+ status = test_membarrier_private_expedited_fail();
+ if (status)
+ return status;
+ status = test_membarrier_register_private_expedited_success();
+ if (status)
+ return status;
+ status = test_membarrier_private_expedited_success();
+ if (status)
+ return status;
+ status = sys_membarrier(MEMBARRIER_CMD_QUERY, 0);
+ if (status < 0) {
+ ksft_test_result_fail("sys_membarrier() failed\n");
+ return status;
+ }
+ if (status & MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE) {
+ status = test_membarrier_private_expedited_sync_core_fail();
+ if (status)
+ return status;
+ status = test_membarrier_register_private_expedited_sync_core_success();
+ if (status)
+ return status;
+ status = test_membarrier_private_expedited_sync_core_success();
+ if (status)
+ return status;
+ }
+ /*
+ * It is valid to send a global membarrier from a non-registered
+ * process.
+ */
+ status = test_membarrier_global_expedited_success();
+ if (status)
+ return status;
+ status = test_membarrier_register_global_expedited_success();
+ if (status)
+ return status;
+ status = test_membarrier_global_expedited_success();
+ if (status)
+ return status;
+ return 0;
+}
+
+static int test_membarrier_query(void)
+{
+ int flags = 0, ret;
+
+ ret = sys_membarrier(MEMBARRIER_CMD_QUERY, flags);
+ if (ret < 0) {
+ if (errno == ENOSYS) {
+ /*
+ * It is valid to build a kernel with
+ * CONFIG_MEMBARRIER=n. However, this skips the tests.
+ */
+ ksft_exit_skip(
+ "sys membarrier (CONFIG_MEMBARRIER) is disabled.\n");
+ }
+ ksft_exit_fail_msg("sys_membarrier() failed\n");
+ }
+ if (!(ret & MEMBARRIER_CMD_GLOBAL))
+ ksft_exit_skip(
+ "sys_membarrier unsupported: CMD_GLOBAL not found.\n");
+
+ ksft_test_result_pass("sys_membarrier available\n");
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ ksft_print_header();
+
+ test_membarrier_query();
+ test_membarrier();
+
+ return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/memfd/.gitignore b/tools/testing/selftests/memfd/.gitignore
new file mode 100644
index 000000000..afe87c40a
--- /dev/null
+++ b/tools/testing/selftests/memfd/.gitignore
@@ -0,0 +1,4 @@
+fuse_mnt
+fuse_test
+memfd_test
+memfd-test-file
diff --git a/tools/testing/selftests/memfd/Makefile b/tools/testing/selftests/memfd/Makefile
new file mode 100644
index 000000000..53a848109
--- /dev/null
+++ b/tools/testing/selftests/memfd/Makefile
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS += -D_FILE_OFFSET_BITS=64
+CFLAGS += -I../../../../include/uapi/
+CFLAGS += -I../../../../include/
+CFLAGS += -I../../../../usr/include/
+
+TEST_GEN_PROGS := memfd_test
+TEST_PROGS := run_fuse_test.sh run_hugetlbfs_test.sh
+TEST_GEN_FILES := fuse_mnt fuse_test
+
+fuse_mnt.o: CFLAGS += $(shell pkg-config fuse --cflags)
+
+include ../lib.mk
+
+$(OUTPUT)/fuse_mnt: LDLIBS += $(shell pkg-config fuse --libs)
+
+$(OUTPUT)/memfd_test: memfd_test.c common.o
+$(OUTPUT)/fuse_test: fuse_test.c common.o
+
+EXTRA_CLEAN = common.o
diff --git a/tools/testing/selftests/memfd/common.c b/tools/testing/selftests/memfd/common.c
new file mode 100644
index 000000000..8eb3d75f6
--- /dev/null
+++ b/tools/testing/selftests/memfd/common.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#define __EXPORTED_HEADERS__
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <linux/fcntl.h>
+#include <linux/memfd.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+
+#include "common.h"
+
+int hugetlbfs_test = 0;
+
+/*
+ * Copied from mlock2-tests.c
+ */
+unsigned long default_huge_page_size(void)
+{
+ unsigned long hps = 0;
+ char *line = NULL;
+ size_t linelen = 0;
+ FILE *f = fopen("/proc/meminfo", "r");
+
+ if (!f)
+ return 0;
+ while (getline(&line, &linelen, f) > 0) {
+ if (sscanf(line, "Hugepagesize: %lu kB", &hps) == 1) {
+ hps <<= 10;
+ break;
+ }
+ }
+
+ free(line);
+ fclose(f);
+ return hps;
+}
+
+int sys_memfd_create(const char *name, unsigned int flags)
+{
+ if (hugetlbfs_test)
+ flags |= MFD_HUGETLB;
+
+ return syscall(__NR_memfd_create, name, flags);
+}
diff --git a/tools/testing/selftests/memfd/common.h b/tools/testing/selftests/memfd/common.h
new file mode 100644
index 000000000..522d2c630
--- /dev/null
+++ b/tools/testing/selftests/memfd/common.h
@@ -0,0 +1,9 @@
+#ifndef COMMON_H_
+#define COMMON_H_
+
+extern int hugetlbfs_test;
+
+unsigned long default_huge_page_size(void);
+int sys_memfd_create(const char *name, unsigned int flags);
+
+#endif
diff --git a/tools/testing/selftests/memfd/config b/tools/testing/selftests/memfd/config
new file mode 100644
index 000000000..835c7f4da
--- /dev/null
+++ b/tools/testing/selftests/memfd/config
@@ -0,0 +1 @@
+CONFIG_FUSE_FS=m
diff --git a/tools/testing/selftests/memfd/fuse_mnt.c b/tools/testing/selftests/memfd/fuse_mnt.c
new file mode 100644
index 000000000..6936f2a00
--- /dev/null
+++ b/tools/testing/selftests/memfd/fuse_mnt.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * memfd test file-system
+ * This file uses FUSE to create a dummy file-system with only one file /memfd.
+ * This file is read-only and takes 1s per read.
+ *
+ * This file-system is used by the memfd test-cases to force the kernel to pin
+ * pages during reads(). Due to the 1s delay of this file-system, this is a
+ * nice way to test race-conditions against get_user_pages() in the kernel.
+ *
+ * We use direct_io==1 to force the kernel to use direct-IO for this
+ * file-system.
+ */
+
+#define FUSE_USE_VERSION 26
+
+#include <fuse.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+static const char memfd_content[] = "memfd-example-content";
+static const char memfd_path[] = "/memfd";
+
+static int memfd_getattr(const char *path, struct stat *st)
+{
+ memset(st, 0, sizeof(*st));
+
+ if (!strcmp(path, "/")) {
+ st->st_mode = S_IFDIR | 0755;
+ st->st_nlink = 2;
+ } else if (!strcmp(path, memfd_path)) {
+ st->st_mode = S_IFREG | 0444;
+ st->st_nlink = 1;
+ st->st_size = strlen(memfd_content);
+ } else {
+ return -ENOENT;
+ }
+
+ return 0;
+}
+
+static int memfd_readdir(const char *path,
+ void *buf,
+ fuse_fill_dir_t filler,
+ off_t offset,
+ struct fuse_file_info *fi)
+{
+ if (strcmp(path, "/"))
+ return -ENOENT;
+
+ filler(buf, ".", NULL, 0);
+ filler(buf, "..", NULL, 0);
+ filler(buf, memfd_path + 1, NULL, 0);
+
+ return 0;
+}
+
+static int memfd_open(const char *path, struct fuse_file_info *fi)
+{
+ if (strcmp(path, memfd_path))
+ return -ENOENT;
+
+ if ((fi->flags & 3) != O_RDONLY)
+ return -EACCES;
+
+ /* force direct-IO */
+ fi->direct_io = 1;
+
+ return 0;
+}
+
+static int memfd_read(const char *path,
+ char *buf,
+ size_t size,
+ off_t offset,
+ struct fuse_file_info *fi)
+{
+ size_t len;
+
+ if (strcmp(path, memfd_path) != 0)
+ return -ENOENT;
+
+ sleep(1);
+
+ len = strlen(memfd_content);
+ if (offset < len) {
+ if (offset + size > len)
+ size = len - offset;
+
+ memcpy(buf, memfd_content + offset, size);
+ } else {
+ size = 0;
+ }
+
+ return size;
+}
+
+static struct fuse_operations memfd_ops = {
+ .getattr = memfd_getattr,
+ .readdir = memfd_readdir,
+ .open = memfd_open,
+ .read = memfd_read,
+};
+
+int main(int argc, char *argv[])
+{
+ return fuse_main(argc, argv, &memfd_ops, NULL);
+}
diff --git a/tools/testing/selftests/memfd/fuse_test.c b/tools/testing/selftests/memfd/fuse_test.c
new file mode 100644
index 000000000..b018e8357
--- /dev/null
+++ b/tools/testing/selftests/memfd/fuse_test.c
@@ -0,0 +1,330 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * memfd GUP test-case
+ * This tests memfd interactions with get_user_pages(). We require the
+ * fuse_mnt.c program to provide a fake direct-IO FUSE mount-point for us. This
+ * file-system delays _all_ reads by 1s and forces direct-IO. This means, any
+ * read() on files in that file-system will pin the receive-buffer pages for at
+ * least 1s via get_user_pages().
+ *
+ * We use this trick to race ADD_SEALS against a write on a memfd object. The
+ * ADD_SEALS must fail if the memfd pages are still pinned. Note that we use
+ * the read() syscall with our memory-mapped memfd object as receive buffer to
+ * force the kernel to write into our memfd object.
+ */
+
+#define _GNU_SOURCE
+#define __EXPORTED_HEADERS__
+
+#include <errno.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <linux/falloc.h>
+#include <linux/fcntl.h>
+#include <linux/memfd.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "common.h"
+
+#define MFD_DEF_SIZE 8192
+#define STACK_SIZE 65536
+
+static size_t mfd_def_size = MFD_DEF_SIZE;
+
+static int mfd_assert_new(const char *name, loff_t sz, unsigned int flags)
+{
+ int r, fd;
+
+ fd = sys_memfd_create(name, flags);
+ if (fd < 0) {
+ printf("memfd_create(\"%s\", %u) failed: %m\n",
+ name, flags);
+ abort();
+ }
+
+ r = ftruncate(fd, sz);
+ if (r < 0) {
+ printf("ftruncate(%llu) failed: %m\n", (unsigned long long)sz);
+ abort();
+ }
+
+ return fd;
+}
+
+static __u64 mfd_assert_get_seals(int fd)
+{
+ long r;
+
+ r = fcntl(fd, F_GET_SEALS);
+ if (r < 0) {
+ printf("GET_SEALS(%d) failed: %m\n", fd);
+ abort();
+ }
+
+ return r;
+}
+
+static void mfd_assert_has_seals(int fd, __u64 seals)
+{
+ __u64 s;
+
+ s = mfd_assert_get_seals(fd);
+ if (s != seals) {
+ printf("%llu != %llu = GET_SEALS(%d)\n",
+ (unsigned long long)seals, (unsigned long long)s, fd);
+ abort();
+ }
+}
+
+static void mfd_assert_add_seals(int fd, __u64 seals)
+{
+ long r;
+ __u64 s;
+
+ s = mfd_assert_get_seals(fd);
+ r = fcntl(fd, F_ADD_SEALS, seals);
+ if (r < 0) {
+ printf("ADD_SEALS(%d, %llu -> %llu) failed: %m\n",
+ fd, (unsigned long long)s, (unsigned long long)seals);
+ abort();
+ }
+}
+
+static int mfd_busy_add_seals(int fd, __u64 seals)
+{
+ long r;
+ __u64 s;
+
+ r = fcntl(fd, F_GET_SEALS);
+ if (r < 0)
+ s = 0;
+ else
+ s = r;
+
+ r = fcntl(fd, F_ADD_SEALS, seals);
+ if (r < 0 && errno != EBUSY) {
+ printf("ADD_SEALS(%d, %llu -> %llu) didn't fail as expected with EBUSY: %m\n",
+ fd, (unsigned long long)s, (unsigned long long)seals);
+ abort();
+ }
+
+ return r;
+}
+
+static void *mfd_assert_mmap_shared(int fd)
+{
+ void *p;
+
+ p = mmap(NULL,
+ mfd_def_size,
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED,
+ fd,
+ 0);
+ if (p == MAP_FAILED) {
+ printf("mmap() failed: %m\n");
+ abort();
+ }
+
+ return p;
+}
+
+static void *mfd_assert_mmap_private(int fd)
+{
+ void *p;
+
+ p = mmap(NULL,
+ mfd_def_size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE,
+ fd,
+ 0);
+ if (p == MAP_FAILED) {
+ printf("mmap() failed: %m\n");
+ abort();
+ }
+
+ return p;
+}
+
+static int global_mfd = -1;
+static void *global_p = NULL;
+
+static int sealing_thread_fn(void *arg)
+{
+ int sig, r;
+
+ /*
+ * This thread first waits 200ms so any pending operation in the parent
+ * is correctly started. After that, it tries to seal @global_mfd as
+ * SEAL_WRITE. This _must_ fail as the parent thread has a read() into
+ * that memory mapped object still ongoing.
+ * We then wait one more second and try sealing again. This time it
+ * must succeed as there shouldn't be anyone else pinning the pages.
+ */
+
+ /* wait 200ms for FUSE-request to be active */
+ usleep(200000);
+
+ /* unmount mapping before sealing to avoid i_mmap_writable failures */
+ munmap(global_p, mfd_def_size);
+
+ /* Try sealing the global file; expect EBUSY or success. Current
+ * kernels will never succeed, but in the future, kernels might
+ * implement page-replacements or other fancy ways to avoid racing
+ * writes. */
+ r = mfd_busy_add_seals(global_mfd, F_SEAL_WRITE);
+ if (r >= 0) {
+ printf("HURRAY! This kernel fixed GUP races!\n");
+ } else {
+ /* wait 1s more so the FUSE-request is done */
+ sleep(1);
+
+ /* try sealing the global file again */
+ mfd_assert_add_seals(global_mfd, F_SEAL_WRITE);
+ }
+
+ return 0;
+}
+
+static pid_t spawn_sealing_thread(void)
+{
+ uint8_t *stack;
+ pid_t pid;
+
+ stack = malloc(STACK_SIZE);
+ if (!stack) {
+ printf("malloc(STACK_SIZE) failed: %m\n");
+ abort();
+ }
+
+ pid = clone(sealing_thread_fn,
+ stack + STACK_SIZE,
+ SIGCHLD | CLONE_FILES | CLONE_FS | CLONE_VM,
+ NULL);
+ if (pid < 0) {
+ printf("clone() failed: %m\n");
+ abort();
+ }
+
+ return pid;
+}
+
+static void join_sealing_thread(pid_t pid)
+{
+ waitpid(pid, NULL, 0);
+}
+
+int main(int argc, char **argv)
+{
+ char *zero;
+ int fd, mfd, r;
+ void *p;
+ int was_sealed;
+ pid_t pid;
+
+ if (argc < 2) {
+ printf("error: please pass path to file in fuse_mnt mount-point\n");
+ abort();
+ }
+
+ if (argc >= 3) {
+ if (!strcmp(argv[2], "hugetlbfs")) {
+ unsigned long hpage_size = default_huge_page_size();
+
+ if (!hpage_size) {
+ printf("Unable to determine huge page size\n");
+ abort();
+ }
+
+ hugetlbfs_test = 1;
+ mfd_def_size = hpage_size * 2;
+ } else {
+ printf("Unknown option: %s\n", argv[2]);
+ abort();
+ }
+ }
+
+ zero = calloc(sizeof(*zero), mfd_def_size);
+
+ /* open FUSE memfd file for GUP testing */
+ printf("opening: %s\n", argv[1]);
+ fd = open(argv[1], O_RDONLY | O_CLOEXEC);
+ if (fd < 0) {
+ printf("cannot open(\"%s\"): %m\n", argv[1]);
+ abort();
+ }
+
+ /* create new memfd-object */
+ mfd = mfd_assert_new("kern_memfd_fuse",
+ mfd_def_size,
+ MFD_CLOEXEC | MFD_ALLOW_SEALING);
+
+ /* mmap memfd-object for writing */
+ p = mfd_assert_mmap_shared(mfd);
+
+ /* pass mfd+mapping to a separate sealing-thread which tries to seal
+ * the memfd objects with SEAL_WRITE while we write into it */
+ global_mfd = mfd;
+ global_p = p;
+ pid = spawn_sealing_thread();
+
+ /* Use read() on the FUSE file to read into our memory-mapped memfd
+ * object. This races the other thread which tries to seal the
+ * memfd-object.
+ * If @fd is on the memfd-fake-FUSE-FS, the read() is delayed by 1s.
+ * This guarantees that the receive-buffer is pinned for 1s until the
+ * data is written into it. The racing ADD_SEALS should thus fail as
+ * the pages are still pinned. */
+ r = read(fd, p, mfd_def_size);
+ if (r < 0) {
+ printf("read() failed: %m\n");
+ abort();
+ } else if (!r) {
+ printf("unexpected EOF on read()\n");
+ abort();
+ }
+
+ was_sealed = mfd_assert_get_seals(mfd) & F_SEAL_WRITE;
+
+ /* Wait for sealing-thread to finish and verify that it
+ * successfully sealed the file after the second try. */
+ join_sealing_thread(pid);
+ mfd_assert_has_seals(mfd, F_SEAL_WRITE);
+
+ /* *IF* the memfd-object was sealed at the time our read() returned,
+ * then the kernel did a page-replacement or canceled the read() (or
+ * whatever magic it did..). In that case, the memfd object is still
+ * all zero.
+ * In case the memfd-object was *not* sealed, the read() was successfull
+ * and the memfd object must *not* be all zero.
+ * Note that in real scenarios, there might be a mixture of both, but
+ * in this test-cases, we have explicit 200ms delays which should be
+ * enough to avoid any in-flight writes. */
+
+ p = mfd_assert_mmap_private(mfd);
+ if (was_sealed && memcmp(p, zero, mfd_def_size)) {
+ printf("memfd sealed during read() but data not discarded\n");
+ abort();
+ } else if (!was_sealed && !memcmp(p, zero, mfd_def_size)) {
+ printf("memfd sealed after read() but data discarded\n");
+ abort();
+ }
+
+ close(mfd);
+ close(fd);
+
+ printf("fuse: DONE\n");
+ free(zero);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c
new file mode 100644
index 000000000..a4e520b94
--- /dev/null
+++ b/tools/testing/selftests/memfd/memfd_test.c
@@ -0,0 +1,970 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#define __EXPORTED_HEADERS__
+
+#include <errno.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <linux/falloc.h>
+#include <linux/fcntl.h>
+#include <linux/memfd.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "common.h"
+
+#define MEMFD_STR "memfd:"
+#define MEMFD_HUGE_STR "memfd-hugetlb:"
+#define SHARED_FT_STR "(shared file-table)"
+
+#define MFD_DEF_SIZE 8192
+#define STACK_SIZE 65536
+
+/*
+ * Default is not to test hugetlbfs
+ */
+static size_t mfd_def_size = MFD_DEF_SIZE;
+static const char *memfd_str = MEMFD_STR;
+
+static int mfd_assert_new(const char *name, loff_t sz, unsigned int flags)
+{
+ int r, fd;
+
+ fd = sys_memfd_create(name, flags);
+ if (fd < 0) {
+ printf("memfd_create(\"%s\", %u) failed: %m\n",
+ name, flags);
+ abort();
+ }
+
+ r = ftruncate(fd, sz);
+ if (r < 0) {
+ printf("ftruncate(%llu) failed: %m\n", (unsigned long long)sz);
+ abort();
+ }
+
+ return fd;
+}
+
+static void mfd_fail_new(const char *name, unsigned int flags)
+{
+ int r;
+
+ r = sys_memfd_create(name, flags);
+ if (r >= 0) {
+ printf("memfd_create(\"%s\", %u) succeeded, but failure expected\n",
+ name, flags);
+ close(r);
+ abort();
+ }
+}
+
+static unsigned int mfd_assert_get_seals(int fd)
+{
+ int r;
+
+ r = fcntl(fd, F_GET_SEALS);
+ if (r < 0) {
+ printf("GET_SEALS(%d) failed: %m\n", fd);
+ abort();
+ }
+
+ return (unsigned int)r;
+}
+
+static void mfd_assert_has_seals(int fd, unsigned int seals)
+{
+ unsigned int s;
+
+ s = mfd_assert_get_seals(fd);
+ if (s != seals) {
+ printf("%u != %u = GET_SEALS(%d)\n", seals, s, fd);
+ abort();
+ }
+}
+
+static void mfd_assert_add_seals(int fd, unsigned int seals)
+{
+ int r;
+ unsigned int s;
+
+ s = mfd_assert_get_seals(fd);
+ r = fcntl(fd, F_ADD_SEALS, seals);
+ if (r < 0) {
+ printf("ADD_SEALS(%d, %u -> %u) failed: %m\n", fd, s, seals);
+ abort();
+ }
+}
+
+static void mfd_fail_add_seals(int fd, unsigned int seals)
+{
+ int r;
+ unsigned int s;
+
+ r = fcntl(fd, F_GET_SEALS);
+ if (r < 0)
+ s = 0;
+ else
+ s = (unsigned int)r;
+
+ r = fcntl(fd, F_ADD_SEALS, seals);
+ if (r >= 0) {
+ printf("ADD_SEALS(%d, %u -> %u) didn't fail as expected\n",
+ fd, s, seals);
+ abort();
+ }
+}
+
+static void mfd_assert_size(int fd, size_t size)
+{
+ struct stat st;
+ int r;
+
+ r = fstat(fd, &st);
+ if (r < 0) {
+ printf("fstat(%d) failed: %m\n", fd);
+ abort();
+ } else if (st.st_size != size) {
+ printf("wrong file size %lld, but expected %lld\n",
+ (long long)st.st_size, (long long)size);
+ abort();
+ }
+}
+
+static int mfd_assert_dup(int fd)
+{
+ int r;
+
+ r = dup(fd);
+ if (r < 0) {
+ printf("dup(%d) failed: %m\n", fd);
+ abort();
+ }
+
+ return r;
+}
+
+static void *mfd_assert_mmap_shared(int fd)
+{
+ void *p;
+
+ p = mmap(NULL,
+ mfd_def_size,
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED,
+ fd,
+ 0);
+ if (p == MAP_FAILED) {
+ printf("mmap() failed: %m\n");
+ abort();
+ }
+
+ return p;
+}
+
+static void *mfd_assert_mmap_private(int fd)
+{
+ void *p;
+
+ p = mmap(NULL,
+ mfd_def_size,
+ PROT_READ,
+ MAP_PRIVATE,
+ fd,
+ 0);
+ if (p == MAP_FAILED) {
+ printf("mmap() failed: %m\n");
+ abort();
+ }
+
+ return p;
+}
+
+static int mfd_assert_open(int fd, int flags, mode_t mode)
+{
+ char buf[512];
+ int r;
+
+ sprintf(buf, "/proc/self/fd/%d", fd);
+ r = open(buf, flags, mode);
+ if (r < 0) {
+ printf("open(%s) failed: %m\n", buf);
+ abort();
+ }
+
+ return r;
+}
+
+static void mfd_fail_open(int fd, int flags, mode_t mode)
+{
+ char buf[512];
+ int r;
+
+ sprintf(buf, "/proc/self/fd/%d", fd);
+ r = open(buf, flags, mode);
+ if (r >= 0) {
+ printf("open(%s) didn't fail as expected\n", buf);
+ abort();
+ }
+}
+
+static void mfd_assert_read(int fd)
+{
+ char buf[16];
+ void *p;
+ ssize_t l;
+
+ l = read(fd, buf, sizeof(buf));
+ if (l != sizeof(buf)) {
+ printf("read() failed: %m\n");
+ abort();
+ }
+
+ /* verify PROT_READ *is* allowed */
+ p = mmap(NULL,
+ mfd_def_size,
+ PROT_READ,
+ MAP_PRIVATE,
+ fd,
+ 0);
+ if (p == MAP_FAILED) {
+ printf("mmap() failed: %m\n");
+ abort();
+ }
+ munmap(p, mfd_def_size);
+
+ /* verify MAP_PRIVATE is *always* allowed (even writable) */
+ p = mmap(NULL,
+ mfd_def_size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE,
+ fd,
+ 0);
+ if (p == MAP_FAILED) {
+ printf("mmap() failed: %m\n");
+ abort();
+ }
+ munmap(p, mfd_def_size);
+}
+
+static void mfd_assert_write(int fd)
+{
+ ssize_t l;
+ void *p;
+ int r;
+
+ /*
+ * huegtlbfs does not support write, but we want to
+ * verify everything else here.
+ */
+ if (!hugetlbfs_test) {
+ /* verify write() succeeds */
+ l = write(fd, "\0\0\0\0", 4);
+ if (l != 4) {
+ printf("write() failed: %m\n");
+ abort();
+ }
+ }
+
+ /* verify PROT_READ | PROT_WRITE is allowed */
+ p = mmap(NULL,
+ mfd_def_size,
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED,
+ fd,
+ 0);
+ if (p == MAP_FAILED) {
+ printf("mmap() failed: %m\n");
+ abort();
+ }
+ *(char *)p = 0;
+ munmap(p, mfd_def_size);
+
+ /* verify PROT_WRITE is allowed */
+ p = mmap(NULL,
+ mfd_def_size,
+ PROT_WRITE,
+ MAP_SHARED,
+ fd,
+ 0);
+ if (p == MAP_FAILED) {
+ printf("mmap() failed: %m\n");
+ abort();
+ }
+ *(char *)p = 0;
+ munmap(p, mfd_def_size);
+
+ /* verify PROT_READ with MAP_SHARED is allowed and a following
+ * mprotect(PROT_WRITE) allows writing */
+ p = mmap(NULL,
+ mfd_def_size,
+ PROT_READ,
+ MAP_SHARED,
+ fd,
+ 0);
+ if (p == MAP_FAILED) {
+ printf("mmap() failed: %m\n");
+ abort();
+ }
+
+ r = mprotect(p, mfd_def_size, PROT_READ | PROT_WRITE);
+ if (r < 0) {
+ printf("mprotect() failed: %m\n");
+ abort();
+ }
+
+ *(char *)p = 0;
+ munmap(p, mfd_def_size);
+
+ /* verify PUNCH_HOLE works */
+ r = fallocate(fd,
+ FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+ 0,
+ mfd_def_size);
+ if (r < 0) {
+ printf("fallocate(PUNCH_HOLE) failed: %m\n");
+ abort();
+ }
+}
+
+static void mfd_fail_write(int fd)
+{
+ ssize_t l;
+ void *p;
+ int r;
+
+ /* verify write() fails */
+ l = write(fd, "data", 4);
+ if (l != -EPERM) {
+ printf("expected EPERM on write(), but got %d: %m\n", (int)l);
+ abort();
+ }
+
+ /* verify PROT_READ | PROT_WRITE is not allowed */
+ p = mmap(NULL,
+ mfd_def_size,
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED,
+ fd,
+ 0);
+ if (p != MAP_FAILED) {
+ printf("mmap() didn't fail as expected\n");
+ abort();
+ }
+
+ /* verify PROT_WRITE is not allowed */
+ p = mmap(NULL,
+ mfd_def_size,
+ PROT_WRITE,
+ MAP_SHARED,
+ fd,
+ 0);
+ if (p != MAP_FAILED) {
+ printf("mmap() didn't fail as expected\n");
+ abort();
+ }
+
+ /* Verify PROT_READ with MAP_SHARED with a following mprotect is not
+ * allowed. Note that for r/w the kernel already prevents the mmap. */
+ p = mmap(NULL,
+ mfd_def_size,
+ PROT_READ,
+ MAP_SHARED,
+ fd,
+ 0);
+ if (p != MAP_FAILED) {
+ r = mprotect(p, mfd_def_size, PROT_READ | PROT_WRITE);
+ if (r >= 0) {
+ printf("mmap()+mprotect() didn't fail as expected\n");
+ abort();
+ }
+ munmap(p, mfd_def_size);
+ }
+
+ /* verify PUNCH_HOLE fails */
+ r = fallocate(fd,
+ FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+ 0,
+ mfd_def_size);
+ if (r >= 0) {
+ printf("fallocate(PUNCH_HOLE) didn't fail as expected\n");
+ abort();
+ }
+}
+
+static void mfd_assert_shrink(int fd)
+{
+ int r, fd2;
+
+ r = ftruncate(fd, mfd_def_size / 2);
+ if (r < 0) {
+ printf("ftruncate(SHRINK) failed: %m\n");
+ abort();
+ }
+
+ mfd_assert_size(fd, mfd_def_size / 2);
+
+ fd2 = mfd_assert_open(fd,
+ O_RDWR | O_CREAT | O_TRUNC,
+ S_IRUSR | S_IWUSR);
+ close(fd2);
+
+ mfd_assert_size(fd, 0);
+}
+
+static void mfd_fail_shrink(int fd)
+{
+ int r;
+
+ r = ftruncate(fd, mfd_def_size / 2);
+ if (r >= 0) {
+ printf("ftruncate(SHRINK) didn't fail as expected\n");
+ abort();
+ }
+
+ mfd_fail_open(fd,
+ O_RDWR | O_CREAT | O_TRUNC,
+ S_IRUSR | S_IWUSR);
+}
+
+static void mfd_assert_grow(int fd)
+{
+ int r;
+
+ r = ftruncate(fd, mfd_def_size * 2);
+ if (r < 0) {
+ printf("ftruncate(GROW) failed: %m\n");
+ abort();
+ }
+
+ mfd_assert_size(fd, mfd_def_size * 2);
+
+ r = fallocate(fd,
+ 0,
+ 0,
+ mfd_def_size * 4);
+ if (r < 0) {
+ printf("fallocate(ALLOC) failed: %m\n");
+ abort();
+ }
+
+ mfd_assert_size(fd, mfd_def_size * 4);
+}
+
+static void mfd_fail_grow(int fd)
+{
+ int r;
+
+ r = ftruncate(fd, mfd_def_size * 2);
+ if (r >= 0) {
+ printf("ftruncate(GROW) didn't fail as expected\n");
+ abort();
+ }
+
+ r = fallocate(fd,
+ 0,
+ 0,
+ mfd_def_size * 4);
+ if (r >= 0) {
+ printf("fallocate(ALLOC) didn't fail as expected\n");
+ abort();
+ }
+}
+
+static void mfd_assert_grow_write(int fd)
+{
+ static char *buf;
+ ssize_t l;
+
+ /* hugetlbfs does not support write */
+ if (hugetlbfs_test)
+ return;
+
+ buf = malloc(mfd_def_size * 8);
+ if (!buf) {
+ printf("malloc(%zu) failed: %m\n", mfd_def_size * 8);
+ abort();
+ }
+
+ l = pwrite(fd, buf, mfd_def_size * 8, 0);
+ if (l != (mfd_def_size * 8)) {
+ printf("pwrite() failed: %m\n");
+ abort();
+ }
+
+ mfd_assert_size(fd, mfd_def_size * 8);
+}
+
+static void mfd_fail_grow_write(int fd)
+{
+ static char *buf;
+ ssize_t l;
+
+ /* hugetlbfs does not support write */
+ if (hugetlbfs_test)
+ return;
+
+ buf = malloc(mfd_def_size * 8);
+ if (!buf) {
+ printf("malloc(%zu) failed: %m\n", mfd_def_size * 8);
+ abort();
+ }
+
+ l = pwrite(fd, buf, mfd_def_size * 8, 0);
+ if (l == (mfd_def_size * 8)) {
+ printf("pwrite() didn't fail as expected\n");
+ abort();
+ }
+}
+
+static int idle_thread_fn(void *arg)
+{
+ sigset_t set;
+ int sig;
+
+ /* dummy waiter; SIGTERM terminates us anyway */
+ sigemptyset(&set);
+ sigaddset(&set, SIGTERM);
+ sigwait(&set, &sig);
+
+ return 0;
+}
+
+static pid_t spawn_idle_thread(unsigned int flags)
+{
+ uint8_t *stack;
+ pid_t pid;
+
+ stack = malloc(STACK_SIZE);
+ if (!stack) {
+ printf("malloc(STACK_SIZE) failed: %m\n");
+ abort();
+ }
+
+ pid = clone(idle_thread_fn,
+ stack + STACK_SIZE,
+ SIGCHLD | flags,
+ NULL);
+ if (pid < 0) {
+ printf("clone() failed: %m\n");
+ abort();
+ }
+
+ return pid;
+}
+
+static void join_idle_thread(pid_t pid)
+{
+ kill(pid, SIGTERM);
+ waitpid(pid, NULL, 0);
+}
+
+/*
+ * Test memfd_create() syscall
+ * Verify syscall-argument validation, including name checks, flag validation
+ * and more.
+ */
+static void test_create(void)
+{
+ char buf[2048];
+ int fd;
+
+ printf("%s CREATE\n", memfd_str);
+
+ /* test NULL name */
+ mfd_fail_new(NULL, 0);
+
+ /* test over-long name (not zero-terminated) */
+ memset(buf, 0xff, sizeof(buf));
+ mfd_fail_new(buf, 0);
+
+ /* test over-long zero-terminated name */
+ memset(buf, 0xff, sizeof(buf));
+ buf[sizeof(buf) - 1] = 0;
+ mfd_fail_new(buf, 0);
+
+ /* verify "" is a valid name */
+ fd = mfd_assert_new("", 0, 0);
+ close(fd);
+
+ /* verify invalid O_* open flags */
+ mfd_fail_new("", 0x0100);
+ mfd_fail_new("", ~MFD_CLOEXEC);
+ mfd_fail_new("", ~MFD_ALLOW_SEALING);
+ mfd_fail_new("", ~0);
+ mfd_fail_new("", 0x80000000U);
+
+ /* verify MFD_CLOEXEC is allowed */
+ fd = mfd_assert_new("", 0, MFD_CLOEXEC);
+ close(fd);
+
+ /* verify MFD_ALLOW_SEALING is allowed */
+ fd = mfd_assert_new("", 0, MFD_ALLOW_SEALING);
+ close(fd);
+
+ /* verify MFD_ALLOW_SEALING | MFD_CLOEXEC is allowed */
+ fd = mfd_assert_new("", 0, MFD_ALLOW_SEALING | MFD_CLOEXEC);
+ close(fd);
+}
+
+/*
+ * Test basic sealing
+ * A very basic sealing test to see whether setting/retrieving seals works.
+ */
+static void test_basic(void)
+{
+ int fd;
+
+ printf("%s BASIC\n", memfd_str);
+
+ fd = mfd_assert_new("kern_memfd_basic",
+ mfd_def_size,
+ MFD_CLOEXEC | MFD_ALLOW_SEALING);
+
+ /* add basic seals */
+ mfd_assert_has_seals(fd, 0);
+ mfd_assert_add_seals(fd, F_SEAL_SHRINK |
+ F_SEAL_WRITE);
+ mfd_assert_has_seals(fd, F_SEAL_SHRINK |
+ F_SEAL_WRITE);
+
+ /* add them again */
+ mfd_assert_add_seals(fd, F_SEAL_SHRINK |
+ F_SEAL_WRITE);
+ mfd_assert_has_seals(fd, F_SEAL_SHRINK |
+ F_SEAL_WRITE);
+
+ /* add more seals and seal against sealing */
+ mfd_assert_add_seals(fd, F_SEAL_GROW | F_SEAL_SEAL);
+ mfd_assert_has_seals(fd, F_SEAL_SHRINK |
+ F_SEAL_GROW |
+ F_SEAL_WRITE |
+ F_SEAL_SEAL);
+
+ /* verify that sealing no longer works */
+ mfd_fail_add_seals(fd, F_SEAL_GROW);
+ mfd_fail_add_seals(fd, 0);
+
+ close(fd);
+
+ /* verify sealing does not work without MFD_ALLOW_SEALING */
+ fd = mfd_assert_new("kern_memfd_basic",
+ mfd_def_size,
+ MFD_CLOEXEC);
+ mfd_assert_has_seals(fd, F_SEAL_SEAL);
+ mfd_fail_add_seals(fd, F_SEAL_SHRINK |
+ F_SEAL_GROW |
+ F_SEAL_WRITE);
+ mfd_assert_has_seals(fd, F_SEAL_SEAL);
+ close(fd);
+}
+
+/*
+ * Test SEAL_WRITE
+ * Test whether SEAL_WRITE actually prevents modifications.
+ */
+static void test_seal_write(void)
+{
+ int fd;
+
+ printf("%s SEAL-WRITE\n", memfd_str);
+
+ fd = mfd_assert_new("kern_memfd_seal_write",
+ mfd_def_size,
+ MFD_CLOEXEC | MFD_ALLOW_SEALING);
+ mfd_assert_has_seals(fd, 0);
+ mfd_assert_add_seals(fd, F_SEAL_WRITE);
+ mfd_assert_has_seals(fd, F_SEAL_WRITE);
+
+ mfd_assert_read(fd);
+ mfd_fail_write(fd);
+ mfd_assert_shrink(fd);
+ mfd_assert_grow(fd);
+ mfd_fail_grow_write(fd);
+
+ close(fd);
+}
+
+/*
+ * Test SEAL_SHRINK
+ * Test whether SEAL_SHRINK actually prevents shrinking
+ */
+static void test_seal_shrink(void)
+{
+ int fd;
+
+ printf("%s SEAL-SHRINK\n", memfd_str);
+
+ fd = mfd_assert_new("kern_memfd_seal_shrink",
+ mfd_def_size,
+ MFD_CLOEXEC | MFD_ALLOW_SEALING);
+ mfd_assert_has_seals(fd, 0);
+ mfd_assert_add_seals(fd, F_SEAL_SHRINK);
+ mfd_assert_has_seals(fd, F_SEAL_SHRINK);
+
+ mfd_assert_read(fd);
+ mfd_assert_write(fd);
+ mfd_fail_shrink(fd);
+ mfd_assert_grow(fd);
+ mfd_assert_grow_write(fd);
+
+ close(fd);
+}
+
+/*
+ * Test SEAL_GROW
+ * Test whether SEAL_GROW actually prevents growing
+ */
+static void test_seal_grow(void)
+{
+ int fd;
+
+ printf("%s SEAL-GROW\n", memfd_str);
+
+ fd = mfd_assert_new("kern_memfd_seal_grow",
+ mfd_def_size,
+ MFD_CLOEXEC | MFD_ALLOW_SEALING);
+ mfd_assert_has_seals(fd, 0);
+ mfd_assert_add_seals(fd, F_SEAL_GROW);
+ mfd_assert_has_seals(fd, F_SEAL_GROW);
+
+ mfd_assert_read(fd);
+ mfd_assert_write(fd);
+ mfd_assert_shrink(fd);
+ mfd_fail_grow(fd);
+ mfd_fail_grow_write(fd);
+
+ close(fd);
+}
+
+/*
+ * Test SEAL_SHRINK | SEAL_GROW
+ * Test whether SEAL_SHRINK | SEAL_GROW actually prevents resizing
+ */
+static void test_seal_resize(void)
+{
+ int fd;
+
+ printf("%s SEAL-RESIZE\n", memfd_str);
+
+ fd = mfd_assert_new("kern_memfd_seal_resize",
+ mfd_def_size,
+ MFD_CLOEXEC | MFD_ALLOW_SEALING);
+ mfd_assert_has_seals(fd, 0);
+ mfd_assert_add_seals(fd, F_SEAL_SHRINK | F_SEAL_GROW);
+ mfd_assert_has_seals(fd, F_SEAL_SHRINK | F_SEAL_GROW);
+
+ mfd_assert_read(fd);
+ mfd_assert_write(fd);
+ mfd_fail_shrink(fd);
+ mfd_fail_grow(fd);
+ mfd_fail_grow_write(fd);
+
+ close(fd);
+}
+
+/*
+ * Test sharing via dup()
+ * Test that seals are shared between dupped FDs and they're all equal.
+ */
+static void test_share_dup(char *banner, char *b_suffix)
+{
+ int fd, fd2;
+
+ printf("%s %s %s\n", memfd_str, banner, b_suffix);
+
+ fd = mfd_assert_new("kern_memfd_share_dup",
+ mfd_def_size,
+ MFD_CLOEXEC | MFD_ALLOW_SEALING);
+ mfd_assert_has_seals(fd, 0);
+
+ fd2 = mfd_assert_dup(fd);
+ mfd_assert_has_seals(fd2, 0);
+
+ mfd_assert_add_seals(fd, F_SEAL_WRITE);
+ mfd_assert_has_seals(fd, F_SEAL_WRITE);
+ mfd_assert_has_seals(fd2, F_SEAL_WRITE);
+
+ mfd_assert_add_seals(fd2, F_SEAL_SHRINK);
+ mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
+ mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK);
+
+ mfd_assert_add_seals(fd, F_SEAL_SEAL);
+ mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
+ mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
+
+ mfd_fail_add_seals(fd, F_SEAL_GROW);
+ mfd_fail_add_seals(fd2, F_SEAL_GROW);
+ mfd_fail_add_seals(fd, F_SEAL_SEAL);
+ mfd_fail_add_seals(fd2, F_SEAL_SEAL);
+
+ close(fd2);
+
+ mfd_fail_add_seals(fd, F_SEAL_GROW);
+ close(fd);
+}
+
+/*
+ * Test sealing with active mmap()s
+ * Modifying seals is only allowed if no other mmap() refs exist.
+ */
+static void test_share_mmap(char *banner, char *b_suffix)
+{
+ int fd;
+ void *p;
+
+ printf("%s %s %s\n", memfd_str, banner, b_suffix);
+
+ fd = mfd_assert_new("kern_memfd_share_mmap",
+ mfd_def_size,
+ MFD_CLOEXEC | MFD_ALLOW_SEALING);
+ mfd_assert_has_seals(fd, 0);
+
+ /* shared/writable ref prevents sealing WRITE, but allows others */
+ p = mfd_assert_mmap_shared(fd);
+ mfd_fail_add_seals(fd, F_SEAL_WRITE);
+ mfd_assert_has_seals(fd, 0);
+ mfd_assert_add_seals(fd, F_SEAL_SHRINK);
+ mfd_assert_has_seals(fd, F_SEAL_SHRINK);
+ munmap(p, mfd_def_size);
+
+ /* readable ref allows sealing */
+ p = mfd_assert_mmap_private(fd);
+ mfd_assert_add_seals(fd, F_SEAL_WRITE);
+ mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
+ munmap(p, mfd_def_size);
+
+ close(fd);
+}
+
+/*
+ * Test sealing with open(/proc/self/fd/%d)
+ * Via /proc we can get access to a separate file-context for the same memfd.
+ * This is *not* like dup(), but like a real separate open(). Make sure the
+ * semantics are as expected and we correctly check for RDONLY / WRONLY / RDWR.
+ */
+static void test_share_open(char *banner, char *b_suffix)
+{
+ int fd, fd2;
+
+ printf("%s %s %s\n", memfd_str, banner, b_suffix);
+
+ fd = mfd_assert_new("kern_memfd_share_open",
+ mfd_def_size,
+ MFD_CLOEXEC | MFD_ALLOW_SEALING);
+ mfd_assert_has_seals(fd, 0);
+
+ fd2 = mfd_assert_open(fd, O_RDWR, 0);
+ mfd_assert_add_seals(fd, F_SEAL_WRITE);
+ mfd_assert_has_seals(fd, F_SEAL_WRITE);
+ mfd_assert_has_seals(fd2, F_SEAL_WRITE);
+
+ mfd_assert_add_seals(fd2, F_SEAL_SHRINK);
+ mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
+ mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK);
+
+ close(fd);
+ fd = mfd_assert_open(fd2, O_RDONLY, 0);
+
+ mfd_fail_add_seals(fd, F_SEAL_SEAL);
+ mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
+ mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK);
+
+ close(fd2);
+ fd2 = mfd_assert_open(fd, O_RDWR, 0);
+
+ mfd_assert_add_seals(fd2, F_SEAL_SEAL);
+ mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
+ mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
+
+ close(fd2);
+ close(fd);
+}
+
+/*
+ * Test sharing via fork()
+ * Test whether seal-modifications work as expected with forked childs.
+ */
+static void test_share_fork(char *banner, char *b_suffix)
+{
+ int fd;
+ pid_t pid;
+
+ printf("%s %s %s\n", memfd_str, banner, b_suffix);
+
+ fd = mfd_assert_new("kern_memfd_share_fork",
+ mfd_def_size,
+ MFD_CLOEXEC | MFD_ALLOW_SEALING);
+ mfd_assert_has_seals(fd, 0);
+
+ pid = spawn_idle_thread(0);
+ mfd_assert_add_seals(fd, F_SEAL_SEAL);
+ mfd_assert_has_seals(fd, F_SEAL_SEAL);
+
+ mfd_fail_add_seals(fd, F_SEAL_WRITE);
+ mfd_assert_has_seals(fd, F_SEAL_SEAL);
+
+ join_idle_thread(pid);
+
+ mfd_fail_add_seals(fd, F_SEAL_WRITE);
+ mfd_assert_has_seals(fd, F_SEAL_SEAL);
+
+ close(fd);
+}
+
+int main(int argc, char **argv)
+{
+ pid_t pid;
+
+ if (argc == 2) {
+ if (!strcmp(argv[1], "hugetlbfs")) {
+ unsigned long hpage_size = default_huge_page_size();
+
+ if (!hpage_size) {
+ printf("Unable to determine huge page size\n");
+ abort();
+ }
+
+ hugetlbfs_test = 1;
+ memfd_str = MEMFD_HUGE_STR;
+ mfd_def_size = hpage_size * 2;
+ } else {
+ printf("Unknown option: %s\n", argv[1]);
+ abort();
+ }
+ }
+
+ test_create();
+ test_basic();
+
+ test_seal_write();
+ test_seal_shrink();
+ test_seal_grow();
+ test_seal_resize();
+
+ test_share_dup("SHARE-DUP", "");
+ test_share_mmap("SHARE-MMAP", "");
+ test_share_open("SHARE-OPEN", "");
+ test_share_fork("SHARE-FORK", "");
+
+ /* Run test-suite in a multi-threaded environment with a shared
+ * file-table. */
+ pid = spawn_idle_thread(CLONE_FILES | CLONE_FS | CLONE_VM);
+ test_share_dup("SHARE-DUP", SHARED_FT_STR);
+ test_share_mmap("SHARE-MMAP", SHARED_FT_STR);
+ test_share_open("SHARE-OPEN", SHARED_FT_STR);
+ test_share_fork("SHARE-FORK", SHARED_FT_STR);
+ join_idle_thread(pid);
+
+ printf("memfd: DONE\n");
+
+ return 0;
+}
diff --git a/tools/testing/selftests/memfd/run_fuse_test.sh b/tools/testing/selftests/memfd/run_fuse_test.sh
new file mode 100755
index 000000000..22e572e2d
--- /dev/null
+++ b/tools/testing/selftests/memfd/run_fuse_test.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+if test -d "./mnt" ; then
+ fusermount -u ./mnt
+ rmdir ./mnt
+fi
+
+set -e
+
+mkdir mnt
+./fuse_mnt ./mnt
+./fuse_test ./mnt/memfd $@
+fusermount -u ./mnt
+rmdir ./mnt
diff --git a/tools/testing/selftests/memfd/run_hugetlbfs_test.sh b/tools/testing/selftests/memfd/run_hugetlbfs_test.sh
new file mode 100755
index 000000000..fb633eeb0
--- /dev/null
+++ b/tools/testing/selftests/memfd/run_hugetlbfs_test.sh
@@ -0,0 +1,68 @@
+#!/bin/bash
+# please run as root
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+#
+# To test memfd_create with hugetlbfs, there needs to be hpages_test
+# huge pages free. Attempt to allocate enough pages to test.
+#
+hpages_test=8
+
+#
+# Get count of free huge pages from /proc/meminfo
+#
+while read name size unit; do
+ if [ "$name" = "HugePages_Free:" ]; then
+ freepgs=$size
+ fi
+done < /proc/meminfo
+
+#
+# If not enough free huge pages for test, attempt to increase
+#
+if [ -n "$freepgs" ] && [ $freepgs -lt $hpages_test ]; then
+ nr_hugepgs=`cat /proc/sys/vm/nr_hugepages`
+ hpages_needed=`expr $hpages_test - $freepgs`
+
+ if [ $UID != 0 ]; then
+ echo "Please run memfd with hugetlbfs test as root"
+ exit $ksft_skip
+ fi
+
+ echo 3 > /proc/sys/vm/drop_caches
+ echo $(( $hpages_needed + $nr_hugepgs )) > /proc/sys/vm/nr_hugepages
+ while read name size unit; do
+ if [ "$name" = "HugePages_Free:" ]; then
+ freepgs=$size
+ fi
+ done < /proc/meminfo
+fi
+
+#
+# If still not enough huge pages available, exit. But, give back any huge
+# pages potentially allocated above.
+#
+if [ $freepgs -lt $hpages_test ]; then
+ # nr_hugepgs non-zero only if we attempted to increase
+ if [ -n "$nr_hugepgs" ]; then
+ echo $nr_hugepgs > /proc/sys/vm/nr_hugepages
+ fi
+ printf "Not enough huge pages available (%d < %d)\n" \
+ $freepgs $needpgs
+ exit $ksft_skip
+fi
+
+#
+# Run the hugetlbfs test
+#
+./memfd_test hugetlbfs
+./run_fuse_test.sh hugetlbfs
+
+#
+# Give back any huge pages allocated for the test
+#
+if [ -n "$nr_hugepgs" ]; then
+ echo $nr_hugepgs > /proc/sys/vm/nr_hugepages
+fi
diff --git a/tools/testing/selftests/memory-hotplug/Makefile b/tools/testing/selftests/memory-hotplug/Makefile
new file mode 100644
index 000000000..e0a625e34
--- /dev/null
+++ b/tools/testing/selftests/memory-hotplug/Makefile
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
+all:
+
+include ../lib.mk
+
+TEST_PROGS := mem-on-off-test.sh
+
+run_full_test:
+ @/bin/bash ./mem-on-off-test.sh -r 10 && echo "memory-hotplug selftests: [PASS]" || echo "memory-hotplug selftests: [FAIL]"
+
+clean:
diff --git a/tools/testing/selftests/memory-hotplug/config b/tools/testing/selftests/memory-hotplug/config
new file mode 100644
index 000000000..a7e8cd5bb
--- /dev/null
+++ b/tools/testing/selftests/memory-hotplug/config
@@ -0,0 +1,5 @@
+CONFIG_MEMORY_HOTPLUG=y
+CONFIG_MEMORY_HOTPLUG_SPARSE=y
+CONFIG_NOTIFIER_ERROR_INJECTION=y
+CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m
+CONFIG_MEMORY_HOTREMOVE=y
diff --git a/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh b/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh
new file mode 100755
index 000000000..b37585e6a
--- /dev/null
+++ b/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh
@@ -0,0 +1,291 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+SYSFS=
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+prerequisite()
+{
+ msg="skip all tests:"
+
+ if [ $UID != 0 ]; then
+ echo $msg must be run as root >&2
+ exit $ksft_skip
+ fi
+
+ SYSFS=`mount -t sysfs | head -1 | awk '{ print $3 }'`
+
+ if [ ! -d "$SYSFS" ]; then
+ echo $msg sysfs is not mounted >&2
+ exit $ksft_skip
+ fi
+
+ if ! ls $SYSFS/devices/system/memory/memory* > /dev/null 2>&1; then
+ echo $msg memory hotplug is not supported >&2
+ exit $ksft_skip
+ fi
+
+ if ! grep -q 1 $SYSFS/devices/system/memory/memory*/removable; then
+ echo $msg no hot-pluggable memory >&2
+ exit $ksft_skip
+ fi
+}
+
+#
+# list all hot-pluggable memory
+#
+hotpluggable_memory()
+{
+ local state=${1:-.\*}
+
+ for memory in $SYSFS/devices/system/memory/memory*; do
+ if grep -q 1 $memory/removable &&
+ grep -q $state $memory/state; then
+ echo ${memory##/*/memory}
+ fi
+ done
+}
+
+hotpluggable_offline_memory()
+{
+ hotpluggable_memory offline
+}
+
+hotpluggable_online_memory()
+{
+ hotpluggable_memory online
+}
+
+memory_is_online()
+{
+ grep -q online $SYSFS/devices/system/memory/memory$1/state
+}
+
+memory_is_offline()
+{
+ grep -q offline $SYSFS/devices/system/memory/memory$1/state
+}
+
+online_memory()
+{
+ echo online > $SYSFS/devices/system/memory/memory$1/state
+}
+
+offline_memory()
+{
+ echo offline > $SYSFS/devices/system/memory/memory$1/state
+}
+
+online_memory_expect_success()
+{
+ local memory=$1
+
+ if ! online_memory $memory; then
+ echo $FUNCNAME $memory: unexpected fail >&2
+ return 1
+ elif ! memory_is_online $memory; then
+ echo $FUNCNAME $memory: unexpected offline >&2
+ return 1
+ fi
+ return 0
+}
+
+online_memory_expect_fail()
+{
+ local memory=$1
+
+ if online_memory $memory 2> /dev/null; then
+ echo $FUNCNAME $memory: unexpected success >&2
+ return 1
+ elif ! memory_is_offline $memory; then
+ echo $FUNCNAME $memory: unexpected online >&2
+ return 1
+ fi
+ return 0
+}
+
+offline_memory_expect_success()
+{
+ local memory=$1
+
+ if ! offline_memory $memory; then
+ echo $FUNCNAME $memory: unexpected fail >&2
+ return 1
+ elif ! memory_is_offline $memory; then
+ echo $FUNCNAME $memory: unexpected offline >&2
+ return 1
+ fi
+ return 0
+}
+
+offline_memory_expect_fail()
+{
+ local memory=$1
+
+ if offline_memory $memory 2> /dev/null; then
+ echo $FUNCNAME $memory: unexpected success >&2
+ return 1
+ elif ! memory_is_online $memory; then
+ echo $FUNCNAME $memory: unexpected offline >&2
+ return 1
+ fi
+ return 0
+}
+
+error=-12
+priority=0
+# Run with default of ratio=2 for Kselftest run
+ratio=2
+retval=0
+
+while getopts e:hp:r: opt; do
+ case $opt in
+ e)
+ error=$OPTARG
+ ;;
+ h)
+ echo "Usage $0 [ -e errno ] [ -p notifier-priority ] [ -r percent-of-memory-to-offline ]"
+ exit
+ ;;
+ p)
+ priority=$OPTARG
+ ;;
+ r)
+ ratio=$OPTARG
+ if [ "$ratio" -gt 100 ] || [ "$ratio" -lt 0 ]; then
+ echo "The percentage should be an integer within 0~100 range"
+ exit 1
+ fi
+ ;;
+ esac
+done
+
+if ! [ "$error" -ge -4095 -a "$error" -lt 0 ]; then
+ echo "error code must be -4095 <= errno < 0" >&2
+ exit 1
+fi
+
+prerequisite
+
+echo "Test scope: $ratio% hotplug memory"
+
+#
+# Online all hot-pluggable memory
+#
+hotpluggable_num=`hotpluggable_offline_memory | wc -l`
+echo -e "\t online all hot-pluggable memory in offline state:"
+if [ "$hotpluggable_num" -gt 0 ]; then
+ for memory in `hotpluggable_offline_memory`; do
+ echo "offline->online memory$memory"
+ if ! online_memory_expect_success $memory; then
+ retval=1
+ fi
+ done
+else
+ echo -e "\t\t SKIPPED - no hot-pluggable memory in offline state"
+fi
+
+#
+# Offline $ratio percent of hot-pluggable memory
+#
+hotpluggable_num=`hotpluggable_online_memory | wc -l`
+target=`echo "a=$hotpluggable_num*$ratio; if ( a%100 ) a/100+1 else a/100" | bc`
+echo -e "\t offline $ratio% hot-pluggable memory in online state"
+echo -e "\t trying to offline $target out of $hotpluggable_num memory block(s):"
+for memory in `hotpluggable_online_memory`; do
+ if [ "$target" -gt 0 ]; then
+ echo "online->offline memory$memory"
+ if offline_memory_expect_success $memory; then
+ target=$(($target - 1))
+ fi
+ fi
+done
+if [ "$target" -gt 0 ]; then
+ retval=1
+ echo -e "\t\t FAILED - unable to offline some memory blocks, device busy?"
+fi
+
+#
+# Online all hot-pluggable memory again
+#
+hotpluggable_num=`hotpluggable_offline_memory | wc -l`
+echo -e "\t online all hot-pluggable memory in offline state:"
+if [ "$hotpluggable_num" -gt 0 ]; then
+ for memory in `hotpluggable_offline_memory`; do
+ echo "offline->online memory$memory"
+ if ! online_memory_expect_success $memory; then
+ retval=1
+ fi
+ done
+else
+ echo -e "\t\t SKIPPED - no hot-pluggable memory in offline state"
+fi
+
+#
+# Test with memory notifier error injection
+#
+
+DEBUGFS=`mount -t debugfs | head -1 | awk '{ print $3 }'`
+NOTIFIER_ERR_INJECT_DIR=$DEBUGFS/notifier-error-inject/memory
+
+prerequisite_extra()
+{
+ msg="skip extra tests:"
+
+ /sbin/modprobe -q -r memory-notifier-error-inject
+ /sbin/modprobe -q memory-notifier-error-inject priority=$priority
+
+ if [ ! -d "$DEBUGFS" ]; then
+ echo $msg debugfs is not mounted >&2
+ exit $retval
+ fi
+
+ if [ ! -d $NOTIFIER_ERR_INJECT_DIR ]; then
+ echo $msg memory-notifier-error-inject module is not available >&2
+ exit $retval
+ fi
+}
+
+echo -e "\t Test with memory notifier error injection"
+prerequisite_extra
+
+#
+# Offline $ratio percent of hot-pluggable memory
+#
+echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_OFFLINE/error
+for memory in `hotpluggable_online_memory`; do
+ if [ $((RANDOM % 100)) -lt $ratio ]; then
+ offline_memory_expect_success $memory
+ fi
+done
+
+#
+# Test memory hot-add error handling (offline => online)
+#
+echo $error > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_ONLINE/error
+for memory in `hotpluggable_offline_memory`; do
+ online_memory_expect_fail $memory
+done
+
+#
+# Online all hot-pluggable memory
+#
+echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_ONLINE/error
+for memory in `hotpluggable_offline_memory`; do
+ online_memory_expect_success $memory
+done
+
+#
+# Test memory hot-remove error handling (online => offline)
+#
+echo $error > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_OFFLINE/error
+for memory in `hotpluggable_online_memory`; do
+ offline_memory_expect_fail $memory
+done
+
+echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_OFFLINE/error
+/sbin/modprobe -q -r memory-notifier-error-inject
+
+exit $retval
diff --git a/tools/testing/selftests/mount/.gitignore b/tools/testing/selftests/mount/.gitignore
new file mode 100644
index 000000000..856ad4107
--- /dev/null
+++ b/tools/testing/selftests/mount/.gitignore
@@ -0,0 +1 @@
+unprivileged-remount-test
diff --git a/tools/testing/selftests/mount/Makefile b/tools/testing/selftests/mount/Makefile
new file mode 100644
index 000000000..026890744
--- /dev/null
+++ b/tools/testing/selftests/mount/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for mount selftests.
+CFLAGS = -Wall \
+ -O2
+
+TEST_PROGS := run_tests.sh
+TEST_GEN_FILES := unprivileged-remount-test
+
+include ../lib.mk
diff --git a/tools/testing/selftests/mount/config b/tools/testing/selftests/mount/config
new file mode 100644
index 000000000..416bd53ce
--- /dev/null
+++ b/tools/testing/selftests/mount/config
@@ -0,0 +1 @@
+CONFIG_USER_NS=y
diff --git a/tools/testing/selftests/mount/run_tests.sh b/tools/testing/selftests/mount/run_tests.sh
new file mode 100755
index 000000000..4ab8f507d
--- /dev/null
+++ b/tools/testing/selftests/mount/run_tests.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+# Run mount selftests
+if [ -f /proc/self/uid_map ] ; then
+ ./unprivileged-remount-test ;
+else
+ echo "WARN: No /proc/self/uid_map exist, test skipped." ;
+ exit $ksft_skip
+fi
diff --git a/tools/testing/selftests/mount/unprivileged-remount-test.c b/tools/testing/selftests/mount/unprivileged-remount-test.c
new file mode 100644
index 000000000..584dc6bc3
--- /dev/null
+++ b/tools/testing/selftests/mount/unprivileged-remount-test.c
@@ -0,0 +1,371 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <sched.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/mount.h>
+#include <sys/wait.h>
+#include <sys/vfs.h>
+#include <sys/statvfs.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <grp.h>
+#include <stdbool.h>
+#include <stdarg.h>
+
+#ifndef CLONE_NEWNS
+# define CLONE_NEWNS 0x00020000
+#endif
+#ifndef CLONE_NEWUTS
+# define CLONE_NEWUTS 0x04000000
+#endif
+#ifndef CLONE_NEWIPC
+# define CLONE_NEWIPC 0x08000000
+#endif
+#ifndef CLONE_NEWNET
+# define CLONE_NEWNET 0x40000000
+#endif
+#ifndef CLONE_NEWUSER
+# define CLONE_NEWUSER 0x10000000
+#endif
+#ifndef CLONE_NEWPID
+# define CLONE_NEWPID 0x20000000
+#endif
+
+#ifndef MS_REC
+# define MS_REC 16384
+#endif
+#ifndef MS_RELATIME
+# define MS_RELATIME (1 << 21)
+#endif
+#ifndef MS_STRICTATIME
+# define MS_STRICTATIME (1 << 24)
+#endif
+
+static void die(char *fmt, ...)
+{
+ va_list ap;
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
+ exit(EXIT_FAILURE);
+}
+
+static void vmaybe_write_file(bool enoent_ok, char *filename, char *fmt, va_list ap)
+{
+ char buf[4096];
+ int fd;
+ ssize_t written;
+ int buf_len;
+
+ buf_len = vsnprintf(buf, sizeof(buf), fmt, ap);
+ if (buf_len < 0) {
+ die("vsnprintf failed: %s\n",
+ strerror(errno));
+ }
+ if (buf_len >= sizeof(buf)) {
+ die("vsnprintf output truncated\n");
+ }
+
+ fd = open(filename, O_WRONLY);
+ if (fd < 0) {
+ if ((errno == ENOENT) && enoent_ok)
+ return;
+ die("open of %s failed: %s\n",
+ filename, strerror(errno));
+ }
+ written = write(fd, buf, buf_len);
+ if (written != buf_len) {
+ if (written >= 0) {
+ die("short write to %s\n", filename);
+ } else {
+ die("write to %s failed: %s\n",
+ filename, strerror(errno));
+ }
+ }
+ if (close(fd) != 0) {
+ die("close of %s failed: %s\n",
+ filename, strerror(errno));
+ }
+}
+
+static void maybe_write_file(char *filename, char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ vmaybe_write_file(true, filename, fmt, ap);
+ va_end(ap);
+
+}
+
+static void write_file(char *filename, char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ vmaybe_write_file(false, filename, fmt, ap);
+ va_end(ap);
+
+}
+
+static int read_mnt_flags(const char *path)
+{
+ int ret;
+ struct statvfs stat;
+ int mnt_flags;
+
+ ret = statvfs(path, &stat);
+ if (ret != 0) {
+ die("statvfs of %s failed: %s\n",
+ path, strerror(errno));
+ }
+ if (stat.f_flag & ~(ST_RDONLY | ST_NOSUID | ST_NODEV | \
+ ST_NOEXEC | ST_NOATIME | ST_NODIRATIME | ST_RELATIME | \
+ ST_SYNCHRONOUS | ST_MANDLOCK)) {
+ die("Unrecognized mount flags\n");
+ }
+ mnt_flags = 0;
+ if (stat.f_flag & ST_RDONLY)
+ mnt_flags |= MS_RDONLY;
+ if (stat.f_flag & ST_NOSUID)
+ mnt_flags |= MS_NOSUID;
+ if (stat.f_flag & ST_NODEV)
+ mnt_flags |= MS_NODEV;
+ if (stat.f_flag & ST_NOEXEC)
+ mnt_flags |= MS_NOEXEC;
+ if (stat.f_flag & ST_NOATIME)
+ mnt_flags |= MS_NOATIME;
+ if (stat.f_flag & ST_NODIRATIME)
+ mnt_flags |= MS_NODIRATIME;
+ if (stat.f_flag & ST_RELATIME)
+ mnt_flags |= MS_RELATIME;
+ if (stat.f_flag & ST_SYNCHRONOUS)
+ mnt_flags |= MS_SYNCHRONOUS;
+ if (stat.f_flag & ST_MANDLOCK)
+ mnt_flags |= ST_MANDLOCK;
+
+ return mnt_flags;
+}
+
+static void create_and_enter_userns(void)
+{
+ uid_t uid;
+ gid_t gid;
+
+ uid = getuid();
+ gid = getgid();
+
+ if (unshare(CLONE_NEWUSER) !=0) {
+ die("unshare(CLONE_NEWUSER) failed: %s\n",
+ strerror(errno));
+ }
+
+ maybe_write_file("/proc/self/setgroups", "deny");
+ write_file("/proc/self/uid_map", "0 %d 1", uid);
+ write_file("/proc/self/gid_map", "0 %d 1", gid);
+
+ if (setgid(0) != 0) {
+ die ("setgid(0) failed %s\n",
+ strerror(errno));
+ }
+ if (setuid(0) != 0) {
+ die("setuid(0) failed %s\n",
+ strerror(errno));
+ }
+}
+
+static
+bool test_unpriv_remount(const char *fstype, const char *mount_options,
+ int mount_flags, int remount_flags, int invalid_flags)
+{
+ pid_t child;
+
+ child = fork();
+ if (child == -1) {
+ die("fork failed: %s\n",
+ strerror(errno));
+ }
+ if (child != 0) { /* parent */
+ pid_t pid;
+ int status;
+ pid = waitpid(child, &status, 0);
+ if (pid == -1) {
+ die("waitpid failed: %s\n",
+ strerror(errno));
+ }
+ if (pid != child) {
+ die("waited for %d got %d\n",
+ child, pid);
+ }
+ if (!WIFEXITED(status)) {
+ die("child did not terminate cleanly\n");
+ }
+ return WEXITSTATUS(status) == EXIT_SUCCESS ? true : false;
+ }
+
+ create_and_enter_userns();
+ if (unshare(CLONE_NEWNS) != 0) {
+ die("unshare(CLONE_NEWNS) failed: %s\n",
+ strerror(errno));
+ }
+
+ if (mount("testing", "/tmp", fstype, mount_flags, mount_options) != 0) {
+ die("mount of %s with options '%s' on /tmp failed: %s\n",
+ fstype,
+ mount_options? mount_options : "",
+ strerror(errno));
+ }
+
+ create_and_enter_userns();
+
+ if (unshare(CLONE_NEWNS) != 0) {
+ die("unshare(CLONE_NEWNS) failed: %s\n",
+ strerror(errno));
+ }
+
+ if (mount("/tmp", "/tmp", "none",
+ MS_REMOUNT | MS_BIND | remount_flags, NULL) != 0) {
+ /* system("cat /proc/self/mounts"); */
+ die("remount of /tmp failed: %s\n",
+ strerror(errno));
+ }
+
+ if (mount("/tmp", "/tmp", "none",
+ MS_REMOUNT | MS_BIND | invalid_flags, NULL) == 0) {
+ /* system("cat /proc/self/mounts"); */
+ die("remount of /tmp with invalid flags "
+ "succeeded unexpectedly\n");
+ }
+ exit(EXIT_SUCCESS);
+}
+
+static bool test_unpriv_remount_simple(int mount_flags)
+{
+ return test_unpriv_remount("ramfs", NULL, mount_flags, mount_flags, 0);
+}
+
+static bool test_unpriv_remount_atime(int mount_flags, int invalid_flags)
+{
+ return test_unpriv_remount("ramfs", NULL, mount_flags, mount_flags,
+ invalid_flags);
+}
+
+static bool test_priv_mount_unpriv_remount(void)
+{
+ pid_t child;
+ int ret;
+ const char *orig_path = "/dev";
+ const char *dest_path = "/tmp";
+ int orig_mnt_flags, remount_mnt_flags;
+
+ child = fork();
+ if (child == -1) {
+ die("fork failed: %s\n",
+ strerror(errno));
+ }
+ if (child != 0) { /* parent */
+ pid_t pid;
+ int status;
+ pid = waitpid(child, &status, 0);
+ if (pid == -1) {
+ die("waitpid failed: %s\n",
+ strerror(errno));
+ }
+ if (pid != child) {
+ die("waited for %d got %d\n",
+ child, pid);
+ }
+ if (!WIFEXITED(status)) {
+ die("child did not terminate cleanly\n");
+ }
+ return WEXITSTATUS(status) == EXIT_SUCCESS ? true : false;
+ }
+
+ orig_mnt_flags = read_mnt_flags(orig_path);
+
+ create_and_enter_userns();
+ ret = unshare(CLONE_NEWNS);
+ if (ret != 0) {
+ die("unshare(CLONE_NEWNS) failed: %s\n",
+ strerror(errno));
+ }
+
+ ret = mount(orig_path, dest_path, "bind", MS_BIND | MS_REC, NULL);
+ if (ret != 0) {
+ die("recursive bind mount of %s onto %s failed: %s\n",
+ orig_path, dest_path, strerror(errno));
+ }
+
+ ret = mount(dest_path, dest_path, "none",
+ MS_REMOUNT | MS_BIND | orig_mnt_flags , NULL);
+ if (ret != 0) {
+ /* system("cat /proc/self/mounts"); */
+ die("remount of /tmp failed: %s\n",
+ strerror(errno));
+ }
+
+ remount_mnt_flags = read_mnt_flags(dest_path);
+ if (orig_mnt_flags != remount_mnt_flags) {
+ die("Mount flags unexpectedly changed during remount of %s originally mounted on %s\n",
+ dest_path, orig_path);
+ }
+ exit(EXIT_SUCCESS);
+}
+
+int main(int argc, char **argv)
+{
+ if (!test_unpriv_remount_simple(MS_RDONLY)) {
+ die("MS_RDONLY malfunctions\n");
+ }
+ if (!test_unpriv_remount("devpts", "newinstance", MS_NODEV, MS_NODEV, 0)) {
+ die("MS_NODEV malfunctions\n");
+ }
+ if (!test_unpriv_remount_simple(MS_NOSUID)) {
+ die("MS_NOSUID malfunctions\n");
+ }
+ if (!test_unpriv_remount_simple(MS_NOEXEC)) {
+ die("MS_NOEXEC malfunctions\n");
+ }
+ if (!test_unpriv_remount_atime(MS_RELATIME,
+ MS_NOATIME))
+ {
+ die("MS_RELATIME malfunctions\n");
+ }
+ if (!test_unpriv_remount_atime(MS_STRICTATIME,
+ MS_NOATIME))
+ {
+ die("MS_STRICTATIME malfunctions\n");
+ }
+ if (!test_unpriv_remount_atime(MS_NOATIME,
+ MS_STRICTATIME))
+ {
+ die("MS_NOATIME malfunctions\n");
+ }
+ if (!test_unpriv_remount_atime(MS_RELATIME|MS_NODIRATIME,
+ MS_NOATIME))
+ {
+ die("MS_RELATIME|MS_NODIRATIME malfunctions\n");
+ }
+ if (!test_unpriv_remount_atime(MS_STRICTATIME|MS_NODIRATIME,
+ MS_NOATIME))
+ {
+ die("MS_STRICTATIME|MS_NODIRATIME malfunctions\n");
+ }
+ if (!test_unpriv_remount_atime(MS_NOATIME|MS_NODIRATIME,
+ MS_STRICTATIME))
+ {
+ die("MS_NOATIME|MS_DIRATIME malfunctions\n");
+ }
+ if (!test_unpriv_remount("ramfs", NULL, MS_STRICTATIME, 0, MS_NOATIME))
+ {
+ die("Default atime malfunctions\n");
+ }
+ if (!test_priv_mount_unpriv_remount()) {
+ die("Mount flags unexpectedly changed after remount\n");
+ }
+ return EXIT_SUCCESS;
+}
diff --git a/tools/testing/selftests/mqueue/.gitignore b/tools/testing/selftests/mqueue/.gitignore
new file mode 100644
index 000000000..d8d423772
--- /dev/null
+++ b/tools/testing/selftests/mqueue/.gitignore
@@ -0,0 +1,2 @@
+mq_open_tests
+mq_perf_tests
diff --git a/tools/testing/selftests/mqueue/Makefile b/tools/testing/selftests/mqueue/Makefile
new file mode 100644
index 000000000..8a58055fc
--- /dev/null
+++ b/tools/testing/selftests/mqueue/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS += -O2
+LDLIBS = -lrt -lpthread -lpopt
+
+TEST_GEN_PROGS := mq_open_tests mq_perf_tests
+
+include ../lib.mk
diff --git a/tools/testing/selftests/mqueue/mq_open_tests.c b/tools/testing/selftests/mqueue/mq_open_tests.c
new file mode 100644
index 000000000..9403ac01b
--- /dev/null
+++ b/tools/testing/selftests/mqueue/mq_open_tests.c
@@ -0,0 +1,502 @@
+/*
+ * This application is Copyright 2012 Red Hat, Inc.
+ * Doug Ledford <dledford@redhat.com>
+ *
+ * mq_open_tests is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 3.
+ *
+ * mq_open_tests is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * For the full text of the license, see <http://www.gnu.org/licenses/>.
+ *
+ * mq_open_tests.c
+ * Tests the various situations that should either succeed or fail to
+ * open a posix message queue and then reports whether or not they
+ * did as they were supposed to.
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <string.h>
+#include <limits.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/stat.h>
+#include <mqueue.h>
+#include <error.h>
+
+#include "../kselftest.h"
+
+static char *usage =
+"Usage:\n"
+" %s path\n"
+"\n"
+" path Path name of the message queue to create\n"
+"\n"
+" Note: this program must be run as root in order to enable all tests\n"
+"\n";
+
+char *DEF_MSGS = "/proc/sys/fs/mqueue/msg_default";
+char *DEF_MSGSIZE = "/proc/sys/fs/mqueue/msgsize_default";
+char *MAX_MSGS = "/proc/sys/fs/mqueue/msg_max";
+char *MAX_MSGSIZE = "/proc/sys/fs/mqueue/msgsize_max";
+
+int default_settings;
+struct rlimit saved_limits, cur_limits;
+int saved_def_msgs, saved_def_msgsize, saved_max_msgs, saved_max_msgsize;
+int cur_def_msgs, cur_def_msgsize, cur_max_msgs, cur_max_msgsize;
+FILE *def_msgs, *def_msgsize, *max_msgs, *max_msgsize;
+char *queue_path;
+char *default_queue_path = "/test1";
+mqd_t queue = -1;
+
+static inline void __set(FILE *stream, int value, char *err_msg);
+void shutdown(int exit_val, char *err_cause, int line_no);
+static inline int get(FILE *stream);
+static inline void set(FILE *stream, int value);
+static inline void getr(int type, struct rlimit *rlim);
+static inline void setr(int type, struct rlimit *rlim);
+void validate_current_settings();
+static inline void test_queue(struct mq_attr *attr, struct mq_attr *result);
+static inline int test_queue_fail(struct mq_attr *attr, struct mq_attr *result);
+
+static inline void __set(FILE *stream, int value, char *err_msg)
+{
+ rewind(stream);
+ if (fprintf(stream, "%d", value) < 0)
+ perror(err_msg);
+}
+
+
+void shutdown(int exit_val, char *err_cause, int line_no)
+{
+ static int in_shutdown = 0;
+
+ /* In case we get called recursively by a set() call below */
+ if (in_shutdown++)
+ return;
+
+ if (seteuid(0) == -1)
+ perror("seteuid() failed");
+
+ if (queue != -1)
+ if (mq_close(queue))
+ perror("mq_close() during shutdown");
+ if (queue_path)
+ /*
+ * Be silent if this fails, if we cleaned up already it's
+ * expected to fail
+ */
+ mq_unlink(queue_path);
+ if (default_settings) {
+ if (saved_def_msgs)
+ __set(def_msgs, saved_def_msgs,
+ "failed to restore saved_def_msgs");
+ if (saved_def_msgsize)
+ __set(def_msgsize, saved_def_msgsize,
+ "failed to restore saved_def_msgsize");
+ }
+ if (saved_max_msgs)
+ __set(max_msgs, saved_max_msgs,
+ "failed to restore saved_max_msgs");
+ if (saved_max_msgsize)
+ __set(max_msgsize, saved_max_msgsize,
+ "failed to restore saved_max_msgsize");
+ if (exit_val)
+ error(exit_val, errno, "%s at %d", err_cause, line_no);
+ exit(0);
+}
+
+static inline int get(FILE *stream)
+{
+ int value;
+ rewind(stream);
+ if (fscanf(stream, "%d", &value) != 1)
+ shutdown(4, "Error reading /proc entry", __LINE__ - 1);
+ return value;
+}
+
+static inline void set(FILE *stream, int value)
+{
+ int new_value;
+
+ rewind(stream);
+ if (fprintf(stream, "%d", value) < 0)
+ return shutdown(5, "Failed writing to /proc file",
+ __LINE__ - 1);
+ new_value = get(stream);
+ if (new_value != value)
+ return shutdown(5, "We didn't get what we wrote to /proc back",
+ __LINE__ - 1);
+}
+
+static inline void getr(int type, struct rlimit *rlim)
+{
+ if (getrlimit(type, rlim))
+ shutdown(6, "getrlimit()", __LINE__ - 1);
+}
+
+static inline void setr(int type, struct rlimit *rlim)
+{
+ if (setrlimit(type, rlim))
+ shutdown(7, "setrlimit()", __LINE__ - 1);
+}
+
+void validate_current_settings()
+{
+ int rlim_needed;
+
+ if (cur_limits.rlim_cur < 4096) {
+ printf("Current rlimit value for POSIX message queue bytes is "
+ "unreasonably low,\nincreasing.\n\n");
+ cur_limits.rlim_cur = 8192;
+ cur_limits.rlim_max = 16384;
+ setr(RLIMIT_MSGQUEUE, &cur_limits);
+ }
+
+ if (default_settings) {
+ rlim_needed = (cur_def_msgs + 1) * (cur_def_msgsize + 1 +
+ 2 * sizeof(void *));
+ if (rlim_needed > cur_limits.rlim_cur) {
+ printf("Temporarily lowering default queue parameters "
+ "to something that will work\n"
+ "with the current rlimit values.\n\n");
+ set(def_msgs, 10);
+ cur_def_msgs = 10;
+ set(def_msgsize, 128);
+ cur_def_msgsize = 128;
+ }
+ } else {
+ rlim_needed = (cur_max_msgs + 1) * (cur_max_msgsize + 1 +
+ 2 * sizeof(void *));
+ if (rlim_needed > cur_limits.rlim_cur) {
+ printf("Temporarily lowering maximum queue parameters "
+ "to something that will work\n"
+ "with the current rlimit values in case this is "
+ "a kernel that ties the default\n"
+ "queue parameters to the maximum queue "
+ "parameters.\n\n");
+ set(max_msgs, 10);
+ cur_max_msgs = 10;
+ set(max_msgsize, 128);
+ cur_max_msgsize = 128;
+ }
+ }
+}
+
+/*
+ * test_queue - Test opening a queue, shutdown if we fail. This should
+ * only be called in situations that should never fail. We clean up
+ * after ourselves and return the queue attributes in *result.
+ */
+static inline void test_queue(struct mq_attr *attr, struct mq_attr *result)
+{
+ int flags = O_RDWR | O_EXCL | O_CREAT;
+ int perms = DEFFILEMODE;
+
+ if ((queue = mq_open(queue_path, flags, perms, attr)) == -1)
+ shutdown(1, "mq_open()", __LINE__);
+ if (mq_getattr(queue, result))
+ shutdown(1, "mq_getattr()", __LINE__);
+ if (mq_close(queue))
+ shutdown(1, "mq_close()", __LINE__);
+ queue = -1;
+ if (mq_unlink(queue_path))
+ shutdown(1, "mq_unlink()", __LINE__);
+}
+
+/*
+ * Same as test_queue above, but failure is not fatal.
+ * Returns:
+ * 0 - Failed to create a queue
+ * 1 - Created a queue, attributes in *result
+ */
+static inline int test_queue_fail(struct mq_attr *attr, struct mq_attr *result)
+{
+ int flags = O_RDWR | O_EXCL | O_CREAT;
+ int perms = DEFFILEMODE;
+
+ if ((queue = mq_open(queue_path, flags, perms, attr)) == -1)
+ return 0;
+ if (mq_getattr(queue, result))
+ shutdown(1, "mq_getattr()", __LINE__);
+ if (mq_close(queue))
+ shutdown(1, "mq_close()", __LINE__);
+ queue = -1;
+ if (mq_unlink(queue_path))
+ shutdown(1, "mq_unlink()", __LINE__);
+ return 1;
+}
+
+int main(int argc, char *argv[])
+{
+ struct mq_attr attr, result;
+
+ if (argc != 2) {
+ printf("Using Default queue path - %s\n", default_queue_path);
+ queue_path = default_queue_path;
+ } else {
+
+ /*
+ * Although we can create a msg queue with a non-absolute path name,
+ * unlink will fail. So, if the name doesn't start with a /, add one
+ * when we save it.
+ */
+ if (*argv[1] == '/')
+ queue_path = strdup(argv[1]);
+ else {
+ queue_path = malloc(strlen(argv[1]) + 2);
+ if (!queue_path) {
+ perror("malloc()");
+ exit(1);
+ }
+ queue_path[0] = '/';
+ queue_path[1] = 0;
+ strcat(queue_path, argv[1]);
+ }
+ }
+
+ if (getuid() != 0)
+ ksft_exit_skip("Not running as root, but almost all tests "
+ "require root in order to modify\nsystem settings. "
+ "Exiting.\n");
+
+ /* Find out what files there are for us to make tweaks in */
+ def_msgs = fopen(DEF_MSGS, "r+");
+ def_msgsize = fopen(DEF_MSGSIZE, "r+");
+ max_msgs = fopen(MAX_MSGS, "r+");
+ max_msgsize = fopen(MAX_MSGSIZE, "r+");
+
+ if (!max_msgs)
+ shutdown(2, "Failed to open msg_max", __LINE__);
+ if (!max_msgsize)
+ shutdown(2, "Failed to open msgsize_max", __LINE__);
+ if (def_msgs || def_msgsize)
+ default_settings = 1;
+
+ /* Load up the current system values for everything we can */
+ getr(RLIMIT_MSGQUEUE, &saved_limits);
+ cur_limits = saved_limits;
+ if (default_settings) {
+ saved_def_msgs = cur_def_msgs = get(def_msgs);
+ saved_def_msgsize = cur_def_msgsize = get(def_msgsize);
+ }
+ saved_max_msgs = cur_max_msgs = get(max_msgs);
+ saved_max_msgsize = cur_max_msgsize = get(max_msgsize);
+
+ /* Tell the user our initial state */
+ printf("\nInitial system state:\n");
+ printf("\tUsing queue path:\t\t%s\n", queue_path);
+ printf("\tRLIMIT_MSGQUEUE(soft):\t\t%ld\n",
+ (long) saved_limits.rlim_cur);
+ printf("\tRLIMIT_MSGQUEUE(hard):\t\t%ld\n",
+ (long) saved_limits.rlim_max);
+ printf("\tMaximum Message Size:\t\t%d\n", saved_max_msgsize);
+ printf("\tMaximum Queue Size:\t\t%d\n", saved_max_msgs);
+ if (default_settings) {
+ printf("\tDefault Message Size:\t\t%d\n", saved_def_msgsize);
+ printf("\tDefault Queue Size:\t\t%d\n", saved_def_msgs);
+ } else {
+ printf("\tDefault Message Size:\t\tNot Supported\n");
+ printf("\tDefault Queue Size:\t\tNot Supported\n");
+ }
+ printf("\n");
+
+ validate_current_settings();
+
+ printf("Adjusted system state for testing:\n");
+ printf("\tRLIMIT_MSGQUEUE(soft):\t\t%ld\n", (long) cur_limits.rlim_cur);
+ printf("\tRLIMIT_MSGQUEUE(hard):\t\t%ld\n", (long) cur_limits.rlim_max);
+ printf("\tMaximum Message Size:\t\t%d\n", cur_max_msgsize);
+ printf("\tMaximum Queue Size:\t\t%d\n", cur_max_msgs);
+ if (default_settings) {
+ printf("\tDefault Message Size:\t\t%d\n", cur_def_msgsize);
+ printf("\tDefault Queue Size:\t\t%d\n", cur_def_msgs);
+ }
+
+ printf("\n\nTest series 1, behavior when no attr struct "
+ "passed to mq_open:\n");
+ if (!default_settings) {
+ test_queue(NULL, &result);
+ printf("Given sane system settings, mq_open without an attr "
+ "struct succeeds:\tPASS\n");
+ if (result.mq_maxmsg != cur_max_msgs ||
+ result.mq_msgsize != cur_max_msgsize) {
+ printf("Kernel does not support setting the default "
+ "mq attributes,\nbut also doesn't tie the "
+ "defaults to the maximums:\t\t\tPASS\n");
+ } else {
+ set(max_msgs, ++cur_max_msgs);
+ set(max_msgsize, ++cur_max_msgsize);
+ test_queue(NULL, &result);
+ if (result.mq_maxmsg == cur_max_msgs &&
+ result.mq_msgsize == cur_max_msgsize)
+ printf("Kernel does not support setting the "
+ "default mq attributes and\n"
+ "also ties system wide defaults to "
+ "the system wide maximums:\t\t"
+ "FAIL\n");
+ else
+ printf("Kernel does not support setting the "
+ "default mq attributes,\n"
+ "but also doesn't tie the defaults to "
+ "the maximums:\t\t\tPASS\n");
+ }
+ } else {
+ printf("Kernel supports setting defaults separately from "
+ "maximums:\t\tPASS\n");
+ /*
+ * While we are here, go ahead and test that the kernel
+ * properly follows the default settings
+ */
+ test_queue(NULL, &result);
+ printf("Given sane values, mq_open without an attr struct "
+ "succeeds:\t\tPASS\n");
+ if (result.mq_maxmsg != cur_def_msgs ||
+ result.mq_msgsize != cur_def_msgsize)
+ printf("Kernel supports setting defaults, but does "
+ "not actually honor them:\tFAIL\n\n");
+ else {
+ set(def_msgs, ++cur_def_msgs);
+ set(def_msgsize, ++cur_def_msgsize);
+ /* In case max was the same as the default */
+ set(max_msgs, ++cur_max_msgs);
+ set(max_msgsize, ++cur_max_msgsize);
+ test_queue(NULL, &result);
+ if (result.mq_maxmsg != cur_def_msgs ||
+ result.mq_msgsize != cur_def_msgsize)
+ printf("Kernel supports setting defaults, but "
+ "does not actually honor them:\t"
+ "FAIL\n");
+ else
+ printf("Kernel properly honors default setting "
+ "knobs:\t\t\t\tPASS\n");
+ }
+ set(def_msgs, cur_max_msgs + 1);
+ cur_def_msgs = cur_max_msgs + 1;
+ set(def_msgsize, cur_max_msgsize + 1);
+ cur_def_msgsize = cur_max_msgsize + 1;
+ if (cur_def_msgs * (cur_def_msgsize + 2 * sizeof(void *)) >=
+ cur_limits.rlim_cur) {
+ cur_limits.rlim_cur = (cur_def_msgs + 2) *
+ (cur_def_msgsize + 2 * sizeof(void *));
+ cur_limits.rlim_max = 2 * cur_limits.rlim_cur;
+ setr(RLIMIT_MSGQUEUE, &cur_limits);
+ }
+ if (test_queue_fail(NULL, &result)) {
+ if (result.mq_maxmsg == cur_max_msgs &&
+ result.mq_msgsize == cur_max_msgsize)
+ printf("Kernel properly limits default values "
+ "to lesser of default/max:\t\tPASS\n");
+ else
+ printf("Kernel does not properly set default "
+ "queue parameters when\ndefaults > "
+ "max:\t\t\t\t\t\t\t\tFAIL\n");
+ } else
+ printf("Kernel fails to open mq because defaults are "
+ "greater than maximums:\tFAIL\n");
+ set(def_msgs, --cur_def_msgs);
+ set(def_msgsize, --cur_def_msgsize);
+ cur_limits.rlim_cur = cur_limits.rlim_max = cur_def_msgs *
+ cur_def_msgsize;
+ setr(RLIMIT_MSGQUEUE, &cur_limits);
+ if (test_queue_fail(NULL, &result))
+ printf("Kernel creates queue even though defaults "
+ "would exceed\nrlimit setting:"
+ "\t\t\t\t\t\t\t\tFAIL\n");
+ else
+ printf("Kernel properly fails to create queue when "
+ "defaults would\nexceed rlimit:"
+ "\t\t\t\t\t\t\t\tPASS\n");
+ }
+
+ /*
+ * Test #2 - open with an attr struct that exceeds rlimit
+ */
+ printf("\n\nTest series 2, behavior when attr struct is "
+ "passed to mq_open:\n");
+ cur_max_msgs = 32;
+ cur_max_msgsize = cur_limits.rlim_max >> 4;
+ set(max_msgs, cur_max_msgs);
+ set(max_msgsize, cur_max_msgsize);
+ attr.mq_maxmsg = cur_max_msgs;
+ attr.mq_msgsize = cur_max_msgsize;
+ if (test_queue_fail(&attr, &result))
+ printf("Queue open in excess of rlimit max when euid = 0 "
+ "succeeded:\t\tFAIL\n");
+ else
+ printf("Queue open in excess of rlimit max when euid = 0 "
+ "failed:\t\tPASS\n");
+ attr.mq_maxmsg = cur_max_msgs + 1;
+ attr.mq_msgsize = 10;
+ if (test_queue_fail(&attr, &result))
+ printf("Queue open with mq_maxmsg > limit when euid = 0 "
+ "succeeded:\t\tPASS\n");
+ else
+ printf("Queue open with mq_maxmsg > limit when euid = 0 "
+ "failed:\t\tFAIL\n");
+ attr.mq_maxmsg = 1;
+ attr.mq_msgsize = cur_max_msgsize + 1;
+ if (test_queue_fail(&attr, &result))
+ printf("Queue open with mq_msgsize > limit when euid = 0 "
+ "succeeded:\t\tPASS\n");
+ else
+ printf("Queue open with mq_msgsize > limit when euid = 0 "
+ "failed:\t\tFAIL\n");
+ attr.mq_maxmsg = 65536;
+ attr.mq_msgsize = 65536;
+ if (test_queue_fail(&attr, &result))
+ printf("Queue open with total size > 2GB when euid = 0 "
+ "succeeded:\t\tFAIL\n");
+ else
+ printf("Queue open with total size > 2GB when euid = 0 "
+ "failed:\t\t\tPASS\n");
+
+ if (seteuid(99) == -1) {
+ perror("seteuid() failed");
+ exit(1);
+ }
+
+ attr.mq_maxmsg = cur_max_msgs;
+ attr.mq_msgsize = cur_max_msgsize;
+ if (test_queue_fail(&attr, &result))
+ printf("Queue open in excess of rlimit max when euid = 99 "
+ "succeeded:\t\tFAIL\n");
+ else
+ printf("Queue open in excess of rlimit max when euid = 99 "
+ "failed:\t\tPASS\n");
+ attr.mq_maxmsg = cur_max_msgs + 1;
+ attr.mq_msgsize = 10;
+ if (test_queue_fail(&attr, &result))
+ printf("Queue open with mq_maxmsg > limit when euid = 99 "
+ "succeeded:\t\tFAIL\n");
+ else
+ printf("Queue open with mq_maxmsg > limit when euid = 99 "
+ "failed:\t\tPASS\n");
+ attr.mq_maxmsg = 1;
+ attr.mq_msgsize = cur_max_msgsize + 1;
+ if (test_queue_fail(&attr, &result))
+ printf("Queue open with mq_msgsize > limit when euid = 99 "
+ "succeeded:\t\tFAIL\n");
+ else
+ printf("Queue open with mq_msgsize > limit when euid = 99 "
+ "failed:\t\tPASS\n");
+ attr.mq_maxmsg = 65536;
+ attr.mq_msgsize = 65536;
+ if (test_queue_fail(&attr, &result))
+ printf("Queue open with total size > 2GB when euid = 99 "
+ "succeeded:\t\tFAIL\n");
+ else
+ printf("Queue open with total size > 2GB when euid = 99 "
+ "failed:\t\t\tPASS\n");
+
+ shutdown(0,"",0);
+}
diff --git a/tools/testing/selftests/mqueue/mq_perf_tests.c b/tools/testing/selftests/mqueue/mq_perf_tests.c
new file mode 100644
index 000000000..84fda3b49
--- /dev/null
+++ b/tools/testing/selftests/mqueue/mq_perf_tests.c
@@ -0,0 +1,752 @@
+/*
+ * This application is Copyright 2012 Red Hat, Inc.
+ * Doug Ledford <dledford@redhat.com>
+ *
+ * mq_perf_tests is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 3.
+ *
+ * mq_perf_tests is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * For the full text of the license, see <http://www.gnu.org/licenses/>.
+ *
+ * mq_perf_tests.c
+ * Tests various types of message queue workloads, concentrating on those
+ * situations that invole large message sizes, large message queue depths,
+ * or both, and reports back useful metrics about kernel message queue
+ * performance.
+ *
+ */
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <string.h>
+#include <limits.h>
+#include <errno.h>
+#include <signal.h>
+#include <pthread.h>
+#include <sched.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/stat.h>
+#include <mqueue.h>
+#include <popt.h>
+#include <error.h>
+
+#include "../kselftest.h"
+
+static char *usage =
+"Usage:\n"
+" %s [-c #[,#..] -f] path\n"
+"\n"
+" -c # Skip most tests and go straight to a high queue depth test\n"
+" and then run that test continuously (useful for running at\n"
+" the same time as some other workload to see how much the\n"
+" cache thrashing caused by adding messages to a very deep\n"
+" queue impacts the performance of other programs). The number\n"
+" indicates which CPU core we should bind the process to during\n"
+" the run. If you have more than one physical CPU, then you\n"
+" will need one copy per physical CPU package, and you should\n"
+" specify the CPU cores to pin ourself to via a comma separated\n"
+" list of CPU values.\n"
+" -f Only usable with continuous mode. Pin ourself to the CPUs\n"
+" as requested, then instead of looping doing a high mq\n"
+" workload, just busy loop. This will allow us to lock up a\n"
+" single CPU just like we normally would, but without actually\n"
+" thrashing the CPU cache. This is to make it easier to get\n"
+" comparable numbers from some other workload running on the\n"
+" other CPUs. One set of numbers with # CPUs locked up running\n"
+" an mq workload, and another set of numbers with those same\n"
+" CPUs locked away from the test workload, but not doing\n"
+" anything to trash the cache like the mq workload might.\n"
+" path Path name of the message queue to create\n"
+"\n"
+" Note: this program must be run as root in order to enable all tests\n"
+"\n";
+
+char *MAX_MSGS = "/proc/sys/fs/mqueue/msg_max";
+char *MAX_MSGSIZE = "/proc/sys/fs/mqueue/msgsize_max";
+
+#define min(a, b) ((a) < (b) ? (a) : (b))
+#define MAX_CPUS 64
+char *cpu_option_string;
+int cpus_to_pin[MAX_CPUS];
+int num_cpus_to_pin;
+pthread_t cpu_threads[MAX_CPUS];
+pthread_t main_thread;
+cpu_set_t *cpu_set;
+int cpu_set_size;
+int cpus_online;
+
+#define MSG_SIZE 16
+#define TEST1_LOOPS 10000000
+#define TEST2_LOOPS 100000
+int continuous_mode;
+int continuous_mode_fake;
+
+struct rlimit saved_limits, cur_limits;
+int saved_max_msgs, saved_max_msgsize;
+int cur_max_msgs, cur_max_msgsize;
+FILE *max_msgs, *max_msgsize;
+int cur_nice;
+char *queue_path = "/mq_perf_tests";
+mqd_t queue = -1;
+struct mq_attr result;
+int mq_prio_max;
+
+const struct poptOption options[] = {
+ {
+ .longName = "continuous",
+ .shortName = 'c',
+ .argInfo = POPT_ARG_STRING,
+ .arg = &cpu_option_string,
+ .val = 'c',
+ .descrip = "Run continuous tests at a high queue depth in "
+ "order to test the effects of cache thrashing on "
+ "other tasks on the system. This test is intended "
+ "to be run on one core of each physical CPU while "
+ "some other CPU intensive task is run on all the other "
+ "cores of that same physical CPU and the other task "
+ "is timed. It is assumed that the process of adding "
+ "messages to the message queue in a tight loop will "
+ "impact that other task to some degree. Once the "
+ "tests are performed in this way, you should then "
+ "re-run the tests using fake mode in order to check "
+ "the difference in time required to perform the CPU "
+ "intensive task",
+ .argDescrip = "cpu[,cpu]",
+ },
+ {
+ .longName = "fake",
+ .shortName = 'f',
+ .argInfo = POPT_ARG_NONE,
+ .arg = &continuous_mode_fake,
+ .val = 0,
+ .descrip = "Tie up the CPUs that we would normally tie up in"
+ "continuous mode, but don't actually do any mq stuff, "
+ "just keep the CPU busy so it can't be used to process "
+ "system level tasks as this would free up resources on "
+ "the other CPU cores and skew the comparison between "
+ "the no-mqueue work and mqueue work tests",
+ .argDescrip = NULL,
+ },
+ {
+ .longName = "path",
+ .shortName = 'p',
+ .argInfo = POPT_ARG_STRING | POPT_ARGFLAG_SHOW_DEFAULT,
+ .arg = &queue_path,
+ .val = 'p',
+ .descrip = "The name of the path to use in the mqueue "
+ "filesystem for our tests",
+ .argDescrip = "pathname",
+ },
+ POPT_AUTOHELP
+ POPT_TABLEEND
+};
+
+static inline void __set(FILE *stream, int value, char *err_msg);
+void shutdown(int exit_val, char *err_cause, int line_no);
+void sig_action_SIGUSR1(int signum, siginfo_t *info, void *context);
+void sig_action(int signum, siginfo_t *info, void *context);
+static inline int get(FILE *stream);
+static inline void set(FILE *stream, int value);
+static inline int try_set(FILE *stream, int value);
+static inline void getr(int type, struct rlimit *rlim);
+static inline void setr(int type, struct rlimit *rlim);
+static inline void open_queue(struct mq_attr *attr);
+void increase_limits(void);
+
+static inline void __set(FILE *stream, int value, char *err_msg)
+{
+ rewind(stream);
+ if (fprintf(stream, "%d", value) < 0)
+ perror(err_msg);
+}
+
+
+void shutdown(int exit_val, char *err_cause, int line_no)
+{
+ static int in_shutdown = 0;
+ int errno_at_shutdown = errno;
+ int i;
+
+ /* In case we get called by multiple threads or from an sighandler */
+ if (in_shutdown++)
+ return;
+
+ /* Free the cpu_set allocated using CPU_ALLOC in main function */
+ CPU_FREE(cpu_set);
+
+ for (i = 0; i < num_cpus_to_pin; i++)
+ if (cpu_threads[i]) {
+ pthread_kill(cpu_threads[i], SIGUSR1);
+ pthread_join(cpu_threads[i], NULL);
+ }
+
+ if (queue != -1)
+ if (mq_close(queue))
+ perror("mq_close() during shutdown");
+ if (queue_path)
+ /*
+ * Be silent if this fails, if we cleaned up already it's
+ * expected to fail
+ */
+ mq_unlink(queue_path);
+ if (saved_max_msgs)
+ __set(max_msgs, saved_max_msgs,
+ "failed to restore saved_max_msgs");
+ if (saved_max_msgsize)
+ __set(max_msgsize, saved_max_msgsize,
+ "failed to restore saved_max_msgsize");
+ if (exit_val)
+ error(exit_val, errno_at_shutdown, "%s at %d",
+ err_cause, line_no);
+ exit(0);
+}
+
+void sig_action_SIGUSR1(int signum, siginfo_t *info, void *context)
+{
+ if (pthread_self() != main_thread)
+ pthread_exit(0);
+ else {
+ fprintf(stderr, "Caught signal %d in SIGUSR1 handler, "
+ "exiting\n", signum);
+ shutdown(0, "", 0);
+ fprintf(stderr, "\n\nReturned from shutdown?!?!\n\n");
+ exit(0);
+ }
+}
+
+void sig_action(int signum, siginfo_t *info, void *context)
+{
+ if (pthread_self() != main_thread)
+ pthread_kill(main_thread, signum);
+ else {
+ fprintf(stderr, "Caught signal %d, exiting\n", signum);
+ shutdown(0, "", 0);
+ fprintf(stderr, "\n\nReturned from shutdown?!?!\n\n");
+ exit(0);
+ }
+}
+
+static inline int get(FILE *stream)
+{
+ int value;
+ rewind(stream);
+ if (fscanf(stream, "%d", &value) != 1)
+ shutdown(4, "Error reading /proc entry", __LINE__);
+ return value;
+}
+
+static inline void set(FILE *stream, int value)
+{
+ int new_value;
+
+ rewind(stream);
+ if (fprintf(stream, "%d", value) < 0)
+ return shutdown(5, "Failed writing to /proc file", __LINE__);
+ new_value = get(stream);
+ if (new_value != value)
+ return shutdown(5, "We didn't get what we wrote to /proc back",
+ __LINE__);
+}
+
+static inline int try_set(FILE *stream, int value)
+{
+ int new_value;
+
+ rewind(stream);
+ fprintf(stream, "%d", value);
+ new_value = get(stream);
+ return new_value == value;
+}
+
+static inline void getr(int type, struct rlimit *rlim)
+{
+ if (getrlimit(type, rlim))
+ shutdown(6, "getrlimit()", __LINE__);
+}
+
+static inline void setr(int type, struct rlimit *rlim)
+{
+ if (setrlimit(type, rlim))
+ shutdown(7, "setrlimit()", __LINE__);
+}
+
+/**
+ * open_queue - open the global queue for testing
+ * @attr - An attr struct specifying the desired queue traits
+ * @result - An attr struct that lists the actual traits the queue has
+ *
+ * This open is not allowed to fail, failure will result in an orderly
+ * shutdown of the program. The global queue_path is used to set what
+ * queue to open, the queue descriptor is saved in the global queue
+ * variable.
+ */
+static inline void open_queue(struct mq_attr *attr)
+{
+ int flags = O_RDWR | O_EXCL | O_CREAT | O_NONBLOCK;
+ int perms = DEFFILEMODE;
+
+ queue = mq_open(queue_path, flags, perms, attr);
+ if (queue == -1)
+ shutdown(1, "mq_open()", __LINE__);
+ if (mq_getattr(queue, &result))
+ shutdown(1, "mq_getattr()", __LINE__);
+ printf("\n\tQueue %s created:\n", queue_path);
+ printf("\t\tmq_flags:\t\t\t%s\n", result.mq_flags & O_NONBLOCK ?
+ "O_NONBLOCK" : "(null)");
+ printf("\t\tmq_maxmsg:\t\t\t%lu\n", result.mq_maxmsg);
+ printf("\t\tmq_msgsize:\t\t\t%lu\n", result.mq_msgsize);
+ printf("\t\tmq_curmsgs:\t\t\t%lu\n", result.mq_curmsgs);
+}
+
+void *fake_cont_thread(void *arg)
+{
+ int i;
+
+ for (i = 0; i < num_cpus_to_pin; i++)
+ if (cpu_threads[i] == pthread_self())
+ break;
+ printf("\tStarted fake continuous mode thread %d on CPU %d\n", i,
+ cpus_to_pin[i]);
+ while (1)
+ ;
+}
+
+void *cont_thread(void *arg)
+{
+ char buff[MSG_SIZE];
+ int i, priority;
+
+ for (i = 0; i < num_cpus_to_pin; i++)
+ if (cpu_threads[i] == pthread_self())
+ break;
+ printf("\tStarted continuous mode thread %d on CPU %d\n", i,
+ cpus_to_pin[i]);
+ while (1) {
+ while (mq_send(queue, buff, sizeof(buff), 0) == 0)
+ ;
+ mq_receive(queue, buff, sizeof(buff), &priority);
+ }
+}
+
+#define drain_queue() \
+ while (mq_receive(queue, buff, MSG_SIZE, &prio_in) == MSG_SIZE)
+
+#define do_untimed_send() \
+ do { \
+ if (mq_send(queue, buff, MSG_SIZE, prio_out)) \
+ shutdown(3, "Test send failure", __LINE__); \
+ } while (0)
+
+#define do_send_recv() \
+ do { \
+ clock_gettime(clock, &start); \
+ if (mq_send(queue, buff, MSG_SIZE, prio_out)) \
+ shutdown(3, "Test send failure", __LINE__); \
+ clock_gettime(clock, &middle); \
+ if (mq_receive(queue, buff, MSG_SIZE, &prio_in) != MSG_SIZE) \
+ shutdown(3, "Test receive failure", __LINE__); \
+ clock_gettime(clock, &end); \
+ nsec = ((middle.tv_sec - start.tv_sec) * 1000000000) + \
+ (middle.tv_nsec - start.tv_nsec); \
+ send_total.tv_nsec += nsec; \
+ if (send_total.tv_nsec >= 1000000000) { \
+ send_total.tv_sec++; \
+ send_total.tv_nsec -= 1000000000; \
+ } \
+ nsec = ((end.tv_sec - middle.tv_sec) * 1000000000) + \
+ (end.tv_nsec - middle.tv_nsec); \
+ recv_total.tv_nsec += nsec; \
+ if (recv_total.tv_nsec >= 1000000000) { \
+ recv_total.tv_sec++; \
+ recv_total.tv_nsec -= 1000000000; \
+ } \
+ } while (0)
+
+struct test {
+ char *desc;
+ void (*func)(int *);
+};
+
+void const_prio(int *prio)
+{
+ return;
+}
+
+void inc_prio(int *prio)
+{
+ if (++*prio == mq_prio_max)
+ *prio = 0;
+}
+
+void dec_prio(int *prio)
+{
+ if (--*prio < 0)
+ *prio = mq_prio_max - 1;
+}
+
+void random_prio(int *prio)
+{
+ *prio = random() % mq_prio_max;
+}
+
+struct test test2[] = {
+ {"\n\tTest #2a: Time send/recv message, queue full, constant prio\n",
+ const_prio},
+ {"\n\tTest #2b: Time send/recv message, queue full, increasing prio\n",
+ inc_prio},
+ {"\n\tTest #2c: Time send/recv message, queue full, decreasing prio\n",
+ dec_prio},
+ {"\n\tTest #2d: Time send/recv message, queue full, random prio\n",
+ random_prio},
+ {NULL, NULL}
+};
+
+/**
+ * Tests to perform (all done with MSG_SIZE messages):
+ *
+ * 1) Time to add/remove message with 0 messages on queue
+ * 1a) with constant prio
+ * 2) Time to add/remove message when queue close to capacity:
+ * 2a) with constant prio
+ * 2b) with increasing prio
+ * 2c) with decreasing prio
+ * 2d) with random prio
+ * 3) Test limits of priorities honored (double check _SC_MQ_PRIO_MAX)
+ */
+void *perf_test_thread(void *arg)
+{
+ char buff[MSG_SIZE];
+ int prio_out, prio_in;
+ int i;
+ clockid_t clock;
+ pthread_t *t;
+ struct timespec res, start, middle, end, send_total, recv_total;
+ unsigned long long nsec;
+ struct test *cur_test;
+
+ t = &cpu_threads[0];
+ printf("\n\tStarted mqueue performance test thread on CPU %d\n",
+ cpus_to_pin[0]);
+ mq_prio_max = sysconf(_SC_MQ_PRIO_MAX);
+ if (mq_prio_max == -1)
+ shutdown(2, "sysconf(_SC_MQ_PRIO_MAX)", __LINE__);
+ if (pthread_getcpuclockid(cpu_threads[0], &clock) != 0)
+ shutdown(2, "pthread_getcpuclockid", __LINE__);
+
+ if (clock_getres(clock, &res))
+ shutdown(2, "clock_getres()", __LINE__);
+
+ printf("\t\tMax priorities:\t\t\t%d\n", mq_prio_max);
+ printf("\t\tClock resolution:\t\t%lu nsec%s\n", res.tv_nsec,
+ res.tv_nsec > 1 ? "s" : "");
+
+
+
+ printf("\n\tTest #1: Time send/recv message, queue empty\n");
+ printf("\t\t(%d iterations)\n", TEST1_LOOPS);
+ prio_out = 0;
+ send_total.tv_sec = 0;
+ send_total.tv_nsec = 0;
+ recv_total.tv_sec = 0;
+ recv_total.tv_nsec = 0;
+ for (i = 0; i < TEST1_LOOPS; i++)
+ do_send_recv();
+ printf("\t\tSend msg:\t\t\t%ld.%lus total time\n",
+ send_total.tv_sec, send_total.tv_nsec);
+ nsec = ((unsigned long long)send_total.tv_sec * 1000000000 +
+ send_total.tv_nsec) / TEST1_LOOPS;
+ printf("\t\t\t\t\t\t%lld nsec/msg\n", nsec);
+ printf("\t\tRecv msg:\t\t\t%ld.%lus total time\n",
+ recv_total.tv_sec, recv_total.tv_nsec);
+ nsec = ((unsigned long long)recv_total.tv_sec * 1000000000 +
+ recv_total.tv_nsec) / TEST1_LOOPS;
+ printf("\t\t\t\t\t\t%lld nsec/msg\n", nsec);
+
+
+ for (cur_test = test2; cur_test->desc != NULL; cur_test++) {
+ printf("%s:\n", cur_test->desc);
+ printf("\t\t(%d iterations)\n", TEST2_LOOPS);
+ prio_out = 0;
+ send_total.tv_sec = 0;
+ send_total.tv_nsec = 0;
+ recv_total.tv_sec = 0;
+ recv_total.tv_nsec = 0;
+ printf("\t\tFilling queue...");
+ fflush(stdout);
+ clock_gettime(clock, &start);
+ for (i = 0; i < result.mq_maxmsg - 1; i++) {
+ do_untimed_send();
+ cur_test->func(&prio_out);
+ }
+ clock_gettime(clock, &end);
+ nsec = ((unsigned long long)(end.tv_sec - start.tv_sec) *
+ 1000000000) + (end.tv_nsec - start.tv_nsec);
+ printf("done.\t\t%lld.%llds\n", nsec / 1000000000,
+ nsec % 1000000000);
+ printf("\t\tTesting...");
+ fflush(stdout);
+ for (i = 0; i < TEST2_LOOPS; i++) {
+ do_send_recv();
+ cur_test->func(&prio_out);
+ }
+ printf("done.\n");
+ printf("\t\tSend msg:\t\t\t%ld.%lus total time\n",
+ send_total.tv_sec, send_total.tv_nsec);
+ nsec = ((unsigned long long)send_total.tv_sec * 1000000000 +
+ send_total.tv_nsec) / TEST2_LOOPS;
+ printf("\t\t\t\t\t\t%lld nsec/msg\n", nsec);
+ printf("\t\tRecv msg:\t\t\t%ld.%lus total time\n",
+ recv_total.tv_sec, recv_total.tv_nsec);
+ nsec = ((unsigned long long)recv_total.tv_sec * 1000000000 +
+ recv_total.tv_nsec) / TEST2_LOOPS;
+ printf("\t\t\t\t\t\t%lld nsec/msg\n", nsec);
+ printf("\t\tDraining queue...");
+ fflush(stdout);
+ clock_gettime(clock, &start);
+ drain_queue();
+ clock_gettime(clock, &end);
+ nsec = ((unsigned long long)(end.tv_sec - start.tv_sec) *
+ 1000000000) + (end.tv_nsec - start.tv_nsec);
+ printf("done.\t\t%lld.%llds\n", nsec / 1000000000,
+ nsec % 1000000000);
+ }
+ return 0;
+}
+
+void increase_limits(void)
+{
+ cur_limits.rlim_cur = RLIM_INFINITY;
+ cur_limits.rlim_max = RLIM_INFINITY;
+ setr(RLIMIT_MSGQUEUE, &cur_limits);
+ while (try_set(max_msgs, cur_max_msgs += 10))
+ ;
+ cur_max_msgs = get(max_msgs);
+ while (try_set(max_msgsize, cur_max_msgsize += 1024))
+ ;
+ cur_max_msgsize = get(max_msgsize);
+ if (setpriority(PRIO_PROCESS, 0, -20) != 0)
+ shutdown(2, "setpriority()", __LINE__);
+ cur_nice = -20;
+}
+
+int main(int argc, char *argv[])
+{
+ struct mq_attr attr;
+ char *option, *next_option;
+ int i, cpu, rc;
+ struct sigaction sa;
+ poptContext popt_context;
+ void *retval;
+
+ main_thread = pthread_self();
+ num_cpus_to_pin = 0;
+
+ if (sysconf(_SC_NPROCESSORS_ONLN) == -1) {
+ perror("sysconf(_SC_NPROCESSORS_ONLN)");
+ exit(1);
+ }
+
+ if (getuid() != 0)
+ ksft_exit_skip("Not running as root, but almost all tests "
+ "require root in order to modify\nsystem settings. "
+ "Exiting.\n");
+
+ cpus_online = min(MAX_CPUS, sysconf(_SC_NPROCESSORS_ONLN));
+ cpu_set = CPU_ALLOC(cpus_online);
+ if (cpu_set == NULL) {
+ perror("CPU_ALLOC()");
+ exit(1);
+ }
+ cpu_set_size = CPU_ALLOC_SIZE(cpus_online);
+ CPU_ZERO_S(cpu_set_size, cpu_set);
+
+ popt_context = poptGetContext(NULL, argc, (const char **)argv,
+ options, 0);
+
+ while ((rc = poptGetNextOpt(popt_context)) > 0) {
+ switch (rc) {
+ case 'c':
+ continuous_mode = 1;
+ option = cpu_option_string;
+ do {
+ next_option = strchr(option, ',');
+ if (next_option)
+ *next_option = '\0';
+ cpu = atoi(option);
+ if (cpu >= cpus_online)
+ fprintf(stderr, "CPU %d exceeds "
+ "cpus online, ignoring.\n",
+ cpu);
+ else
+ cpus_to_pin[num_cpus_to_pin++] = cpu;
+ if (next_option)
+ option = ++next_option;
+ } while (next_option && num_cpus_to_pin < MAX_CPUS);
+ /* Double check that they didn't give us the same CPU
+ * more than once */
+ for (cpu = 0; cpu < num_cpus_to_pin; cpu++) {
+ if (CPU_ISSET_S(cpus_to_pin[cpu], cpu_set_size,
+ cpu_set)) {
+ fprintf(stderr, "Any given CPU may "
+ "only be given once.\n");
+ goto err_code;
+ } else
+ CPU_SET_S(cpus_to_pin[cpu],
+ cpu_set_size, cpu_set);
+ }
+ break;
+ case 'p':
+ /*
+ * Although we can create a msg queue with a
+ * non-absolute path name, unlink will fail. So,
+ * if the name doesn't start with a /, add one
+ * when we save it.
+ */
+ option = queue_path;
+ if (*option != '/') {
+ queue_path = malloc(strlen(option) + 2);
+ if (!queue_path) {
+ perror("malloc()");
+ goto err_code;
+ }
+ queue_path[0] = '/';
+ queue_path[1] = 0;
+ strcat(queue_path, option);
+ free(option);
+ }
+ break;
+ }
+ }
+
+ if (continuous_mode && num_cpus_to_pin == 0) {
+ fprintf(stderr, "Must pass at least one CPU to continuous "
+ "mode.\n");
+ poptPrintUsage(popt_context, stderr, 0);
+ goto err_code;
+ } else if (!continuous_mode) {
+ num_cpus_to_pin = 1;
+ cpus_to_pin[0] = cpus_online - 1;
+ }
+
+ max_msgs = fopen(MAX_MSGS, "r+");
+ max_msgsize = fopen(MAX_MSGSIZE, "r+");
+ if (!max_msgs)
+ shutdown(2, "Failed to open msg_max", __LINE__);
+ if (!max_msgsize)
+ shutdown(2, "Failed to open msgsize_max", __LINE__);
+
+ /* Load up the current system values for everything we can */
+ getr(RLIMIT_MSGQUEUE, &saved_limits);
+ cur_limits = saved_limits;
+ saved_max_msgs = cur_max_msgs = get(max_msgs);
+ saved_max_msgsize = cur_max_msgsize = get(max_msgsize);
+ errno = 0;
+ cur_nice = getpriority(PRIO_PROCESS, 0);
+ if (errno)
+ shutdown(2, "getpriority()", __LINE__);
+
+ /* Tell the user our initial state */
+ printf("\nInitial system state:\n");
+ printf("\tUsing queue path:\t\t\t%s\n", queue_path);
+ printf("\tRLIMIT_MSGQUEUE(soft):\t\t\t%ld\n",
+ (long) saved_limits.rlim_cur);
+ printf("\tRLIMIT_MSGQUEUE(hard):\t\t\t%ld\n",
+ (long) saved_limits.rlim_max);
+ printf("\tMaximum Message Size:\t\t\t%d\n", saved_max_msgsize);
+ printf("\tMaximum Queue Size:\t\t\t%d\n", saved_max_msgs);
+ printf("\tNice value:\t\t\t\t%d\n", cur_nice);
+ printf("\n");
+
+ increase_limits();
+
+ printf("Adjusted system state for testing:\n");
+ if (cur_limits.rlim_cur == RLIM_INFINITY) {
+ printf("\tRLIMIT_MSGQUEUE(soft):\t\t\t(unlimited)\n");
+ printf("\tRLIMIT_MSGQUEUE(hard):\t\t\t(unlimited)\n");
+ } else {
+ printf("\tRLIMIT_MSGQUEUE(soft):\t\t\t%ld\n",
+ (long) cur_limits.rlim_cur);
+ printf("\tRLIMIT_MSGQUEUE(hard):\t\t\t%ld\n",
+ (long) cur_limits.rlim_max);
+ }
+ printf("\tMaximum Message Size:\t\t\t%d\n", cur_max_msgsize);
+ printf("\tMaximum Queue Size:\t\t\t%d\n", cur_max_msgs);
+ printf("\tNice value:\t\t\t\t%d\n", cur_nice);
+ printf("\tContinuous mode:\t\t\t(%s)\n", continuous_mode ?
+ (continuous_mode_fake ? "fake mode" : "enabled") :
+ "disabled");
+ printf("\tCPUs to pin:\t\t\t\t%d", cpus_to_pin[0]);
+ for (cpu = 1; cpu < num_cpus_to_pin; cpu++)
+ printf(",%d", cpus_to_pin[cpu]);
+ printf("\n");
+
+ sa.sa_sigaction = sig_action_SIGUSR1;
+ sigemptyset(&sa.sa_mask);
+ sigaddset(&sa.sa_mask, SIGHUP);
+ sigaddset(&sa.sa_mask, SIGINT);
+ sigaddset(&sa.sa_mask, SIGQUIT);
+ sigaddset(&sa.sa_mask, SIGTERM);
+ sa.sa_flags = SA_SIGINFO;
+ if (sigaction(SIGUSR1, &sa, NULL) == -1)
+ shutdown(1, "sigaction(SIGUSR1)", __LINE__);
+ sa.sa_sigaction = sig_action;
+ if (sigaction(SIGHUP, &sa, NULL) == -1)
+ shutdown(1, "sigaction(SIGHUP)", __LINE__);
+ if (sigaction(SIGINT, &sa, NULL) == -1)
+ shutdown(1, "sigaction(SIGINT)", __LINE__);
+ if (sigaction(SIGQUIT, &sa, NULL) == -1)
+ shutdown(1, "sigaction(SIGQUIT)", __LINE__);
+ if (sigaction(SIGTERM, &sa, NULL) == -1)
+ shutdown(1, "sigaction(SIGTERM)", __LINE__);
+
+ if (!continuous_mode_fake) {
+ attr.mq_flags = O_NONBLOCK;
+ attr.mq_maxmsg = cur_max_msgs;
+ attr.mq_msgsize = MSG_SIZE;
+ open_queue(&attr);
+ }
+ for (i = 0; i < num_cpus_to_pin; i++) {
+ pthread_attr_t thread_attr;
+ void *thread_func;
+
+ if (continuous_mode_fake)
+ thread_func = &fake_cont_thread;
+ else if (continuous_mode)
+ thread_func = &cont_thread;
+ else
+ thread_func = &perf_test_thread;
+
+ CPU_ZERO_S(cpu_set_size, cpu_set);
+ CPU_SET_S(cpus_to_pin[i], cpu_set_size, cpu_set);
+ pthread_attr_init(&thread_attr);
+ pthread_attr_setaffinity_np(&thread_attr, cpu_set_size,
+ cpu_set);
+ if (pthread_create(&cpu_threads[i], &thread_attr, thread_func,
+ NULL))
+ shutdown(1, "pthread_create()", __LINE__);
+ pthread_attr_destroy(&thread_attr);
+ }
+
+ if (!continuous_mode) {
+ pthread_join(cpu_threads[0], &retval);
+ shutdown((long)retval, "perf_test_thread()", __LINE__);
+ } else {
+ while (1)
+ sleep(1);
+ }
+ shutdown(0, "", 0);
+
+err_code:
+ CPU_FREE(cpu_set);
+ exit(1);
+
+}
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
new file mode 100644
index 000000000..78b24cf76
--- /dev/null
+++ b/tools/testing/selftests/net/.gitignore
@@ -0,0 +1,16 @@
+msg_zerocopy
+socket
+psock_fanout
+psock_snd
+psock_tpacket
+reuseport_bpf
+reuseport_bpf_cpu
+reuseport_bpf_numa
+reuseport_dualstack
+reuseaddr_conflict
+tcp_mmap
+udpgso
+udpgso_bench_rx
+udpgso_bench_tx
+tcp_inq
+tls
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
new file mode 100644
index 000000000..9a3764a10
--- /dev/null
+++ b/tools/testing/selftests/net/Makefile
@@ -0,0 +1,23 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for net selftests
+
+CFLAGS = -Wall -Wl,--no-as-needed -O2 -g
+CFLAGS += -I../../../../usr/include/
+
+TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh
+TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh
+TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh
+TEST_PROGS_EXTENDED := in_netns.sh
+TEST_GEN_FILES = socket
+TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy
+TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd
+TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx
+TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
+TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls
+
+KSFT_KHDR_INSTALL := 1
+include ../lib.mk
+
+$(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma
+$(OUTPUT)/tcp_mmap: LDFLAGS += -lpthread
+$(OUTPUT)/tcp_inq: LDFLAGS += -lpthread
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
new file mode 100644
index 000000000..cd3a2f154
--- /dev/null
+++ b/tools/testing/selftests/net/config
@@ -0,0 +1,16 @@
+CONFIG_USER_NS=y
+CONFIG_BPF_SYSCALL=y
+CONFIG_TEST_BPF=m
+CONFIG_NUMA=y
+CONFIG_NET_VRF=y
+CONFIG_NET_L3_MASTER_DEV=y
+CONFIG_IPV6=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_VETH=y
+CONFIG_INET_XFRM_MODE_TUNNEL=y
+CONFIG_NET_IPVTI=y
+CONFIG_INET6_XFRM_MODE_TUNNEL=y
+CONFIG_IPV6_VTI=y
+CONFIG_DUMMY=y
+CONFIG_BRIDGE=y
+CONFIG_VLAN_8021Q=y
diff --git a/tools/testing/selftests/net/fib-onlink-tests.sh b/tools/testing/selftests/net/fib-onlink-tests.sh
new file mode 100755
index 000000000..864f865ee
--- /dev/null
+++ b/tools/testing/selftests/net/fib-onlink-tests.sh
@@ -0,0 +1,467 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# IPv4 and IPv6 onlink tests
+
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+
+# Network interfaces
+# - odd in current namespace; even in peer ns
+declare -A NETIFS
+# default VRF
+NETIFS[p1]=veth1
+NETIFS[p2]=veth2
+NETIFS[p3]=veth3
+NETIFS[p4]=veth4
+# VRF
+NETIFS[p5]=veth5
+NETIFS[p6]=veth6
+NETIFS[p7]=veth7
+NETIFS[p8]=veth8
+
+# /24 network
+declare -A V4ADDRS
+V4ADDRS[p1]=169.254.1.1
+V4ADDRS[p2]=169.254.1.2
+V4ADDRS[p3]=169.254.3.1
+V4ADDRS[p4]=169.254.3.2
+V4ADDRS[p5]=169.254.5.1
+V4ADDRS[p6]=169.254.5.2
+V4ADDRS[p7]=169.254.7.1
+V4ADDRS[p8]=169.254.7.2
+
+# /64 network
+declare -A V6ADDRS
+V6ADDRS[p1]=2001:db8:101::1
+V6ADDRS[p2]=2001:db8:101::2
+V6ADDRS[p3]=2001:db8:301::1
+V6ADDRS[p4]=2001:db8:301::2
+V6ADDRS[p5]=2001:db8:501::1
+V6ADDRS[p6]=2001:db8:501::2
+V6ADDRS[p7]=2001:db8:701::1
+V6ADDRS[p8]=2001:db8:701::2
+
+# Test networks:
+# [1] = default table
+# [2] = VRF
+#
+# /32 host routes
+declare -A TEST_NET4
+TEST_NET4[1]=169.254.101
+TEST_NET4[2]=169.254.102
+# /128 host routes
+declare -A TEST_NET6
+TEST_NET6[1]=2001:db8:101
+TEST_NET6[2]=2001:db8:102
+
+# connected gateway
+CONGW[1]=169.254.1.254
+CONGW[2]=169.254.3.254
+CONGW[3]=169.254.5.254
+
+# recursive gateway
+RECGW4[1]=169.254.11.254
+RECGW4[2]=169.254.12.254
+RECGW6[1]=2001:db8:11::64
+RECGW6[2]=2001:db8:12::64
+
+# for v4 mapped to v6
+declare -A TEST_NET4IN6IN6
+TEST_NET4IN6[1]=10.1.1.254
+TEST_NET4IN6[2]=10.2.1.254
+
+# mcast address
+MCAST6=ff02::1
+
+
+PEER_NS=bart
+PEER_CMD="ip netns exec ${PEER_NS}"
+VRF=lisa
+VRF_TABLE=1101
+PBR_TABLE=101
+
+################################################################################
+# utilities
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ nsuccess=$((nsuccess+1))
+ printf "\n TEST: %-50s [ OK ]\n" "${msg}"
+ else
+ nfail=$((nfail+1))
+ printf "\n TEST: %-50s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+}
+
+log_section()
+{
+ echo
+ echo "######################################################################"
+ echo "TEST SECTION: $*"
+ echo "######################################################################"
+}
+
+log_subsection()
+{
+ echo
+ echo "#########################################"
+ echo "TEST SUBSECTION: $*"
+}
+
+run_cmd()
+{
+ echo
+ echo "COMMAND: $*"
+ eval $*
+}
+
+get_linklocal()
+{
+ local dev=$1
+ local pfx
+ local addr
+
+ addr=$(${pfx} ip -6 -br addr show dev ${dev} | \
+ awk '{
+ for (i = 3; i <= NF; ++i) {
+ if ($i ~ /^fe80/)
+ print $i
+ }
+ }'
+ )
+ addr=${addr/\/*}
+
+ [ -z "$addr" ] && return 1
+
+ echo $addr
+
+ return 0
+}
+
+################################################################################
+#
+
+setup()
+{
+ echo
+ echo "########################################"
+ echo "Configuring interfaces"
+
+ set -e
+
+ # create namespace
+ ip netns add ${PEER_NS}
+ ip -netns ${PEER_NS} li set lo up
+
+ # add vrf table
+ ip li add ${VRF} type vrf table ${VRF_TABLE}
+ ip li set ${VRF} up
+ ip ro add table ${VRF_TABLE} unreachable default metric 8192
+ ip -6 ro add table ${VRF_TABLE} unreachable default metric 8192
+
+ # create test interfaces
+ ip li add ${NETIFS[p1]} type veth peer name ${NETIFS[p2]}
+ ip li add ${NETIFS[p3]} type veth peer name ${NETIFS[p4]}
+ ip li add ${NETIFS[p5]} type veth peer name ${NETIFS[p6]}
+ ip li add ${NETIFS[p7]} type veth peer name ${NETIFS[p8]}
+
+ # enslave vrf interfaces
+ for n in 5 7; do
+ ip li set ${NETIFS[p${n}]} vrf ${VRF}
+ done
+
+ # add addresses
+ for n in 1 3 5 7; do
+ ip li set ${NETIFS[p${n}]} up
+ ip addr add ${V4ADDRS[p${n}]}/24 dev ${NETIFS[p${n}]}
+ ip addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]} nodad
+ done
+
+ # move peer interfaces to namespace and add addresses
+ for n in 2 4 6 8; do
+ ip li set ${NETIFS[p${n}]} netns ${PEER_NS} up
+ ip -netns ${PEER_NS} addr add ${V4ADDRS[p${n}]}/24 dev ${NETIFS[p${n}]}
+ ip -netns ${PEER_NS} addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]} nodad
+ done
+
+ ip -6 ro add default via ${V6ADDRS[p3]/::[0-9]/::64}
+ ip -6 ro add table ${VRF_TABLE} default via ${V6ADDRS[p7]/::[0-9]/::64}
+
+ set +e
+}
+
+cleanup()
+{
+ # make sure we start from a clean slate
+ ip netns del ${PEER_NS} 2>/dev/null
+ for n in 1 3 5 7; do
+ ip link del ${NETIFS[p${n}]} 2>/dev/null
+ done
+ ip link del ${VRF} 2>/dev/null
+ ip ro flush table ${VRF_TABLE}
+ ip -6 ro flush table ${VRF_TABLE}
+}
+
+################################################################################
+# IPv4 tests
+#
+
+run_ip()
+{
+ local table="$1"
+ local prefix="$2"
+ local gw="$3"
+ local dev="$4"
+ local exp_rc="$5"
+ local desc="$6"
+
+ # dev arg may be empty
+ [ -n "${dev}" ] && dev="dev ${dev}"
+
+ run_cmd ip ro add table "${table}" "${prefix}"/32 via "${gw}" "${dev}" onlink
+ log_test $? ${exp_rc} "${desc}"
+}
+
+run_ip_mpath()
+{
+ local table="$1"
+ local prefix="$2"
+ local nh1="$3"
+ local nh2="$4"
+ local exp_rc="$5"
+ local desc="$6"
+
+ # dev arg may be empty
+ [ -n "${dev}" ] && dev="dev ${dev}"
+
+ run_cmd ip ro add table "${table}" "${prefix}"/32 \
+ nexthop via ${nh1} nexthop via ${nh2}
+ log_test $? ${exp_rc} "${desc}"
+}
+
+valid_onlink_ipv4()
+{
+ # - unicast connected, unicast recursive
+ #
+ log_subsection "default VRF - main table"
+
+ run_ip 254 ${TEST_NET4[1]}.1 ${CONGW[1]} ${NETIFS[p1]} 0 "unicast connected"
+ run_ip 254 ${TEST_NET4[1]}.2 ${RECGW4[1]} ${NETIFS[p1]} 0 "unicast recursive"
+
+ log_subsection "VRF ${VRF}"
+
+ run_ip ${VRF_TABLE} ${TEST_NET4[2]}.1 ${CONGW[3]} ${NETIFS[p5]} 0 "unicast connected"
+ run_ip ${VRF_TABLE} ${TEST_NET4[2]}.2 ${RECGW4[2]} ${NETIFS[p5]} 0 "unicast recursive"
+
+ log_subsection "VRF device, PBR table"
+
+ run_ip ${PBR_TABLE} ${TEST_NET4[2]}.3 ${CONGW[3]} ${NETIFS[p5]} 0 "unicast connected"
+ run_ip ${PBR_TABLE} ${TEST_NET4[2]}.4 ${RECGW4[2]} ${NETIFS[p5]} 0 "unicast recursive"
+
+ # multipath version
+ #
+ log_subsection "default VRF - main table - multipath"
+
+ run_ip_mpath 254 ${TEST_NET4[1]}.5 \
+ "${CONGW[1]} dev ${NETIFS[p1]} onlink" \
+ "${CONGW[2]} dev ${NETIFS[p3]} onlink" \
+ 0 "unicast connected - multipath"
+
+ run_ip_mpath 254 ${TEST_NET4[1]}.6 \
+ "${RECGW4[1]} dev ${NETIFS[p1]} onlink" \
+ "${RECGW4[2]} dev ${NETIFS[p3]} onlink" \
+ 0 "unicast recursive - multipath"
+
+ run_ip_mpath 254 ${TEST_NET4[1]}.7 \
+ "${CONGW[1]} dev ${NETIFS[p1]}" \
+ "${CONGW[2]} dev ${NETIFS[p3]} onlink" \
+ 0 "unicast connected - multipath onlink first only"
+
+ run_ip_mpath 254 ${TEST_NET4[1]}.8 \
+ "${CONGW[1]} dev ${NETIFS[p1]} onlink" \
+ "${CONGW[2]} dev ${NETIFS[p3]}" \
+ 0 "unicast connected - multipath onlink second only"
+}
+
+invalid_onlink_ipv4()
+{
+ run_ip 254 ${TEST_NET4[1]}.11 ${V4ADDRS[p1]} ${NETIFS[p1]} 2 \
+ "Invalid gw - local unicast address"
+
+ run_ip ${VRF_TABLE} ${TEST_NET4[2]}.11 ${V4ADDRS[p5]} ${NETIFS[p5]} 2 \
+ "Invalid gw - local unicast address, VRF"
+
+ run_ip 254 ${TEST_NET4[1]}.101 ${V4ADDRS[p1]} "" 2 "No nexthop device given"
+
+ run_ip 254 ${TEST_NET4[1]}.102 ${V4ADDRS[p3]} ${NETIFS[p1]} 2 \
+ "Gateway resolves to wrong nexthop device"
+
+ run_ip ${VRF_TABLE} ${TEST_NET4[2]}.103 ${V4ADDRS[p7]} ${NETIFS[p5]} 2 \
+ "Gateway resolves to wrong nexthop device - VRF"
+}
+
+################################################################################
+# IPv6 tests
+#
+
+run_ip6()
+{
+ local table="$1"
+ local prefix="$2"
+ local gw="$3"
+ local dev="$4"
+ local exp_rc="$5"
+ local desc="$6"
+
+ # dev arg may be empty
+ [ -n "${dev}" ] && dev="dev ${dev}"
+
+ run_cmd ip -6 ro add table "${table}" "${prefix}"/128 via "${gw}" "${dev}" onlink
+ log_test $? ${exp_rc} "${desc}"
+}
+
+run_ip6_mpath()
+{
+ local table="$1"
+ local prefix="$2"
+ local opts="$3"
+ local nh1="$4"
+ local nh2="$5"
+ local exp_rc="$6"
+ local desc="$7"
+
+ run_cmd ip -6 ro add table "${table}" "${prefix}"/128 "${opts}" \
+ nexthop via ${nh1} nexthop via ${nh2}
+ log_test $? ${exp_rc} "${desc}"
+}
+
+valid_onlink_ipv6()
+{
+ # - unicast connected, unicast recursive, v4-mapped
+ #
+ log_subsection "default VRF - main table"
+
+ run_ip6 254 ${TEST_NET6[1]}::1 ${V6ADDRS[p1]/::*}::64 ${NETIFS[p1]} 0 "unicast connected"
+ run_ip6 254 ${TEST_NET6[1]}::2 ${RECGW6[1]} ${NETIFS[p1]} 0 "unicast recursive"
+ run_ip6 254 ${TEST_NET6[1]}::3 ::ffff:${TEST_NET4IN6[1]} ${NETIFS[p1]} 0 "v4-mapped"
+
+ log_subsection "VRF ${VRF}"
+
+ run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::1 ${V6ADDRS[p5]/::*}::64 ${NETIFS[p5]} 0 "unicast connected"
+ run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::2 ${RECGW6[2]} ${NETIFS[p5]} 0 "unicast recursive"
+ run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::3 ::ffff:${TEST_NET4IN6[2]} ${NETIFS[p5]} 0 "v4-mapped"
+
+ log_subsection "VRF device, PBR table"
+
+ run_ip6 ${PBR_TABLE} ${TEST_NET6[2]}::4 ${V6ADDRS[p5]/::*}::64 ${NETIFS[p5]} 0 "unicast connected"
+ run_ip6 ${PBR_TABLE} ${TEST_NET6[2]}::5 ${RECGW6[2]} ${NETIFS[p5]} 0 "unicast recursive"
+ run_ip6 ${PBR_TABLE} ${TEST_NET6[2]}::6 ::ffff:${TEST_NET4IN6[2]} ${NETIFS[p5]} 0 "v4-mapped"
+
+ # multipath version
+ #
+ log_subsection "default VRF - main table - multipath"
+
+ run_ip6_mpath 254 ${TEST_NET6[1]}::4 "onlink" \
+ "${V6ADDRS[p1]/::*}::64 dev ${NETIFS[p1]}" \
+ "${V6ADDRS[p3]/::*}::64 dev ${NETIFS[p3]}" \
+ 0 "unicast connected - multipath onlink"
+
+ run_ip6_mpath 254 ${TEST_NET6[1]}::5 "onlink" \
+ "${RECGW6[1]} dev ${NETIFS[p1]}" \
+ "${RECGW6[2]} dev ${NETIFS[p3]}" \
+ 0 "unicast recursive - multipath onlink"
+
+ run_ip6_mpath 254 ${TEST_NET6[1]}::6 "onlink" \
+ "::ffff:${TEST_NET4IN6[1]} dev ${NETIFS[p1]}" \
+ "::ffff:${TEST_NET4IN6[2]} dev ${NETIFS[p3]}" \
+ 0 "v4-mapped - multipath onlink"
+
+ run_ip6_mpath 254 ${TEST_NET6[1]}::7 "" \
+ "${V6ADDRS[p1]/::*}::64 dev ${NETIFS[p1]} onlink" \
+ "${V6ADDRS[p3]/::*}::64 dev ${NETIFS[p3]} onlink" \
+ 0 "unicast connected - multipath onlink both nexthops"
+
+ run_ip6_mpath 254 ${TEST_NET6[1]}::8 "" \
+ "${V6ADDRS[p1]/::*}::64 dev ${NETIFS[p1]} onlink" \
+ "${V6ADDRS[p3]/::*}::64 dev ${NETIFS[p3]}" \
+ 0 "unicast connected - multipath onlink first only"
+
+ run_ip6_mpath 254 ${TEST_NET6[1]}::9 "" \
+ "${V6ADDRS[p1]/::*}::64 dev ${NETIFS[p1]}" \
+ "${V6ADDRS[p3]/::*}::64 dev ${NETIFS[p3]} onlink" \
+ 0 "unicast connected - multipath onlink second only"
+}
+
+invalid_onlink_ipv6()
+{
+ local lladdr
+
+ lladdr=$(get_linklocal ${NETIFS[p1]}) || return 1
+
+ run_ip6 254 ${TEST_NET6[1]}::11 ${V6ADDRS[p1]} ${NETIFS[p1]} 2 \
+ "Invalid gw - local unicast address"
+ run_ip6 254 ${TEST_NET6[1]}::12 ${lladdr} ${NETIFS[p1]} 2 \
+ "Invalid gw - local linklocal address"
+ run_ip6 254 ${TEST_NET6[1]}::12 ${MCAST6} ${NETIFS[p1]} 2 \
+ "Invalid gw - multicast address"
+
+ lladdr=$(get_linklocal ${NETIFS[p5]}) || return 1
+ run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::11 ${V6ADDRS[p5]} ${NETIFS[p5]} 2 \
+ "Invalid gw - local unicast address, VRF"
+ run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::12 ${lladdr} ${NETIFS[p5]} 2 \
+ "Invalid gw - local linklocal address, VRF"
+ run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::12 ${MCAST6} ${NETIFS[p5]} 2 \
+ "Invalid gw - multicast address, VRF"
+
+ run_ip6 254 ${TEST_NET6[1]}::101 ${V6ADDRS[p1]} "" 2 \
+ "No nexthop device given"
+
+ # default VRF validation is done against LOCAL table
+ # run_ip6 254 ${TEST_NET6[1]}::102 ${V6ADDRS[p3]/::[0-9]/::64} ${NETIFS[p1]} 2 \
+ # "Gateway resolves to wrong nexthop device"
+
+ run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::103 ${V6ADDRS[p7]/::[0-9]/::64} ${NETIFS[p5]} 2 \
+ "Gateway resolves to wrong nexthop device - VRF"
+}
+
+run_onlink_tests()
+{
+ log_section "IPv4 onlink"
+ log_subsection "Valid onlink commands"
+ valid_onlink_ipv4
+ log_subsection "Invalid onlink commands"
+ invalid_onlink_ipv4
+
+ log_section "IPv6 onlink"
+ log_subsection "Valid onlink commands"
+ valid_onlink_ipv6
+ log_subsection "Invalid onlink commands"
+ invalid_onlink_ipv6
+}
+
+################################################################################
+# main
+
+nsuccess=0
+nfail=0
+
+cleanup
+setup
+run_onlink_tests
+cleanup
+
+if [ "$TESTS" != "none" ]; then
+ printf "\nTests passed: %3d\n" ${nsuccess}
+ printf "Tests failed: %3d\n" ${nfail}
+fi
diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh
new file mode 100755
index 000000000..ba2d9fab2
--- /dev/null
+++ b/tools/testing/selftests/net/fib_rule_tests.sh
@@ -0,0 +1,255 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test is for checking IPv4 and IPv6 FIB rules API
+
+ret=0
+
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+IP="ip -netns testns"
+
+RTABLE=100
+GW_IP4=192.51.100.2
+SRC_IP=192.51.100.3
+GW_IP6=2001:db8:1::2
+SRC_IP6=2001:db8:1::3
+
+DEV_ADDR=192.51.100.1
+DEV_ADDR6=2001:db8:1::1
+DEV=dummy0
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ nsuccess=$((nsuccess+1))
+ printf "\n TEST: %-50s [ OK ]\n" "${msg}"
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf "\n TEST: %-50s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+}
+
+log_section()
+{
+ echo
+ echo "######################################################################"
+ echo "TEST SECTION: $*"
+ echo "######################################################################"
+}
+
+setup()
+{
+ set -e
+ ip netns add testns
+ $IP link set dev lo up
+
+ $IP link add dummy0 type dummy
+ $IP link set dev dummy0 up
+ $IP address add $DEV_ADDR/24 dev dummy0
+ $IP -6 address add $DEV_ADDR6/64 dev dummy0
+
+ set +e
+}
+
+cleanup()
+{
+ $IP link del dev dummy0 &> /dev/null
+ ip netns del testns
+}
+
+fib_check_iproute_support()
+{
+ ip rule help 2>&1 | grep -q $1
+ if [ $? -ne 0 ]; then
+ echo "SKIP: iproute2 iprule too old, missing $1 match"
+ return 1
+ fi
+
+ ip route get help 2>&1 | grep -q $2
+ if [ $? -ne 0 ]; then
+ echo "SKIP: iproute2 get route too old, missing $2 match"
+ return 1
+ fi
+
+ return 0
+}
+
+fib_rule6_del()
+{
+ $IP -6 rule del $1
+ log_test $? 0 "rule6 del $1"
+}
+
+fib_rule6_del_by_pref()
+{
+ pref=$($IP -6 rule show | grep "$1 lookup $TABLE" | cut -d ":" -f 1)
+ $IP -6 rule del pref $pref
+}
+
+fib_rule6_test_match_n_redirect()
+{
+ local match="$1"
+ local getmatch="$2"
+
+ $IP -6 rule add $match table $RTABLE
+ $IP -6 route get $GW_IP6 $getmatch | grep -q "table $RTABLE"
+ log_test $? 0 "rule6 check: $1"
+
+ fib_rule6_del_by_pref "$match"
+ log_test $? 0 "rule6 del by pref: $match"
+}
+
+fib_rule6_test()
+{
+ # setup the fib rule redirect route
+ $IP -6 route add table $RTABLE default via $GW_IP6 dev $DEV onlink
+
+ match="oif $DEV"
+ fib_rule6_test_match_n_redirect "$match" "$match" "oif redirect to table"
+
+ match="from $SRC_IP6 iif $DEV"
+ fib_rule6_test_match_n_redirect "$match" "$match" "iif redirect to table"
+
+ match="tos 0x10"
+ fib_rule6_test_match_n_redirect "$match" "$match" "tos redirect to table"
+
+ match="fwmark 0x64"
+ getmatch="mark 0x64"
+ fib_rule6_test_match_n_redirect "$match" "$getmatch" "fwmark redirect to table"
+
+ fib_check_iproute_support "uidrange" "uid"
+ if [ $? -eq 0 ]; then
+ match="uidrange 100-100"
+ getmatch="uid 100"
+ fib_rule6_test_match_n_redirect "$match" "$getmatch" "uid redirect to table"
+ fi
+
+ fib_check_iproute_support "sport" "sport"
+ if [ $? -eq 0 ]; then
+ match="sport 666 dport 777"
+ fib_rule6_test_match_n_redirect "$match" "$match" "sport and dport redirect to table"
+ fi
+
+ fib_check_iproute_support "ipproto" "ipproto"
+ if [ $? -eq 0 ]; then
+ match="ipproto tcp"
+ fib_rule6_test_match_n_redirect "$match" "$match" "ipproto match"
+ fi
+
+ fib_check_iproute_support "ipproto" "ipproto"
+ if [ $? -eq 0 ]; then
+ match="ipproto ipv6-icmp"
+ fib_rule6_test_match_n_redirect "$match" "$match" "ipproto ipv6-icmp match"
+ fi
+}
+
+fib_rule4_del()
+{
+ $IP rule del $1
+ log_test $? 0 "del $1"
+}
+
+fib_rule4_del_by_pref()
+{
+ pref=$($IP rule show | grep "$1 lookup $TABLE" | cut -d ":" -f 1)
+ $IP rule del pref $pref
+}
+
+fib_rule4_test_match_n_redirect()
+{
+ local match="$1"
+ local getmatch="$2"
+
+ $IP rule add $match table $RTABLE
+ $IP route get $GW_IP4 $getmatch | grep -q "table $RTABLE"
+ log_test $? 0 "rule4 check: $1"
+
+ fib_rule4_del_by_pref "$match"
+ log_test $? 0 "rule4 del by pref: $match"
+}
+
+fib_rule4_test()
+{
+ # setup the fib rule redirect route
+ $IP route add table $RTABLE default via $GW_IP4 dev $DEV onlink
+
+ match="oif $DEV"
+ fib_rule4_test_match_n_redirect "$match" "$match" "oif redirect to table"
+
+ match="from $SRC_IP iif $DEV"
+ fib_rule4_test_match_n_redirect "$match" "$match" "iif redirect to table"
+
+ match="tos 0x10"
+ fib_rule4_test_match_n_redirect "$match" "$match" "tos redirect to table"
+
+ match="fwmark 0x64"
+ getmatch="mark 0x64"
+ fib_rule4_test_match_n_redirect "$match" "$getmatch" "fwmark redirect to table"
+
+ fib_check_iproute_support "uidrange" "uid"
+ if [ $? -eq 0 ]; then
+ match="uidrange 100-100"
+ getmatch="uid 100"
+ fib_rule4_test_match_n_redirect "$match" "$getmatch" "uid redirect to table"
+ fi
+
+ fib_check_iproute_support "sport" "sport"
+ if [ $? -eq 0 ]; then
+ match="sport 666 dport 777"
+ fib_rule4_test_match_n_redirect "$match" "$match" "sport and dport redirect to table"
+ fi
+
+ fib_check_iproute_support "ipproto" "ipproto"
+ if [ $? -eq 0 ]; then
+ match="ipproto tcp"
+ fib_rule4_test_match_n_redirect "$match" "$match" "ipproto tcp match"
+ fi
+
+ fib_check_iproute_support "ipproto" "ipproto"
+ if [ $? -eq 0 ]; then
+ match="ipproto icmp"
+ fib_rule4_test_match_n_redirect "$match" "$match" "ipproto icmp match"
+ fi
+}
+
+run_fibrule_tests()
+{
+ log_section "IPv4 fib rule"
+ fib_rule4_test
+ log_section "IPv6 fib rule"
+ fib_rule6_test
+}
+
+if [ "$(id -u)" -ne 0 ];then
+ echo "SKIP: Need root privileges"
+ exit 0
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+ echo "SKIP: Could not run test without ip tool"
+ exit 0
+fi
+
+# start clean
+cleanup &> /dev/null
+setup
+run_fibrule_tests
+cleanup
+
+if [ "$TESTS" != "none" ]; then
+ printf "\nTests passed: %3d\n" ${nsuccess}
+ printf "Tests failed: %3d\n" ${nfail}
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
new file mode 100755
index 000000000..a5ba14976
--- /dev/null
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -0,0 +1,1439 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test is for checking IPv4 and IPv6 FIB behavior in response to
+# different events.
+
+ret=0
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+# all tests in this script. Can be overridden with -t option
+TESTS="unregister down carrier nexthop ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric"
+VERBOSE=0
+PAUSE_ON_FAIL=no
+PAUSE=no
+IP="ip -netns testns"
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ printf " TEST: %-60s [ OK ]\n" "${msg}"
+ nsuccess=$((nsuccess+1))
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf " TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+
+ if [ "${PAUSE}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+}
+
+setup()
+{
+ set -e
+ ip netns add testns
+ $IP link set dev lo up
+
+ $IP link add dummy0 type dummy
+ $IP link set dev dummy0 up
+ $IP address add 198.51.100.1/24 dev dummy0
+ $IP -6 address add 2001:db8:1::1/64 dev dummy0
+ set +e
+
+}
+
+cleanup()
+{
+ $IP link del dev dummy0 &> /dev/null
+ ip netns del testns
+}
+
+get_linklocal()
+{
+ local dev=$1
+ local addr
+
+ addr=$($IP -6 -br addr show dev ${dev} | \
+ awk '{
+ for (i = 3; i <= NF; ++i) {
+ if ($i ~ /^fe80/)
+ print $i
+ }
+ }'
+ )
+ addr=${addr/\/*}
+
+ [ -z "$addr" ] && return 1
+
+ echo $addr
+
+ return 0
+}
+
+fib_unreg_unicast_test()
+{
+ echo
+ echo "Single path route test"
+
+ setup
+
+ echo " Start point"
+ $IP route get fibmatch 198.51.100.2 &> /dev/null
+ log_test $? 0 "IPv4 fibmatch"
+ $IP -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+ log_test $? 0 "IPv6 fibmatch"
+
+ set -e
+ $IP link del dev dummy0
+ set +e
+
+ echo " Nexthop device deleted"
+ $IP route get fibmatch 198.51.100.2 &> /dev/null
+ log_test $? 2 "IPv4 fibmatch - no route"
+ $IP -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+ log_test $? 2 "IPv6 fibmatch - no route"
+
+ cleanup
+}
+
+fib_unreg_multipath_test()
+{
+
+ echo
+ echo "Multipath route test"
+
+ setup
+
+ set -e
+ $IP link add dummy1 type dummy
+ $IP link set dev dummy1 up
+ $IP address add 192.0.2.1/24 dev dummy1
+ $IP -6 address add 2001:db8:2::1/64 dev dummy1
+
+ $IP route add 203.0.113.0/24 \
+ nexthop via 198.51.100.2 dev dummy0 \
+ nexthop via 192.0.2.2 dev dummy1
+ $IP -6 route add 2001:db8:3::/64 \
+ nexthop via 2001:db8:1::2 dev dummy0 \
+ nexthop via 2001:db8:2::2 dev dummy1
+ set +e
+
+ echo " Start point"
+ $IP route get fibmatch 203.0.113.1 &> /dev/null
+ log_test $? 0 "IPv4 fibmatch"
+ $IP -6 route get fibmatch 2001:db8:3::1 &> /dev/null
+ log_test $? 0 "IPv6 fibmatch"
+
+ set -e
+ $IP link del dev dummy0
+ set +e
+
+ echo " One nexthop device deleted"
+ $IP route get fibmatch 203.0.113.1 &> /dev/null
+ log_test $? 2 "IPv4 - multipath route removed on delete"
+
+ $IP -6 route get fibmatch 2001:db8:3::1 &> /dev/null
+ # In IPv6 we do not flush the entire multipath route.
+ log_test $? 0 "IPv6 - multipath down to single path"
+
+ set -e
+ $IP link del dev dummy1
+ set +e
+
+ echo " Second nexthop device deleted"
+ $IP -6 route get fibmatch 2001:db8:3::1 &> /dev/null
+ log_test $? 2 "IPv6 - no route"
+
+ cleanup
+}
+
+fib_unreg_test()
+{
+ fib_unreg_unicast_test
+ fib_unreg_multipath_test
+}
+
+fib_down_unicast_test()
+{
+ echo
+ echo "Single path, admin down"
+
+ setup
+
+ echo " Start point"
+ $IP route get fibmatch 198.51.100.2 &> /dev/null
+ log_test $? 0 "IPv4 fibmatch"
+ $IP -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+ log_test $? 0 "IPv6 fibmatch"
+
+ set -e
+ $IP link set dev dummy0 down
+ set +e
+
+ echo " Route deleted on down"
+ $IP route get fibmatch 198.51.100.2 &> /dev/null
+ log_test $? 2 "IPv4 fibmatch"
+ $IP -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+ log_test $? 2 "IPv6 fibmatch"
+
+ cleanup
+}
+
+fib_down_multipath_test_do()
+{
+ local down_dev=$1
+ local up_dev=$2
+
+ $IP route get fibmatch 203.0.113.1 \
+ oif $down_dev &> /dev/null
+ log_test $? 2 "IPv4 fibmatch on down device"
+ $IP -6 route get fibmatch 2001:db8:3::1 \
+ oif $down_dev &> /dev/null
+ log_test $? 2 "IPv6 fibmatch on down device"
+
+ $IP route get fibmatch 203.0.113.1 \
+ oif $up_dev &> /dev/null
+ log_test $? 0 "IPv4 fibmatch on up device"
+ $IP -6 route get fibmatch 2001:db8:3::1 \
+ oif $up_dev &> /dev/null
+ log_test $? 0 "IPv6 fibmatch on up device"
+
+ $IP route get fibmatch 203.0.113.1 | \
+ grep $down_dev | grep -q "dead linkdown"
+ log_test $? 0 "IPv4 flags on down device"
+ $IP -6 route get fibmatch 2001:db8:3::1 | \
+ grep $down_dev | grep -q "dead linkdown"
+ log_test $? 0 "IPv6 flags on down device"
+
+ $IP route get fibmatch 203.0.113.1 | \
+ grep $up_dev | grep -q "dead linkdown"
+ log_test $? 1 "IPv4 flags on up device"
+ $IP -6 route get fibmatch 2001:db8:3::1 | \
+ grep $up_dev | grep -q "dead linkdown"
+ log_test $? 1 "IPv6 flags on up device"
+}
+
+fib_down_multipath_test()
+{
+ echo
+ echo "Admin down multipath"
+
+ setup
+
+ set -e
+ $IP link add dummy1 type dummy
+ $IP link set dev dummy1 up
+
+ $IP address add 192.0.2.1/24 dev dummy1
+ $IP -6 address add 2001:db8:2::1/64 dev dummy1
+
+ $IP route add 203.0.113.0/24 \
+ nexthop via 198.51.100.2 dev dummy0 \
+ nexthop via 192.0.2.2 dev dummy1
+ $IP -6 route add 2001:db8:3::/64 \
+ nexthop via 2001:db8:1::2 dev dummy0 \
+ nexthop via 2001:db8:2::2 dev dummy1
+ set +e
+
+ echo " Verify start point"
+ $IP route get fibmatch 203.0.113.1 &> /dev/null
+ log_test $? 0 "IPv4 fibmatch"
+
+ $IP -6 route get fibmatch 2001:db8:3::1 &> /dev/null
+ log_test $? 0 "IPv6 fibmatch"
+
+ set -e
+ $IP link set dev dummy0 down
+ set +e
+
+ echo " One device down, one up"
+ fib_down_multipath_test_do "dummy0" "dummy1"
+
+ set -e
+ $IP link set dev dummy0 up
+ $IP link set dev dummy1 down
+ set +e
+
+ echo " Other device down and up"
+ fib_down_multipath_test_do "dummy1" "dummy0"
+
+ set -e
+ $IP link set dev dummy0 down
+ set +e
+
+ echo " Both devices down"
+ $IP route get fibmatch 203.0.113.1 &> /dev/null
+ log_test $? 2 "IPv4 fibmatch"
+ $IP -6 route get fibmatch 2001:db8:3::1 &> /dev/null
+ log_test $? 2 "IPv6 fibmatch"
+
+ $IP link del dev dummy1
+ cleanup
+}
+
+fib_down_test()
+{
+ fib_down_unicast_test
+ fib_down_multipath_test
+}
+
+# Local routes should not be affected when carrier changes.
+fib_carrier_local_test()
+{
+ echo
+ echo "Local carrier tests - single path"
+
+ setup
+
+ set -e
+ $IP link set dev dummy0 carrier on
+ set +e
+
+ echo " Start point"
+ $IP route get fibmatch 198.51.100.1 &> /dev/null
+ log_test $? 0 "IPv4 fibmatch"
+ $IP -6 route get fibmatch 2001:db8:1::1 &> /dev/null
+ log_test $? 0 "IPv6 fibmatch"
+
+ $IP route get fibmatch 198.51.100.1 | \
+ grep -q "linkdown"
+ log_test $? 1 "IPv4 - no linkdown flag"
+ $IP -6 route get fibmatch 2001:db8:1::1 | \
+ grep -q "linkdown"
+ log_test $? 1 "IPv6 - no linkdown flag"
+
+ set -e
+ $IP link set dev dummy0 carrier off
+ sleep 1
+ set +e
+
+ echo " Carrier off on nexthop"
+ $IP route get fibmatch 198.51.100.1 &> /dev/null
+ log_test $? 0 "IPv4 fibmatch"
+ $IP -6 route get fibmatch 2001:db8:1::1 &> /dev/null
+ log_test $? 0 "IPv6 fibmatch"
+
+ $IP route get fibmatch 198.51.100.1 | \
+ grep -q "linkdown"
+ log_test $? 1 "IPv4 - linkdown flag set"
+ $IP -6 route get fibmatch 2001:db8:1::1 | \
+ grep -q "linkdown"
+ log_test $? 1 "IPv6 - linkdown flag set"
+
+ set -e
+ $IP address add 192.0.2.1/24 dev dummy0
+ $IP -6 address add 2001:db8:2::1/64 dev dummy0
+ set +e
+
+ echo " Route to local address with carrier down"
+ $IP route get fibmatch 192.0.2.1 &> /dev/null
+ log_test $? 0 "IPv4 fibmatch"
+ $IP -6 route get fibmatch 2001:db8:2::1 &> /dev/null
+ log_test $? 0 "IPv6 fibmatch"
+
+ $IP route get fibmatch 192.0.2.1 | \
+ grep -q "linkdown"
+ log_test $? 1 "IPv4 linkdown flag set"
+ $IP -6 route get fibmatch 2001:db8:2::1 | \
+ grep -q "linkdown"
+ log_test $? 1 "IPv6 linkdown flag set"
+
+ cleanup
+}
+
+fib_carrier_unicast_test()
+{
+ ret=0
+
+ echo
+ echo "Single path route carrier test"
+
+ setup
+
+ set -e
+ $IP link set dev dummy0 carrier on
+ set +e
+
+ echo " Start point"
+ $IP route get fibmatch 198.51.100.2 &> /dev/null
+ log_test $? 0 "IPv4 fibmatch"
+ $IP -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+ log_test $? 0 "IPv6 fibmatch"
+
+ $IP route get fibmatch 198.51.100.2 | \
+ grep -q "linkdown"
+ log_test $? 1 "IPv4 no linkdown flag"
+ $IP -6 route get fibmatch 2001:db8:1::2 | \
+ grep -q "linkdown"
+ log_test $? 1 "IPv6 no linkdown flag"
+
+ set -e
+ $IP link set dev dummy0 carrier off
+ sleep 1
+ set +e
+
+ echo " Carrier down"
+ $IP route get fibmatch 198.51.100.2 &> /dev/null
+ log_test $? 0 "IPv4 fibmatch"
+ $IP -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+ log_test $? 0 "IPv6 fibmatch"
+
+ $IP route get fibmatch 198.51.100.2 | \
+ grep -q "linkdown"
+ log_test $? 0 "IPv4 linkdown flag set"
+ $IP -6 route get fibmatch 2001:db8:1::2 | \
+ grep -q "linkdown"
+ log_test $? 0 "IPv6 linkdown flag set"
+
+ set -e
+ $IP address add 192.0.2.1/24 dev dummy0
+ $IP -6 address add 2001:db8:2::1/64 dev dummy0
+ set +e
+
+ echo " Second address added with carrier down"
+ $IP route get fibmatch 192.0.2.2 &> /dev/null
+ log_test $? 0 "IPv4 fibmatch"
+ $IP -6 route get fibmatch 2001:db8:2::2 &> /dev/null
+ log_test $? 0 "IPv6 fibmatch"
+
+ $IP route get fibmatch 192.0.2.2 | \
+ grep -q "linkdown"
+ log_test $? 0 "IPv4 linkdown flag set"
+ $IP -6 route get fibmatch 2001:db8:2::2 | \
+ grep -q "linkdown"
+ log_test $? 0 "IPv6 linkdown flag set"
+
+ cleanup
+}
+
+fib_carrier_test()
+{
+ fib_carrier_local_test
+ fib_carrier_unicast_test
+}
+
+################################################################################
+# Tests on nexthop spec
+
+# run 'ip route add' with given spec
+add_rt()
+{
+ local desc="$1"
+ local erc=$2
+ local vrf=$3
+ local pfx=$4
+ local gw=$5
+ local dev=$6
+ local cmd out rc
+
+ [ "$vrf" = "-" ] && vrf="default"
+ [ -n "$gw" ] && gw="via $gw"
+ [ -n "$dev" ] && dev="dev $dev"
+
+ cmd="$IP route add vrf $vrf $pfx $gw $dev"
+ if [ "$VERBOSE" = "1" ]; then
+ printf "\n COMMAND: $cmd\n"
+ fi
+
+ out=$(eval $cmd 2>&1)
+ rc=$?
+ if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+ echo " $out"
+ fi
+ log_test $rc $erc "$desc"
+}
+
+fib4_nexthop()
+{
+ echo
+ echo "IPv4 nexthop tests"
+
+ echo "<<< write me >>>"
+}
+
+fib6_nexthop()
+{
+ local lldummy=$(get_linklocal dummy0)
+ local llv1=$(get_linklocal dummy0)
+
+ if [ -z "$lldummy" ]; then
+ echo "Failed to get linklocal address for dummy0"
+ return 1
+ fi
+ if [ -z "$llv1" ]; then
+ echo "Failed to get linklocal address for veth1"
+ return 1
+ fi
+
+ echo
+ echo "IPv6 nexthop tests"
+
+ add_rt "Directly connected nexthop, unicast address" 0 \
+ - 2001:db8:101::/64 2001:db8:1::2
+ add_rt "Directly connected nexthop, unicast address with device" 0 \
+ - 2001:db8:102::/64 2001:db8:1::2 "dummy0"
+ add_rt "Gateway is linklocal address" 0 \
+ - 2001:db8:103::1/64 $llv1 "veth0"
+
+ # fails because LL address requires a device
+ add_rt "Gateway is linklocal address, no device" 2 \
+ - 2001:db8:104::1/64 $llv1
+
+ # local address can not be a gateway
+ add_rt "Gateway can not be local unicast address" 2 \
+ - 2001:db8:105::/64 2001:db8:1::1
+ add_rt "Gateway can not be local unicast address, with device" 2 \
+ - 2001:db8:106::/64 2001:db8:1::1 "dummy0"
+ add_rt "Gateway can not be a local linklocal address" 2 \
+ - 2001:db8:107::1/64 $lldummy "dummy0"
+
+ # VRF tests
+ add_rt "Gateway can be local address in a VRF" 0 \
+ - 2001:db8:108::/64 2001:db8:51::2
+ add_rt "Gateway can be local address in a VRF, with device" 0 \
+ - 2001:db8:109::/64 2001:db8:51::2 "veth0"
+ add_rt "Gateway can be local linklocal address in a VRF" 0 \
+ - 2001:db8:110::1/64 $llv1 "veth0"
+
+ add_rt "Redirect to VRF lookup" 0 \
+ - 2001:db8:111::/64 "" "red"
+
+ add_rt "VRF route, gateway can be local address in default VRF" 0 \
+ red 2001:db8:112::/64 2001:db8:51::1
+
+ # local address in same VRF fails
+ add_rt "VRF route, gateway can not be a local address" 2 \
+ red 2001:db8:113::1/64 2001:db8:2::1
+ add_rt "VRF route, gateway can not be a local addr with device" 2 \
+ red 2001:db8:114::1/64 2001:db8:2::1 "dummy1"
+}
+
+# Default VRF:
+# dummy0 - 198.51.100.1/24 2001:db8:1::1/64
+# veth0 - 192.0.2.1/24 2001:db8:51::1/64
+#
+# VRF red:
+# dummy1 - 192.168.2.1/24 2001:db8:2::1/64
+# veth1 - 192.0.2.2/24 2001:db8:51::2/64
+#
+# [ dummy0 veth0 ]--[ veth1 dummy1 ]
+
+fib_nexthop_test()
+{
+ setup
+
+ set -e
+
+ $IP -4 rule add pref 32765 table local
+ $IP -4 rule del pref 0
+ $IP -6 rule add pref 32765 table local
+ $IP -6 rule del pref 0
+
+ $IP link add red type vrf table 1
+ $IP link set red up
+ $IP -4 route add vrf red unreachable default metric 4278198272
+ $IP -6 route add vrf red unreachable default metric 4278198272
+
+ $IP link add veth0 type veth peer name veth1
+ $IP link set dev veth0 up
+ $IP address add 192.0.2.1/24 dev veth0
+ $IP -6 address add 2001:db8:51::1/64 dev veth0
+
+ $IP link set dev veth1 vrf red up
+ $IP address add 192.0.2.2/24 dev veth1
+ $IP -6 address add 2001:db8:51::2/64 dev veth1
+
+ $IP link add dummy1 type dummy
+ $IP link set dev dummy1 vrf red up
+ $IP address add 192.168.2.1/24 dev dummy1
+ $IP -6 address add 2001:db8:2::1/64 dev dummy1
+ set +e
+
+ sleep 1
+ fib4_nexthop
+ fib6_nexthop
+
+ (
+ $IP link del dev dummy1
+ $IP link del veth0
+ $IP link del red
+ ) 2>/dev/null
+ cleanup
+}
+
+################################################################################
+# Tests on route add and replace
+
+run_cmd()
+{
+ local cmd="$1"
+ local out
+ local stderr="2>/dev/null"
+
+ if [ "$VERBOSE" = "1" ]; then
+ printf " COMMAND: $cmd\n"
+ stderr=
+ fi
+
+ out=$(eval $cmd $stderr)
+ rc=$?
+ if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+ echo " $out"
+ fi
+
+ [ "$VERBOSE" = "1" ] && echo
+
+ return $rc
+}
+
+check_expected()
+{
+ local out="$1"
+ local expected="$2"
+ local rc=0
+
+ [ "${out}" = "${expected}" ] && return 0
+
+ if [ -z "${out}" ]; then
+ if [ "$VERBOSE" = "1" ]; then
+ printf "\nNo route entry found\n"
+ printf "Expected:\n"
+ printf " ${expected}\n"
+ fi
+ return 1
+ fi
+
+ # tricky way to convert output to 1-line without ip's
+ # messy '\'; this drops all extra white space
+ out=$(echo ${out})
+ if [ "${out}" != "${expected}" ]; then
+ rc=1
+ if [ "${VERBOSE}" = "1" ]; then
+ printf " Unexpected route entry. Have:\n"
+ printf " ${out}\n"
+ printf " Expected:\n"
+ printf " ${expected}\n\n"
+ fi
+ fi
+
+ return $rc
+}
+
+# add route for a prefix, flushing any existing routes first
+# expected to be the first step of a test
+add_route6()
+{
+ local pfx="$1"
+ local nh="$2"
+ local out
+
+ if [ "$VERBOSE" = "1" ]; then
+ echo
+ echo " ##################################################"
+ echo
+ fi
+
+ run_cmd "$IP -6 ro flush ${pfx}"
+ [ $? -ne 0 ] && exit 1
+
+ out=$($IP -6 ro ls match ${pfx})
+ if [ -n "$out" ]; then
+ echo "Failed to flush routes for prefix used for tests."
+ exit 1
+ fi
+
+ run_cmd "$IP -6 ro add ${pfx} ${nh}"
+ if [ $? -ne 0 ]; then
+ echo "Failed to add initial route for test."
+ exit 1
+ fi
+}
+
+# add initial route - used in replace route tests
+add_initial_route6()
+{
+ add_route6 "2001:db8:104::/64" "$1"
+}
+
+check_route6()
+{
+ local pfx="2001:db8:104::/64"
+ local expected="$1"
+ local out
+ local rc=0
+
+ out=$($IP -6 ro ls match ${pfx} | sed -e 's/ pref medium//')
+ check_expected "${out}" "${expected}"
+}
+
+route_cleanup()
+{
+ $IP li del red 2>/dev/null
+ $IP li del dummy1 2>/dev/null
+ $IP li del veth1 2>/dev/null
+ $IP li del veth3 2>/dev/null
+
+ cleanup &> /dev/null
+}
+
+route_setup()
+{
+ route_cleanup
+ setup
+
+ [ "${VERBOSE}" = "1" ] && set -x
+ set -e
+
+ $IP li add red up type vrf table 101
+ $IP li add veth1 type veth peer name veth2
+ $IP li add veth3 type veth peer name veth4
+
+ $IP li set veth1 up
+ $IP li set veth3 up
+ $IP li set veth2 vrf red up
+ $IP li set veth4 vrf red up
+ $IP li add dummy1 type dummy
+ $IP li set dummy1 vrf red up
+
+ $IP -6 addr add 2001:db8:101::1/64 dev veth1
+ $IP -6 addr add 2001:db8:101::2/64 dev veth2
+ $IP -6 addr add 2001:db8:103::1/64 dev veth3
+ $IP -6 addr add 2001:db8:103::2/64 dev veth4
+ $IP -6 addr add 2001:db8:104::1/64 dev dummy1
+
+ $IP addr add 172.16.101.1/24 dev veth1
+ $IP addr add 172.16.101.2/24 dev veth2
+ $IP addr add 172.16.103.1/24 dev veth3
+ $IP addr add 172.16.103.2/24 dev veth4
+ $IP addr add 172.16.104.1/24 dev dummy1
+
+ set +e
+}
+
+# assumption is that basic add of a single path route works
+# otherwise just adding an address on an interface is broken
+ipv6_rt_add()
+{
+ local rc
+
+ echo
+ echo "IPv6 route add / append tests"
+
+ # route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL
+ add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
+ run_cmd "$IP -6 ro add 2001:db8:104::/64 via 2001:db8:103::2"
+ log_test $? 2 "Attempt to add duplicate route - gw"
+
+ # route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL
+ add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
+ run_cmd "$IP -6 ro add 2001:db8:104::/64 dev veth3"
+ log_test $? 2 "Attempt to add duplicate route - dev only"
+
+ # route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL
+ add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
+ run_cmd "$IP -6 ro add unreachable 2001:db8:104::/64"
+ log_test $? 2 "Attempt to add duplicate route - reject route"
+
+ # route append with same prefix adds a new route
+ # - iproute2 sets NLM_F_CREATE | NLM_F_APPEND
+ add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
+ run_cmd "$IP -6 ro append 2001:db8:104::/64 via 2001:db8:103::2"
+ check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
+ log_test $? 0 "Append nexthop to existing route - gw"
+
+ # insert mpath directly
+ add_route6 "2001:db8:104::/64" "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+ check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
+ log_test $? 0 "Add multipath route"
+
+ add_route6 "2001:db8:104::/64" "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+ run_cmd "$IP -6 ro add 2001:db8:104::/64 nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+ log_test $? 2 "Attempt to add duplicate multipath route"
+
+ # insert of a second route without append but different metric
+ add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
+ run_cmd "$IP -6 ro add 2001:db8:104::/64 via 2001:db8:103::2 metric 512"
+ rc=$?
+ if [ $rc -eq 0 ]; then
+ run_cmd "$IP -6 ro add 2001:db8:104::/64 via 2001:db8:103::3 metric 256"
+ rc=$?
+ fi
+ log_test $rc 0 "Route add with different metrics"
+
+ run_cmd "$IP -6 ro del 2001:db8:104::/64 metric 512"
+ rc=$?
+ if [ $rc -eq 0 ]; then
+ check_route6 "2001:db8:104::/64 via 2001:db8:103::3 dev veth3 metric 256 2001:db8:104::/64 via 2001:db8:101::2 dev veth1 metric 1024"
+ rc=$?
+ fi
+ log_test $rc 0 "Route delete with metric"
+}
+
+ipv6_rt_replace_single()
+{
+ # single path with single path
+ #
+ add_initial_route6 "via 2001:db8:101::2"
+ run_cmd "$IP -6 ro replace 2001:db8:104::/64 via 2001:db8:103::2"
+ check_route6 "2001:db8:104::/64 via 2001:db8:103::2 dev veth3 metric 1024"
+ log_test $? 0 "Single path with single path"
+
+ # single path with multipath
+ #
+ add_initial_route6 "nexthop via 2001:db8:101::2"
+ run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:101::3 nexthop via 2001:db8:103::2"
+ check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::3 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
+ log_test $? 0 "Single path with multipath"
+
+ # single path with single path using MULTIPATH attribute
+ #
+ add_initial_route6 "via 2001:db8:101::2"
+ run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:103::2"
+ check_route6 "2001:db8:104::/64 via 2001:db8:103::2 dev veth3 metric 1024"
+ log_test $? 0 "Single path with single path via multipath attribute"
+
+ # route replace fails - invalid nexthop
+ add_initial_route6 "via 2001:db8:101::2"
+ run_cmd "$IP -6 ro replace 2001:db8:104::/64 via 2001:db8:104::2"
+ if [ $? -eq 0 ]; then
+ # previous command is expected to fail so if it returns 0
+ # that means the test failed.
+ log_test 0 1 "Invalid nexthop"
+ else
+ check_route6 "2001:db8:104::/64 via 2001:db8:101::2 dev veth1 metric 1024"
+ log_test $? 0 "Invalid nexthop"
+ fi
+
+ # replace non-existent route
+ # - note use of change versus replace since ip adds NLM_F_CREATE
+ # for replace
+ add_initial_route6 "via 2001:db8:101::2"
+ run_cmd "$IP -6 ro change 2001:db8:105::/64 via 2001:db8:101::2"
+ log_test $? 2 "Single path - replace of non-existent route"
+}
+
+ipv6_rt_replace_mpath()
+{
+ # multipath with multipath
+ add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+ run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:101::3 nexthop via 2001:db8:103::3"
+ check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::3 dev veth1 weight 1 nexthop via 2001:db8:103::3 dev veth3 weight 1"
+ log_test $? 0 "Multipath with multipath"
+
+ # multipath with single
+ add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+ run_cmd "$IP -6 ro replace 2001:db8:104::/64 via 2001:db8:101::3"
+ check_route6 "2001:db8:104::/64 via 2001:db8:101::3 dev veth1 metric 1024"
+ log_test $? 0 "Multipath with single path"
+
+ # multipath with single
+ add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+ run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:101::3"
+ check_route6 "2001:db8:104::/64 via 2001:db8:101::3 dev veth1 metric 1024"
+ log_test $? 0 "Multipath with single path via multipath attribute"
+
+ # multipath with dev-only
+ add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+ run_cmd "$IP -6 ro replace 2001:db8:104::/64 dev veth1"
+ check_route6 "2001:db8:104::/64 dev veth1 metric 1024"
+ log_test $? 0 "Multipath with dev-only"
+
+ # route replace fails - invalid nexthop 1
+ add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+ run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:111::3 nexthop via 2001:db8:103::3"
+ check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
+ log_test $? 0 "Multipath - invalid first nexthop"
+
+ # route replace fails - invalid nexthop 2
+ add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+ run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:101::3 nexthop via 2001:db8:113::3"
+ check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
+ log_test $? 0 "Multipath - invalid second nexthop"
+
+ # multipath non-existent route
+ add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+ run_cmd "$IP -6 ro change 2001:db8:105::/64 nexthop via 2001:db8:101::3 nexthop via 2001:db8:103::3"
+ log_test $? 2 "Multipath - replace of non-existent route"
+}
+
+ipv6_rt_replace()
+{
+ echo
+ echo "IPv6 route replace tests"
+
+ ipv6_rt_replace_single
+ ipv6_rt_replace_mpath
+}
+
+ipv6_route_test()
+{
+ route_setup
+
+ ipv6_rt_add
+ ipv6_rt_replace
+
+ route_cleanup
+}
+
+ip_addr_metric_check()
+{
+ ip addr help 2>&1 | grep -q metric
+ if [ $? -ne 0 ]; then
+ echo "iproute2 command does not support metric for addresses. Skipping test"
+ return 1
+ fi
+
+ return 0
+}
+
+ipv6_addr_metric_test()
+{
+ local rc
+
+ echo
+ echo "IPv6 prefix route tests"
+
+ ip_addr_metric_check || return 1
+
+ setup
+
+ set -e
+ $IP li add dummy1 type dummy
+ $IP li add dummy2 type dummy
+ $IP li set dummy1 up
+ $IP li set dummy2 up
+
+ # default entry is metric 256
+ run_cmd "$IP -6 addr add dev dummy1 2001:db8:104::1/64"
+ run_cmd "$IP -6 addr add dev dummy2 2001:db8:104::2/64"
+ set +e
+
+ check_route6 "2001:db8:104::/64 dev dummy1 proto kernel metric 256 2001:db8:104::/64 dev dummy2 proto kernel metric 256"
+ log_test $? 0 "Default metric"
+
+ set -e
+ run_cmd "$IP -6 addr flush dev dummy1"
+ run_cmd "$IP -6 addr add dev dummy1 2001:db8:104::1/64 metric 257"
+ set +e
+
+ check_route6 "2001:db8:104::/64 dev dummy2 proto kernel metric 256 2001:db8:104::/64 dev dummy1 proto kernel metric 257"
+ log_test $? 0 "User specified metric on first device"
+
+ set -e
+ run_cmd "$IP -6 addr flush dev dummy2"
+ run_cmd "$IP -6 addr add dev dummy2 2001:db8:104::2/64 metric 258"
+ set +e
+
+ check_route6 "2001:db8:104::/64 dev dummy1 proto kernel metric 257 2001:db8:104::/64 dev dummy2 proto kernel metric 258"
+ log_test $? 0 "User specified metric on second device"
+
+ run_cmd "$IP -6 addr del dev dummy1 2001:db8:104::1/64 metric 257"
+ rc=$?
+ if [ $rc -eq 0 ]; then
+ check_route6 "2001:db8:104::/64 dev dummy2 proto kernel metric 258"
+ rc=$?
+ fi
+ log_test $rc 0 "Delete of address on first device"
+
+ run_cmd "$IP -6 addr change dev dummy2 2001:db8:104::2/64 metric 259"
+ rc=$?
+ if [ $rc -eq 0 ]; then
+ check_route6 "2001:db8:104::/64 dev dummy2 proto kernel metric 259"
+ rc=$?
+ fi
+ log_test $rc 0 "Modify metric of address"
+
+ # verify prefix route removed on down
+ run_cmd "ip netns exec testns sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1"
+ run_cmd "$IP li set dev dummy2 down"
+ rc=$?
+ if [ $rc -eq 0 ]; then
+ out=$($IP -6 ro ls match 2001:db8:104::/64)
+ check_expected "${out}" ""
+ rc=$?
+ fi
+ log_test $rc 0 "Prefix route removed on link down"
+
+ # verify prefix route re-inserted with assigned metric
+ run_cmd "$IP li set dev dummy2 up"
+ rc=$?
+ if [ $rc -eq 0 ]; then
+ check_route6 "2001:db8:104::/64 dev dummy2 proto kernel metric 259"
+ rc=$?
+ fi
+ log_test $rc 0 "Prefix route with metric on link up"
+
+ # verify peer metric added correctly
+ set -e
+ run_cmd "$IP -6 addr flush dev dummy2"
+ run_cmd "$IP -6 addr add dev dummy2 2001:db8:104::1 peer 2001:db8:104::2 metric 260"
+ set +e
+
+ check_route6 "2001:db8:104::1 dev dummy2 proto kernel metric 260"
+ log_test $? 0 "Set metric with peer route on local side"
+ check_route6 "2001:db8:104::2 dev dummy2 proto kernel metric 260"
+ log_test $? 0 "Set metric with peer route on peer side"
+
+ set -e
+ run_cmd "$IP -6 addr change dev dummy2 2001:db8:104::1 peer 2001:db8:104::3 metric 261"
+ set +e
+
+ check_route6 "2001:db8:104::1 dev dummy2 proto kernel metric 261"
+ log_test $? 0 "Modify metric and peer address on local side"
+ check_route6 "2001:db8:104::3 dev dummy2 proto kernel metric 261"
+ log_test $? 0 "Modify metric and peer address on peer side"
+
+ $IP li del dummy1
+ $IP li del dummy2
+ cleanup
+}
+
+# add route for a prefix, flushing any existing routes first
+# expected to be the first step of a test
+add_route()
+{
+ local pfx="$1"
+ local nh="$2"
+ local out
+
+ if [ "$VERBOSE" = "1" ]; then
+ echo
+ echo " ##################################################"
+ echo
+ fi
+
+ run_cmd "$IP ro flush ${pfx}"
+ [ $? -ne 0 ] && exit 1
+
+ out=$($IP ro ls match ${pfx})
+ if [ -n "$out" ]; then
+ echo "Failed to flush routes for prefix used for tests."
+ exit 1
+ fi
+
+ run_cmd "$IP ro add ${pfx} ${nh}"
+ if [ $? -ne 0 ]; then
+ echo "Failed to add initial route for test."
+ exit 1
+ fi
+}
+
+# add initial route - used in replace route tests
+add_initial_route()
+{
+ add_route "172.16.104.0/24" "$1"
+}
+
+check_route()
+{
+ local pfx="172.16.104.0/24"
+ local expected="$1"
+ local out
+
+ out=$($IP ro ls match ${pfx})
+ check_expected "${out}" "${expected}"
+}
+
+# assumption is that basic add of a single path route works
+# otherwise just adding an address on an interface is broken
+ipv4_rt_add()
+{
+ local rc
+
+ echo
+ echo "IPv4 route add / append tests"
+
+ # route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL
+ add_route "172.16.104.0/24" "via 172.16.101.2"
+ run_cmd "$IP ro add 172.16.104.0/24 via 172.16.103.2"
+ log_test $? 2 "Attempt to add duplicate route - gw"
+
+ # route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL
+ add_route "172.16.104.0/24" "via 172.16.101.2"
+ run_cmd "$IP ro add 172.16.104.0/24 dev veth3"
+ log_test $? 2 "Attempt to add duplicate route - dev only"
+
+ # route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL
+ add_route "172.16.104.0/24" "via 172.16.101.2"
+ run_cmd "$IP ro add unreachable 172.16.104.0/24"
+ log_test $? 2 "Attempt to add duplicate route - reject route"
+
+ # iproute2 prepend only sets NLM_F_CREATE
+ # - adds a new route; does NOT convert existing route to ECMP
+ add_route "172.16.104.0/24" "via 172.16.101.2"
+ run_cmd "$IP ro prepend 172.16.104.0/24 via 172.16.103.2"
+ check_route "172.16.104.0/24 via 172.16.103.2 dev veth3 172.16.104.0/24 via 172.16.101.2 dev veth1"
+ log_test $? 0 "Add new nexthop for existing prefix"
+
+ # route append with same prefix adds a new route
+ # - iproute2 sets NLM_F_CREATE | NLM_F_APPEND
+ add_route "172.16.104.0/24" "via 172.16.101.2"
+ run_cmd "$IP ro append 172.16.104.0/24 via 172.16.103.2"
+ check_route "172.16.104.0/24 via 172.16.101.2 dev veth1 172.16.104.0/24 via 172.16.103.2 dev veth3"
+ log_test $? 0 "Append nexthop to existing route - gw"
+
+ add_route "172.16.104.0/24" "via 172.16.101.2"
+ run_cmd "$IP ro append 172.16.104.0/24 dev veth3"
+ check_route "172.16.104.0/24 via 172.16.101.2 dev veth1 172.16.104.0/24 dev veth3 scope link"
+ log_test $? 0 "Append nexthop to existing route - dev only"
+
+ add_route "172.16.104.0/24" "via 172.16.101.2"
+ run_cmd "$IP ro append unreachable 172.16.104.0/24"
+ check_route "172.16.104.0/24 via 172.16.101.2 dev veth1 unreachable 172.16.104.0/24"
+ log_test $? 0 "Append nexthop to existing route - reject route"
+
+ run_cmd "$IP ro flush 172.16.104.0/24"
+ run_cmd "$IP ro add unreachable 172.16.104.0/24"
+ run_cmd "$IP ro append 172.16.104.0/24 via 172.16.103.2"
+ check_route "unreachable 172.16.104.0/24 172.16.104.0/24 via 172.16.103.2 dev veth3"
+ log_test $? 0 "Append nexthop to existing reject route - gw"
+
+ run_cmd "$IP ro flush 172.16.104.0/24"
+ run_cmd "$IP ro add unreachable 172.16.104.0/24"
+ run_cmd "$IP ro append 172.16.104.0/24 dev veth3"
+ check_route "unreachable 172.16.104.0/24 172.16.104.0/24 dev veth3 scope link"
+ log_test $? 0 "Append nexthop to existing reject route - dev only"
+
+ # insert mpath directly
+ add_route "172.16.104.0/24" "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+ check_route "172.16.104.0/24 nexthop via 172.16.101.2 dev veth1 weight 1 nexthop via 172.16.103.2 dev veth3 weight 1"
+ log_test $? 0 "add multipath route"
+
+ add_route "172.16.104.0/24" "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+ run_cmd "$IP ro add 172.16.104.0/24 nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+ log_test $? 2 "Attempt to add duplicate multipath route"
+
+ # insert of a second route without append but different metric
+ add_route "172.16.104.0/24" "via 172.16.101.2"
+ run_cmd "$IP ro add 172.16.104.0/24 via 172.16.103.2 metric 512"
+ rc=$?
+ if [ $rc -eq 0 ]; then
+ run_cmd "$IP ro add 172.16.104.0/24 via 172.16.103.3 metric 256"
+ rc=$?
+ fi
+ log_test $rc 0 "Route add with different metrics"
+
+ run_cmd "$IP ro del 172.16.104.0/24 metric 512"
+ rc=$?
+ if [ $rc -eq 0 ]; then
+ check_route "172.16.104.0/24 via 172.16.101.2 dev veth1 172.16.104.0/24 via 172.16.103.3 dev veth3 metric 256"
+ rc=$?
+ fi
+ log_test $rc 0 "Route delete with metric"
+}
+
+ipv4_rt_replace_single()
+{
+ # single path with single path
+ #
+ add_initial_route "via 172.16.101.2"
+ run_cmd "$IP ro replace 172.16.104.0/24 via 172.16.103.2"
+ check_route "172.16.104.0/24 via 172.16.103.2 dev veth3"
+ log_test $? 0 "Single path with single path"
+
+ # single path with multipath
+ #
+ add_initial_route "nexthop via 172.16.101.2"
+ run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.101.3 nexthop via 172.16.103.2"
+ check_route "172.16.104.0/24 nexthop via 172.16.101.3 dev veth1 weight 1 nexthop via 172.16.103.2 dev veth3 weight 1"
+ log_test $? 0 "Single path with multipath"
+
+ # single path with reject
+ #
+ add_initial_route "nexthop via 172.16.101.2"
+ run_cmd "$IP ro replace unreachable 172.16.104.0/24"
+ check_route "unreachable 172.16.104.0/24"
+ log_test $? 0 "Single path with reject route"
+
+ # single path with single path using MULTIPATH attribute
+ #
+ add_initial_route "via 172.16.101.2"
+ run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.103.2"
+ check_route "172.16.104.0/24 via 172.16.103.2 dev veth3"
+ log_test $? 0 "Single path with single path via multipath attribute"
+
+ # route replace fails - invalid nexthop
+ add_initial_route "via 172.16.101.2"
+ run_cmd "$IP ro replace 172.16.104.0/24 via 2001:db8:104::2"
+ if [ $? -eq 0 ]; then
+ # previous command is expected to fail so if it returns 0
+ # that means the test failed.
+ log_test 0 1 "Invalid nexthop"
+ else
+ check_route "172.16.104.0/24 via 172.16.101.2 dev veth1"
+ log_test $? 0 "Invalid nexthop"
+ fi
+
+ # replace non-existent route
+ # - note use of change versus replace since ip adds NLM_F_CREATE
+ # for replace
+ add_initial_route "via 172.16.101.2"
+ run_cmd "$IP ro change 172.16.105.0/24 via 172.16.101.2"
+ log_test $? 2 "Single path - replace of non-existent route"
+}
+
+ipv4_rt_replace_mpath()
+{
+ # multipath with multipath
+ add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+ run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.101.3 nexthop via 172.16.103.3"
+ check_route "172.16.104.0/24 nexthop via 172.16.101.3 dev veth1 weight 1 nexthop via 172.16.103.3 dev veth3 weight 1"
+ log_test $? 0 "Multipath with multipath"
+
+ # multipath with single
+ add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+ run_cmd "$IP ro replace 172.16.104.0/24 via 172.16.101.3"
+ check_route "172.16.104.0/24 via 172.16.101.3 dev veth1"
+ log_test $? 0 "Multipath with single path"
+
+ # multipath with single
+ add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+ run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.101.3"
+ check_route "172.16.104.0/24 via 172.16.101.3 dev veth1"
+ log_test $? 0 "Multipath with single path via multipath attribute"
+
+ # multipath with reject
+ add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+ run_cmd "$IP ro replace unreachable 172.16.104.0/24"
+ check_route "unreachable 172.16.104.0/24"
+ log_test $? 0 "Multipath with reject route"
+
+ # route replace fails - invalid nexthop 1
+ add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+ run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.111.3 nexthop via 172.16.103.3"
+ check_route "172.16.104.0/24 nexthop via 172.16.101.2 dev veth1 weight 1 nexthop via 172.16.103.2 dev veth3 weight 1"
+ log_test $? 0 "Multipath - invalid first nexthop"
+
+ # route replace fails - invalid nexthop 2
+ add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+ run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.101.3 nexthop via 172.16.113.3"
+ check_route "172.16.104.0/24 nexthop via 172.16.101.2 dev veth1 weight 1 nexthop via 172.16.103.2 dev veth3 weight 1"
+ log_test $? 0 "Multipath - invalid second nexthop"
+
+ # multipath non-existent route
+ add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+ run_cmd "$IP ro change 172.16.105.0/24 nexthop via 172.16.101.3 nexthop via 172.16.103.3"
+ log_test $? 2 "Multipath - replace of non-existent route"
+}
+
+ipv4_rt_replace()
+{
+ echo
+ echo "IPv4 route replace tests"
+
+ ipv4_rt_replace_single
+ ipv4_rt_replace_mpath
+}
+
+ipv4_route_test()
+{
+ route_setup
+
+ ipv4_rt_add
+ ipv4_rt_replace
+
+ route_cleanup
+}
+
+ipv4_addr_metric_test()
+{
+ local rc
+
+ echo
+ echo "IPv4 prefix route tests"
+
+ ip_addr_metric_check || return 1
+
+ setup
+
+ set -e
+ $IP li add dummy1 type dummy
+ $IP li add dummy2 type dummy
+ $IP li set dummy1 up
+ $IP li set dummy2 up
+
+ # default entry is metric 256
+ run_cmd "$IP addr add dev dummy1 172.16.104.1/24"
+ run_cmd "$IP addr add dev dummy2 172.16.104.2/24"
+ set +e
+
+ check_route "172.16.104.0/24 dev dummy1 proto kernel scope link src 172.16.104.1 172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2"
+ log_test $? 0 "Default metric"
+
+ set -e
+ run_cmd "$IP addr flush dev dummy1"
+ run_cmd "$IP addr add dev dummy1 172.16.104.1/24 metric 257"
+ set +e
+
+ check_route "172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2 172.16.104.0/24 dev dummy1 proto kernel scope link src 172.16.104.1 metric 257"
+ log_test $? 0 "User specified metric on first device"
+
+ set -e
+ run_cmd "$IP addr flush dev dummy2"
+ run_cmd "$IP addr add dev dummy2 172.16.104.2/24 metric 258"
+ set +e
+
+ check_route "172.16.104.0/24 dev dummy1 proto kernel scope link src 172.16.104.1 metric 257 172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2 metric 258"
+ log_test $? 0 "User specified metric on second device"
+
+ run_cmd "$IP addr del dev dummy1 172.16.104.1/24 metric 257"
+ rc=$?
+ if [ $rc -eq 0 ]; then
+ check_route "172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2 metric 258"
+ rc=$?
+ fi
+ log_test $rc 0 "Delete of address on first device"
+
+ run_cmd "$IP addr change dev dummy2 172.16.104.2/24 metric 259"
+ rc=$?
+ if [ $rc -eq 0 ]; then
+ check_route "172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2 metric 259"
+ rc=$?
+ fi
+ log_test $rc 0 "Modify metric of address"
+
+ # verify prefix route removed on down
+ run_cmd "$IP li set dev dummy2 down"
+ rc=$?
+ if [ $rc -eq 0 ]; then
+ out=$($IP ro ls match 172.16.104.0/24)
+ check_expected "${out}" ""
+ rc=$?
+ fi
+ log_test $rc 0 "Prefix route removed on link down"
+
+ # verify prefix route re-inserted with assigned metric
+ run_cmd "$IP li set dev dummy2 up"
+ rc=$?
+ if [ $rc -eq 0 ]; then
+ check_route "172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2 metric 259"
+ rc=$?
+ fi
+ log_test $rc 0 "Prefix route with metric on link up"
+
+ # explicitly check for metric changes on edge scenarios
+ run_cmd "$IP addr flush dev dummy2"
+ run_cmd "$IP addr add dev dummy2 172.16.104.0/24 metric 259"
+ run_cmd "$IP addr change dev dummy2 172.16.104.0/24 metric 260"
+ rc=$?
+ if [ $rc -eq 0 ]; then
+ check_route "172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.0 metric 260"
+ rc=$?
+ fi
+ log_test $rc 0 "Modify metric of .0/24 address"
+
+ run_cmd "$IP addr flush dev dummy2"
+ run_cmd "$IP addr add dev dummy2 172.16.104.1/32 peer 172.16.104.2 metric 260"
+ rc=$?
+ if [ $rc -eq 0 ]; then
+ check_route "172.16.104.2 dev dummy2 proto kernel scope link src 172.16.104.1 metric 260"
+ rc=$?
+ fi
+ log_test $rc 0 "Set metric of address with peer route"
+
+ run_cmd "$IP addr change dev dummy2 172.16.104.1/32 peer 172.16.104.3 metric 261"
+ rc=$?
+ if [ $rc -eq 0 ]; then
+ check_route "172.16.104.3 dev dummy2 proto kernel scope link src 172.16.104.1 metric 261"
+ rc=$?
+ fi
+ log_test $rc 0 "Modify metric and peer address for peer route"
+
+ $IP li del dummy1
+ $IP li del dummy2
+ cleanup
+}
+
+################################################################################
+# usage
+
+usage()
+{
+ cat <<EOF
+usage: ${0##*/} OPTS
+
+ -t <test> Test(s) to run (default: all)
+ (options: $TESTS)
+ -p Pause on fail
+ -P Pause after each test before cleanup
+ -v verbose mode (show commands and output)
+EOF
+}
+
+################################################################################
+# main
+
+while getopts :t:pPhv o
+do
+ case $o in
+ t) TESTS=$OPTARG;;
+ p) PAUSE_ON_FAIL=yes;;
+ P) PAUSE=yes;;
+ v) VERBOSE=$(($VERBOSE + 1));;
+ h) usage; exit 0;;
+ *) usage; exit 1;;
+ esac
+done
+
+PEER_CMD="ip netns exec ${PEER_NS}"
+
+# make sure we don't pause twice
+[ "${PAUSE}" = "yes" ] && PAUSE_ON_FAIL=no
+
+if [ "$(id -u)" -ne 0 ];then
+ echo "SKIP: Need root privileges"
+ exit $ksft_skip;
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+ip route help 2>&1 | grep -q fibmatch
+if [ $? -ne 0 ]; then
+ echo "SKIP: iproute2 too old, missing fibmatch"
+ exit $ksft_skip
+fi
+
+# start clean
+cleanup &> /dev/null
+
+for t in $TESTS
+do
+ case $t in
+ fib_unreg_test|unregister) fib_unreg_test;;
+ fib_down_test|down) fib_down_test;;
+ fib_carrier_test|carrier) fib_carrier_test;;
+ fib_nexthop_test|nexthop) fib_nexthop_test;;
+ ipv6_route_test|ipv6_rt) ipv6_route_test;;
+ ipv4_route_test|ipv4_rt) ipv4_route_test;;
+ ipv6_addr_metric) ipv6_addr_metric_test;;
+ ipv4_addr_metric) ipv4_addr_metric_test;;
+
+ help) echo "Test names: $TESTS"; exit 0;;
+ esac
+done
+
+if [ "$TESTS" != "none" ]; then
+ printf "\nTests passed: %3d\n" ${nsuccess}
+ printf "Tests failed: %3d\n" ${nfail}
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/net/forwarding/.gitignore b/tools/testing/selftests/net/forwarding/.gitignore
new file mode 100644
index 000000000..a793eef5b
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/.gitignore
@@ -0,0 +1 @@
+forwarding.config
diff --git a/tools/testing/selftests/net/forwarding/README b/tools/testing/selftests/net/forwarding/README
new file mode 100644
index 000000000..b8a2af8fc
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/README
@@ -0,0 +1,58 @@
+Motivation
+==========
+
+One of the nice things about network namespaces is that they allow one
+to easily create and test complex environments.
+
+Unfortunately, these namespaces can not be used with actual switching
+ASICs, as their ports can not be migrated to other network namespaces
+(NETIF_F_NETNS_LOCAL) and most of them probably do not support the
+L1-separation provided by namespaces.
+
+However, a similar kind of flexibility can be achieved by using VRFs and
+by looping the switch ports together. For example:
+
+ br0
+ +
+ vrf-h1 | vrf-h2
+ + +---+----+ +
+ | | | |
+ 192.0.2.1/24 + + + + 192.0.2.2/24
+ swp1 swp2 swp3 swp4
+ + + + +
+ | | | |
+ +--------+ +--------+
+
+The VRFs act as lightweight namespaces representing hosts connected to
+the switch.
+
+This approach for testing switch ASICs has several advantages over the
+traditional method that requires multiple physical machines, to name a
+few:
+
+1. Only the device under test (DUT) is being tested without noise from
+other system.
+
+2. Ability to easily provision complex topologies. Testing bridging
+between 4-ports LAGs or 8-way ECMP requires many physical links that are
+not always available. With the VRF-based approach one merely needs to
+loopback more ports.
+
+These tests are written with switch ASICs in mind, but they can be run
+on any Linux box using veth pairs to emulate physical loopbacks.
+
+Guidelines for Writing Tests
+============================
+
+o Where possible, reuse an existing topology for different tests instead
+ of recreating the same topology.
+o Tests that use anything but the most trivial topologies should include
+ an ASCII art showing the topology.
+o Where possible, IPv6 and IPv4 addresses shall conform to RFC 3849 and
+ RFC 5737, respectively.
+o Where possible, tests shall be written so that they can be reused by
+ multiple topologies and added to lib.sh.
+o Checks shall be added to lib.sh for any external dependencies.
+o Code shall be checked using ShellCheck [1] prior to submission.
+
+1. https://www.shellcheck.net/
diff --git a/tools/testing/selftests/net/forwarding/bridge_port_isolation.sh b/tools/testing/selftests/net/forwarding/bridge_port_isolation.sh
new file mode 100755
index 000000000..a43b4645c
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/bridge_port_isolation.sh
@@ -0,0 +1,151 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="ping_ipv4 ping_ipv6 flooding"
+NUM_NETIFS=6
+CHECK_TC="yes"
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+h3_create()
+{
+ simple_if_init $h3 192.0.2.3/24 2001:db8:1::3/64
+}
+
+h3_destroy()
+{
+ simple_if_fini $h3 192.0.2.3/24 2001:db8:1::3/64
+}
+
+switch_create()
+{
+ ip link add dev br0 type bridge
+
+ ip link set dev $swp1 master br0
+ ip link set dev $swp2 master br0
+ ip link set dev $swp3 master br0
+
+ ip link set dev $swp1 type bridge_slave isolated on
+ check_err $? "Can't set isolation on port $swp1"
+ ip link set dev $swp2 type bridge_slave isolated on
+ check_err $? "Can't set isolation on port $swp2"
+ ip link set dev $swp3 type bridge_slave isolated off
+ check_err $? "Can't disable isolation on port $swp3"
+
+ ip link set dev br0 up
+ ip link set dev $swp1 up
+ ip link set dev $swp2 up
+ ip link set dev $swp3 up
+}
+
+switch_destroy()
+{
+ ip link set dev $swp3 down
+ ip link set dev $swp2 down
+ ip link set dev $swp1 down
+
+ ip link del dev br0
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+ h3_create
+
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+
+ h3_destroy
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ RET=0
+ ping_do $h1 192.0.2.2
+ check_fail $? "Ping worked when it should not have"
+
+ RET=0
+ ping_do $h3 192.0.2.2
+ check_err $? "Ping didn't work when it should have"
+
+ log_test "Isolated port ping"
+}
+
+ping_ipv6()
+{
+ RET=0
+ ping6_do $h1 2001:db8:1::2
+ check_fail $? "Ping6 worked when it should not have"
+
+ RET=0
+ ping6_do $h3 2001:db8:1::2
+ check_err $? "Ping6 didn't work when it should have"
+
+ log_test "Isolated port ping6"
+}
+
+flooding()
+{
+ local mac=de:ad:be:ef:13:37
+ local ip=192.0.2.100
+
+ RET=0
+ flood_test_do false $mac $ip $h1 $h2
+ check_err $? "Packet was flooded when it should not have been"
+
+ RET=0
+ flood_test_do true $mac $ip $h3 $h2
+ check_err $? "Packet was not flooded when it should have been"
+
+ log_test "Isolated port flooding"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
new file mode 100755
index 000000000..b90dff8d3
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
@@ -0,0 +1,151 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding vlan_deletion extern_learn"
+NUM_NETIFS=4
+CHECK_TC="yes"
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+switch_create()
+{
+ # 10 Seconds ageing time.
+ ip link add dev br0 type bridge vlan_filtering 1 ageing_time 1000 \
+ mcast_snooping 0
+
+ ip link set dev $swp1 master br0
+ ip link set dev $swp2 master br0
+
+ ip link set dev br0 up
+ ip link set dev $swp1 up
+ ip link set dev $swp2 up
+}
+
+switch_destroy()
+{
+ ip link set dev $swp2 down
+ ip link set dev $swp1 down
+
+ ip link del dev br0
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 192.0.2.2
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:1::2
+}
+
+learning()
+{
+ learning_test "br0" $swp1 $h1 $h2
+}
+
+flooding()
+{
+ flood_test $swp2 $h1 $h2
+}
+
+vlan_deletion()
+{
+ # Test that the deletion of a VLAN on a bridge port does not affect
+ # the PVID VLAN
+ log_info "Add and delete a VLAN on bridge port $swp1"
+
+ bridge vlan add vid 10 dev $swp1
+ bridge vlan del vid 10 dev $swp1
+
+ ping_ipv4
+ ping_ipv6
+}
+
+extern_learn()
+{
+ local mac=de:ad:be:ef:13:37
+ local ageing_time
+
+ # Test that externally learned FDB entries can roam, but not age out
+ RET=0
+
+ bridge fdb add de:ad:be:ef:13:37 dev $swp1 master extern_learn vlan 1
+
+ bridge fdb show brport $swp1 | grep -q de:ad:be:ef:13:37
+ check_err $? "Did not find FDB entry when should"
+
+ # Wait for 10 seconds after the ageing time to make sure the FDB entry
+ # was not aged out
+ ageing_time=$(bridge_ageing_time_get br0)
+ sleep $((ageing_time + 10))
+
+ bridge fdb show brport $swp1 | grep -q de:ad:be:ef:13:37
+ check_err $? "FDB entry was aged out when should not"
+
+ $MZ $h2 -c 1 -p 64 -a $mac -t ip -q
+
+ bridge fdb show brport $swp2 | grep -q de:ad:be:ef:13:37
+ check_err $? "FDB entry did not roam when should"
+
+ log_test "Externally learned FDB entry - ageing & roaming"
+
+ bridge fdb del de:ad:be:ef:13:37 dev $swp2 master vlan 1 &> /dev/null
+ bridge fdb del de:ad:be:ef:13:37 dev $swp1 master vlan 1 &> /dev/null
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh
new file mode 100755
index 000000000..c15c6c85c
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh
@@ -0,0 +1,104 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding"
+NUM_NETIFS=4
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+switch_create()
+{
+ # 10 Seconds ageing time.
+ ip link add dev br0 type bridge ageing_time 1000 mcast_snooping 0
+
+ ip link set dev $swp1 master br0
+ ip link set dev $swp2 master br0
+
+ ip link set dev br0 up
+ ip link set dev $swp1 up
+ ip link set dev $swp2 up
+}
+
+switch_destroy()
+{
+ ip link set dev $swp2 down
+ ip link set dev $swp1 down
+
+ ip link del dev br0
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 192.0.2.2
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:1::2
+}
+
+learning()
+{
+ learning_test "br0" $swp1 $h1 $h2
+}
+
+flooding()
+{
+ flood_test $swp2 $h1 $h2
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/config b/tools/testing/selftests/net/forwarding/config
new file mode 100644
index 000000000..5cd2aed97
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/config
@@ -0,0 +1,12 @@
+CONFIG_BRIDGE=m
+CONFIG_VLAN_8021Q=m
+CONFIG_BRIDGE_VLAN_FILTERING=y
+CONFIG_NET_L3_MASTER_DEV=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_NET_VRF=m
+CONFIG_BPF_SYSCALL=y
+CONFIG_CGROUP_BPF=y
+CONFIG_NET_CLS_FLOWER=m
+CONFIG_NET_SCH_INGRESS=m
+CONFIG_NET_ACT_GACT=m
+CONFIG_VETH=m
diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh
new file mode 100644
index 000000000..5ab1e5f43
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh
@@ -0,0 +1,108 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+##############################################################################
+# Source library
+
+relative_path="${BASH_SOURCE%/*}"
+if [[ "$relative_path" == "${BASH_SOURCE}" ]]; then
+ relative_path="."
+fi
+
+source "$relative_path/lib.sh"
+
+##############################################################################
+# Defines
+
+DEVLINK_DEV=$(devlink port show | grep "${NETIFS[p1]}" | \
+ grep -v "${NETIFS[p1]}[0-9]" | cut -d" " -f1 | \
+ rev | cut -d"/" -f2- | rev)
+if [ -z "$DEVLINK_DEV" ]; then
+ echo "SKIP: ${NETIFS[p1]} has no devlink device registered for it"
+ exit 1
+fi
+if [[ "$(echo $DEVLINK_DEV | grep -c pci)" -eq 0 ]]; then
+ echo "SKIP: devlink device's bus is not PCI"
+ exit 1
+fi
+
+DEVLINK_VIDDID=$(lspci -s $(echo $DEVLINK_DEV | cut -d"/" -f2) \
+ -n | cut -d" " -f3)
+
+##############################################################################
+# Sanity checks
+
+devlink -j resource show "$DEVLINK_DEV" &> /dev/null
+if [ $? -ne 0 ]; then
+ echo "SKIP: iproute2 too old, missing devlink resource support"
+ exit 1
+fi
+
+##############################################################################
+# Devlink helpers
+
+devlink_resource_names_to_path()
+{
+ local resource
+ local path=""
+
+ for resource in "${@}"; do
+ if [ "$path" == "" ]; then
+ path="$resource"
+ else
+ path="${path}/$resource"
+ fi
+ done
+
+ echo "$path"
+}
+
+devlink_resource_get()
+{
+ local name=$1
+ local resource_name=.[][\"$DEVLINK_DEV\"]
+
+ resource_name="$resource_name | .[] | select (.name == \"$name\")"
+
+ shift
+ for resource in "${@}"; do
+ resource_name="${resource_name} | .[\"resources\"][] | \
+ select (.name == \"$resource\")"
+ done
+
+ devlink -j resource show "$DEVLINK_DEV" | jq "$resource_name"
+}
+
+devlink_resource_size_get()
+{
+ local size=$(devlink_resource_get "$@" | jq '.["size_new"]')
+
+ if [ "$size" == "null" ]; then
+ devlink_resource_get "$@" | jq '.["size"]'
+ else
+ echo "$size"
+ fi
+}
+
+devlink_resource_size_set()
+{
+ local new_size=$1
+ local path
+
+ shift
+ path=$(devlink_resource_names_to_path "$@")
+ devlink resource set "$DEVLINK_DEV" path "$path" size "$new_size"
+ check_err $? "Failed setting path $path to size $size"
+}
+
+devlink_reload()
+{
+ local still_pending
+
+ devlink dev reload "$DEVLINK_DEV" &> /dev/null
+ check_err $? "Failed reload"
+
+ still_pending=$(devlink resource show "$DEVLINK_DEV" | \
+ grep -c "size_new")
+ check_err $still_pending "Failed reload - There are still unset sizes"
+}
diff --git a/tools/testing/selftests/net/forwarding/forwarding.config.sample b/tools/testing/selftests/net/forwarding/forwarding.config.sample
new file mode 100644
index 000000000..e819d049d
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/forwarding.config.sample
@@ -0,0 +1,35 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+##############################################################################
+# Topology description. p1 looped back to p2, p3 to p4 and so on.
+declare -A NETIFS
+
+NETIFS[p1]=veth0
+NETIFS[p2]=veth1
+NETIFS[p3]=veth2
+NETIFS[p4]=veth3
+NETIFS[p5]=veth4
+NETIFS[p6]=veth5
+NETIFS[p7]=veth6
+NETIFS[p8]=veth7
+
+##############################################################################
+# Defines
+
+# IPv4 ping utility name
+PING=ping
+# IPv6 ping utility name. Some distributions use 'ping' for IPv6.
+PING6=ping6
+# Packet generator. Some distributions use 'mz'.
+MZ=mausezahn
+# Time to wait after interfaces participating in the test are all UP
+WAIT_TIME=5
+# Whether to pause on failure or not.
+PAUSE_ON_FAIL=no
+# Whether to pause on cleanup or not.
+PAUSE_ON_CLEANUP=no
+# Type of network interface to create
+NETIF_TYPE=veth
+# Whether to create virtual interfaces (veth) or not
+NETIF_CREATE=yes
diff --git a/tools/testing/selftests/net/forwarding/gre_multipath.sh b/tools/testing/selftests/net/forwarding/gre_multipath.sh
new file mode 100755
index 000000000..a8d8e8b3d
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/gre_multipath.sh
@@ -0,0 +1,257 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test traffic distribution when a wECMP route forwards traffic to two GRE
+# tunnels.
+#
+# +-------------------------+
+# | H1 |
+# | $h1 + |
+# | 192.0.2.1/28 | |
+# +-------------------|-----+
+# |
+# +-------------------|------------------------+
+# | SW1 | |
+# | $ol1 + |
+# | 192.0.2.2/28 |
+# | |
+# | + g1a (gre) + g1b (gre) |
+# | loc=192.0.2.65 loc=192.0.2.81 |
+# | rem=192.0.2.66 --. rem=192.0.2.82 --. |
+# | tos=inherit | tos=inherit | |
+# | .------------------' | |
+# | | .------------------' |
+# | v v |
+# | + $ul1.111 (vlan) + $ul1.222 (vlan) |
+# | | 192.0.2.129/28 | 192.0.2.145/28 |
+# | \ / |
+# | \________________/ |
+# | | |
+# | + $ul1 |
+# +------------|-------------------------------+
+# |
+# +------------|-------------------------------+
+# | SW2 + $ul2 |
+# | _______|________ |
+# | / \ |
+# | / \ |
+# | + $ul2.111 (vlan) + $ul2.222 (vlan) |
+# | ^ 192.0.2.130/28 ^ 192.0.2.146/28 |
+# | | | |
+# | | '------------------. |
+# | '------------------. | |
+# | + g2a (gre) | + g2b (gre) | |
+# | loc=192.0.2.66 | loc=192.0.2.82 | |
+# | rem=192.0.2.65 --' rem=192.0.2.81 --' |
+# | tos=inherit tos=inherit |
+# | |
+# | $ol2 + |
+# | 192.0.2.17/28 | |
+# +-------------------|------------------------+
+# |
+# +-------------------|-----+
+# | H2 | |
+# | $h2 + |
+# | 192.0.2.18/28 |
+# +-------------------------+
+
+ALL_TESTS="
+ ping_ipv4
+ multipath_ipv4
+"
+
+NUM_NETIFS=6
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64
+ ip route add vrf v$h1 192.0.2.16/28 via 192.0.2.2
+}
+
+h1_destroy()
+{
+ ip route del vrf v$h1 192.0.2.16/28 via 192.0.2.2
+ simple_if_fini $h1 192.0.2.1/28
+}
+
+sw1_create()
+{
+ simple_if_init $ol1 192.0.2.2/28
+ __simple_if_init $ul1 v$ol1
+ vlan_create $ul1 111 v$ol1 192.0.2.129/28
+ vlan_create $ul1 222 v$ol1 192.0.2.145/28
+
+ tunnel_create g1a gre 192.0.2.65 192.0.2.66 tos inherit dev v$ol1
+ __simple_if_init g1a v$ol1 192.0.2.65/32
+ ip route add vrf v$ol1 192.0.2.66/32 via 192.0.2.130
+
+ tunnel_create g1b gre 192.0.2.81 192.0.2.82 tos inherit dev v$ol1
+ __simple_if_init g1b v$ol1 192.0.2.81/32
+ ip route add vrf v$ol1 192.0.2.82/32 via 192.0.2.146
+
+ ip route add vrf v$ol1 192.0.2.16/28 \
+ nexthop dev g1a \
+ nexthop dev g1b
+}
+
+sw1_destroy()
+{
+ ip route del vrf v$ol1 192.0.2.16/28
+
+ ip route del vrf v$ol1 192.0.2.82/32 via 192.0.2.146
+ __simple_if_fini g1b 192.0.2.81/32
+ tunnel_destroy g1b
+
+ ip route del vrf v$ol1 192.0.2.66/32 via 192.0.2.130
+ __simple_if_fini g1a 192.0.2.65/32
+ tunnel_destroy g1a
+
+ vlan_destroy $ul1 222
+ vlan_destroy $ul1 111
+ __simple_if_fini $ul1
+ simple_if_fini $ol1 192.0.2.2/28
+}
+
+sw2_create()
+{
+ simple_if_init $ol2 192.0.2.17/28
+ __simple_if_init $ul2 v$ol2
+ vlan_create $ul2 111 v$ol2 192.0.2.130/28
+ vlan_create $ul2 222 v$ol2 192.0.2.146/28
+
+ tunnel_create g2a gre 192.0.2.66 192.0.2.65 tos inherit dev v$ol2
+ __simple_if_init g2a v$ol2 192.0.2.66/32
+ ip route add vrf v$ol2 192.0.2.65/32 via 192.0.2.129
+
+ tunnel_create g2b gre 192.0.2.82 192.0.2.81 tos inherit dev v$ol2
+ __simple_if_init g2b v$ol2 192.0.2.82/32
+ ip route add vrf v$ol2 192.0.2.81/32 via 192.0.2.145
+
+ ip route add vrf v$ol2 192.0.2.0/28 \
+ nexthop dev g2a \
+ nexthop dev g2b
+
+ tc qdisc add dev $ul2 clsact
+ tc filter add dev $ul2 ingress pref 111 prot 802.1Q \
+ flower vlan_id 111 action pass
+ tc filter add dev $ul2 ingress pref 222 prot 802.1Q \
+ flower vlan_id 222 action pass
+}
+
+sw2_destroy()
+{
+ tc qdisc del dev $ul2 clsact
+
+ ip route del vrf v$ol2 192.0.2.0/28
+
+ ip route del vrf v$ol2 192.0.2.81/32 via 192.0.2.145
+ __simple_if_fini g2b 192.0.2.82/32
+ tunnel_destroy g2b
+
+ ip route del vrf v$ol2 192.0.2.65/32 via 192.0.2.129
+ __simple_if_fini g2a 192.0.2.66/32
+ tunnel_destroy g2a
+
+ vlan_destroy $ul2 222
+ vlan_destroy $ul2 111
+ __simple_if_fini $ul2
+ simple_if_fini $ol2 192.0.2.17/28
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.18/28
+ ip route add vrf v$h2 192.0.2.0/28 via 192.0.2.17
+}
+
+h2_destroy()
+{
+ ip route del vrf v$h2 192.0.2.0/28 via 192.0.2.17
+ simple_if_fini $h2 192.0.2.18/28
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ ol1=${NETIFS[p2]}
+
+ ul1=${NETIFS[p3]}
+ ul2=${NETIFS[p4]}
+
+ ol2=${NETIFS[p5]}
+ h2=${NETIFS[p6]}
+
+ vrf_prepare
+ h1_create
+ sw1_create
+ sw2_create
+ h2_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ h2_destroy
+ sw2_destroy
+ sw1_destroy
+ h1_destroy
+ vrf_cleanup
+}
+
+multipath4_test()
+{
+ local what=$1; shift
+ local weight1=$1; shift
+ local weight2=$1; shift
+
+ sysctl_set net.ipv4.fib_multipath_hash_policy 1
+ ip route replace vrf v$ol1 192.0.2.16/28 \
+ nexthop dev g1a weight $weight1 \
+ nexthop dev g1b weight $weight2
+
+ local t0_111=$(tc_rule_stats_get $ul2 111 ingress)
+ local t0_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+ ip vrf exec v$h1 \
+ $MZ $h1 -q -p 64 -A 192.0.2.1 -B 192.0.2.18 \
+ -d 1msec -t udp "sp=1024,dp=0-32768"
+
+ local t1_111=$(tc_rule_stats_get $ul2 111 ingress)
+ local t1_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+ local d111=$((t1_111 - t0_111))
+ local d222=$((t1_222 - t0_222))
+ multipath_eval "$what" $weight1 $weight2 $d111 $d222
+
+ ip route replace vrf v$ol1 192.0.2.16/28 \
+ nexthop dev g1a \
+ nexthop dev g1b
+ sysctl_restore net.ipv4.fib_multipath_hash_policy
+}
+
+ping_ipv4()
+{
+ ping_test $h1 192.0.2.18
+}
+
+multipath_ipv4()
+{
+ log_info "Running IPv4 multipath tests"
+ multipath4_test "ECMP" 1 1
+ multipath4_test "Weighted MP 2:1" 2 1
+ multipath4_test "Weighted MP 11:45" 11 45
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
new file mode 100644
index 000000000..08bac6cf1
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -0,0 +1,964 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+##############################################################################
+# Defines
+
+# Can be overridden by the configuration file.
+PING=${PING:=ping}
+PING6=${PING6:=ping6}
+MZ=${MZ:=mausezahn}
+ARPING=${ARPING:=arping}
+TEAMD=${TEAMD:=teamd}
+WAIT_TIME=${WAIT_TIME:=5}
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+PAUSE_ON_CLEANUP=${PAUSE_ON_CLEANUP:=no}
+NETIF_TYPE=${NETIF_TYPE:=veth}
+NETIF_CREATE=${NETIF_CREATE:=yes}
+
+relative_path="${BASH_SOURCE%/*}"
+if [[ "$relative_path" == "${BASH_SOURCE}" ]]; then
+ relative_path="."
+fi
+
+if [[ -f $relative_path/forwarding.config ]]; then
+ source "$relative_path/forwarding.config"
+fi
+
+##############################################################################
+# Sanity checks
+
+check_tc_version()
+{
+ tc -j &> /dev/null
+ if [[ $? -ne 0 ]]; then
+ echo "SKIP: iproute2 too old; tc is missing JSON support"
+ exit 1
+ fi
+}
+
+check_tc_shblock_support()
+{
+ tc filter help 2>&1 | grep block &> /dev/null
+ if [[ $? -ne 0 ]]; then
+ echo "SKIP: iproute2 too old; tc is missing shared block support"
+ exit 1
+ fi
+}
+
+check_tc_chain_support()
+{
+ tc help 2>&1|grep chain &> /dev/null
+ if [[ $? -ne 0 ]]; then
+ echo "SKIP: iproute2 too old; tc is missing chain support"
+ exit 1
+ fi
+}
+
+if [[ "$(id -u)" -ne 0 ]]; then
+ echo "SKIP: need root privileges"
+ exit 0
+fi
+
+if [[ "$CHECK_TC" = "yes" ]]; then
+ check_tc_version
+fi
+
+require_command()
+{
+ local cmd=$1; shift
+
+ if [[ ! -x "$(command -v "$cmd")" ]]; then
+ echo "SKIP: $cmd not installed"
+ exit 1
+ fi
+}
+
+require_command jq
+require_command $MZ
+
+if [[ ! -v NUM_NETIFS ]]; then
+ echo "SKIP: importer does not define \"NUM_NETIFS\""
+ exit 1
+fi
+
+##############################################################################
+# Command line options handling
+
+count=0
+
+while [[ $# -gt 0 ]]; do
+ if [[ "$count" -eq "0" ]]; then
+ unset NETIFS
+ declare -A NETIFS
+ fi
+ count=$((count + 1))
+ NETIFS[p$count]="$1"
+ shift
+done
+
+##############################################################################
+# Network interfaces configuration
+
+create_netif_veth()
+{
+ local i
+
+ for i in $(eval echo {1..$NUM_NETIFS}); do
+ local j=$((i+1))
+
+ ip link show dev ${NETIFS[p$i]} &> /dev/null
+ if [[ $? -ne 0 ]]; then
+ ip link add ${NETIFS[p$i]} type veth \
+ peer name ${NETIFS[p$j]}
+ if [[ $? -ne 0 ]]; then
+ echo "Failed to create netif"
+ exit 1
+ fi
+ fi
+ i=$j
+ done
+}
+
+create_netif()
+{
+ case "$NETIF_TYPE" in
+ veth) create_netif_veth
+ ;;
+ *) echo "Can not create interfaces of type \'$NETIF_TYPE\'"
+ exit 1
+ ;;
+ esac
+}
+
+if [[ "$NETIF_CREATE" = "yes" ]]; then
+ create_netif
+fi
+
+for i in $(eval echo {1..$NUM_NETIFS}); do
+ ip link show dev ${NETIFS[p$i]} &> /dev/null
+ if [[ $? -ne 0 ]]; then
+ echo "SKIP: could not find all required interfaces"
+ exit 1
+ fi
+done
+
+##############################################################################
+# Helpers
+
+# Exit status to return at the end. Set in case one of the tests fails.
+EXIT_STATUS=0
+# Per-test return value. Clear at the beginning of each test.
+RET=0
+
+check_err()
+{
+ local err=$1
+ local msg=$2
+
+ if [[ $RET -eq 0 && $err -ne 0 ]]; then
+ RET=$err
+ retmsg=$msg
+ fi
+}
+
+check_fail()
+{
+ local err=$1
+ local msg=$2
+
+ if [[ $RET -eq 0 && $err -eq 0 ]]; then
+ RET=1
+ retmsg=$msg
+ fi
+}
+
+check_err_fail()
+{
+ local should_fail=$1; shift
+ local err=$1; shift
+ local what=$1; shift
+
+ if ((should_fail)); then
+ check_fail $err "$what succeeded, but should have failed"
+ else
+ check_err $err "$what failed"
+ fi
+}
+
+log_test()
+{
+ local test_name=$1
+ local opt_str=$2
+
+ if [[ $# -eq 2 ]]; then
+ opt_str="($opt_str)"
+ fi
+
+ if [[ $RET -ne 0 ]]; then
+ EXIT_STATUS=1
+ printf "TEST: %-60s [FAIL]\n" "$test_name $opt_str"
+ if [[ ! -z "$retmsg" ]]; then
+ printf "\t%s\n" "$retmsg"
+ fi
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo "Hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ return 1
+ fi
+
+ printf "TEST: %-60s [PASS]\n" "$test_name $opt_str"
+ return 0
+}
+
+log_info()
+{
+ local msg=$1
+
+ echo "INFO: $msg"
+}
+
+setup_wait_dev()
+{
+ local dev=$1; shift
+
+ while true; do
+ ip link show dev $dev up \
+ | grep 'state UP' &> /dev/null
+ if [[ $? -ne 0 ]]; then
+ sleep 1
+ else
+ break
+ fi
+ done
+}
+
+setup_wait()
+{
+ local num_netifs=${1:-$NUM_NETIFS}
+
+ for ((i = 1; i <= num_netifs; ++i)); do
+ setup_wait_dev ${NETIFS[p$i]}
+ done
+
+ # Make sure links are ready.
+ sleep $WAIT_TIME
+}
+
+lldpad_app_wait_set()
+{
+ local dev=$1; shift
+
+ while lldptool -t -i $dev -V APP -c app | grep -Eq "pending|unknown"; do
+ echo "$dev: waiting for lldpad to push pending APP updates"
+ sleep 5
+ done
+}
+
+lldpad_app_wait_del()
+{
+ # Give lldpad a chance to push down the changes. If the device is downed
+ # too soon, the updates will be left pending. However, they will have
+ # been struck off the lldpad's DB already, so we won't be able to tell
+ # they are pending. Then on next test iteration this would cause
+ # weirdness as newly-added APP rules conflict with the old ones,
+ # sometimes getting stuck in an "unknown" state.
+ sleep 5
+}
+
+pre_cleanup()
+{
+ if [ "${PAUSE_ON_CLEANUP}" = "yes" ]; then
+ echo "Pausing before cleanup, hit any key to continue"
+ read
+ fi
+}
+
+vrf_prepare()
+{
+ ip -4 rule add pref 32765 table local
+ ip -4 rule del pref 0
+ ip -6 rule add pref 32765 table local
+ ip -6 rule del pref 0
+}
+
+vrf_cleanup()
+{
+ ip -6 rule add pref 0 table local
+ ip -6 rule del pref 32765
+ ip -4 rule add pref 0 table local
+ ip -4 rule del pref 32765
+}
+
+__last_tb_id=0
+declare -A __TB_IDS
+
+__vrf_td_id_assign()
+{
+ local vrf_name=$1
+
+ __last_tb_id=$((__last_tb_id + 1))
+ __TB_IDS[$vrf_name]=$__last_tb_id
+ return $__last_tb_id
+}
+
+__vrf_td_id_lookup()
+{
+ local vrf_name=$1
+
+ return ${__TB_IDS[$vrf_name]}
+}
+
+vrf_create()
+{
+ local vrf_name=$1
+ local tb_id
+
+ __vrf_td_id_assign $vrf_name
+ tb_id=$?
+
+ ip link add dev $vrf_name type vrf table $tb_id
+ ip -4 route add table $tb_id unreachable default metric 4278198272
+ ip -6 route add table $tb_id unreachable default metric 4278198272
+}
+
+vrf_destroy()
+{
+ local vrf_name=$1
+ local tb_id
+
+ __vrf_td_id_lookup $vrf_name
+ tb_id=$?
+
+ ip -6 route del table $tb_id unreachable default metric 4278198272
+ ip -4 route del table $tb_id unreachable default metric 4278198272
+ ip link del dev $vrf_name
+}
+
+__addr_add_del()
+{
+ local if_name=$1
+ local add_del=$2
+ local array
+
+ shift
+ shift
+ array=("${@}")
+
+ for addrstr in "${array[@]}"; do
+ ip address $add_del $addrstr dev $if_name
+ done
+}
+
+__simple_if_init()
+{
+ local if_name=$1; shift
+ local vrf_name=$1; shift
+ local addrs=("${@}")
+
+ ip link set dev $if_name master $vrf_name
+ ip link set dev $if_name up
+
+ __addr_add_del $if_name add "${addrs[@]}"
+}
+
+__simple_if_fini()
+{
+ local if_name=$1; shift
+ local addrs=("${@}")
+
+ __addr_add_del $if_name del "${addrs[@]}"
+
+ ip link set dev $if_name down
+ ip link set dev $if_name nomaster
+}
+
+simple_if_init()
+{
+ local if_name=$1
+ local vrf_name
+ local array
+
+ shift
+ vrf_name=v$if_name
+ array=("${@}")
+
+ vrf_create $vrf_name
+ ip link set dev $vrf_name up
+ __simple_if_init $if_name $vrf_name "${array[@]}"
+}
+
+simple_if_fini()
+{
+ local if_name=$1
+ local vrf_name
+ local array
+
+ shift
+ vrf_name=v$if_name
+ array=("${@}")
+
+ __simple_if_fini $if_name "${array[@]}"
+ vrf_destroy $vrf_name
+}
+
+tunnel_create()
+{
+ local name=$1; shift
+ local type=$1; shift
+ local local=$1; shift
+ local remote=$1; shift
+
+ ip link add name $name type $type \
+ local $local remote $remote "$@"
+ ip link set dev $name up
+}
+
+tunnel_destroy()
+{
+ local name=$1; shift
+
+ ip link del dev $name
+}
+
+vlan_create()
+{
+ local if_name=$1; shift
+ local vid=$1; shift
+ local vrf=$1; shift
+ local ips=("${@}")
+ local name=$if_name.$vid
+
+ ip link add name $name link $if_name type vlan id $vid
+ if [ "$vrf" != "" ]; then
+ ip link set dev $name master $vrf
+ fi
+ ip link set dev $name up
+ __addr_add_del $name add "${ips[@]}"
+}
+
+vlan_destroy()
+{
+ local if_name=$1; shift
+ local vid=$1; shift
+ local name=$if_name.$vid
+
+ ip link del dev $name
+}
+
+team_create()
+{
+ local if_name=$1; shift
+ local mode=$1; shift
+
+ require_command $TEAMD
+ $TEAMD -t $if_name -d -c '{"runner": {"name": "'$mode'"}}'
+ for slave in "$@"; do
+ ip link set dev $slave down
+ ip link set dev $slave master $if_name
+ ip link set dev $slave up
+ done
+ ip link set dev $if_name up
+}
+
+team_destroy()
+{
+ local if_name=$1; shift
+
+ $TEAMD -t $if_name -k
+}
+
+master_name_get()
+{
+ local if_name=$1
+
+ ip -j link show dev $if_name | jq -r '.[]["master"]'
+}
+
+link_stats_tx_packets_get()
+{
+ local if_name=$1
+
+ ip -j -s link show dev $if_name | jq '.[]["stats64"]["tx"]["packets"]'
+}
+
+tc_rule_stats_get()
+{
+ local dev=$1; shift
+ local pref=$1; shift
+ local dir=$1; shift
+
+ tc -j -s filter show dev $dev ${dir:-ingress} pref $pref \
+ | jq '.[1].options.actions[].stats.packets'
+}
+
+mac_get()
+{
+ local if_name=$1
+
+ ip -j link show dev $if_name | jq -r '.[]["address"]'
+}
+
+bridge_ageing_time_get()
+{
+ local bridge=$1
+ local ageing_time
+
+ # Need to divide by 100 to convert to seconds.
+ ageing_time=$(ip -j -d link show dev $bridge \
+ | jq '.[]["linkinfo"]["info_data"]["ageing_time"]')
+ echo $((ageing_time / 100))
+}
+
+declare -A SYSCTL_ORIG
+sysctl_set()
+{
+ local key=$1; shift
+ local value=$1; shift
+
+ SYSCTL_ORIG[$key]=$(sysctl -n $key)
+ sysctl -qw $key=$value
+}
+
+sysctl_restore()
+{
+ local key=$1; shift
+
+ sysctl -qw $key=${SYSCTL_ORIG["$key"]}
+}
+
+forwarding_enable()
+{
+ sysctl_set net.ipv4.conf.all.forwarding 1
+ sysctl_set net.ipv6.conf.all.forwarding 1
+}
+
+forwarding_restore()
+{
+ sysctl_restore net.ipv6.conf.all.forwarding
+ sysctl_restore net.ipv4.conf.all.forwarding
+}
+
+tc_offload_check()
+{
+ local num_netifs=${1:-$NUM_NETIFS}
+
+ for ((i = 1; i <= num_netifs; ++i)); do
+ ethtool -k ${NETIFS[p$i]} \
+ | grep "hw-tc-offload: on" &> /dev/null
+ if [[ $? -ne 0 ]]; then
+ return 1
+ fi
+ done
+
+ return 0
+}
+
+trap_install()
+{
+ local dev=$1; shift
+ local direction=$1; shift
+
+ # Some devices may not support or need in-hardware trapping of traffic
+ # (e.g. the veth pairs that this library creates for non-existent
+ # loopbacks). Use continue instead, so that there is a filter in there
+ # (some tests check counters), and so that other filters are still
+ # processed.
+ tc filter add dev $dev $direction pref 1 \
+ flower skip_sw action trap 2>/dev/null \
+ || tc filter add dev $dev $direction pref 1 \
+ flower action continue
+}
+
+trap_uninstall()
+{
+ local dev=$1; shift
+ local direction=$1; shift
+
+ tc filter del dev $dev $direction pref 1 flower
+}
+
+slow_path_trap_install()
+{
+ # For slow-path testing, we need to install a trap to get to
+ # slow path the packets that would otherwise be switched in HW.
+ if [ "${tcflags/skip_hw}" != "$tcflags" ]; then
+ trap_install "$@"
+ fi
+}
+
+slow_path_trap_uninstall()
+{
+ if [ "${tcflags/skip_hw}" != "$tcflags" ]; then
+ trap_uninstall "$@"
+ fi
+}
+
+__icmp_capture_add_del()
+{
+ local add_del=$1; shift
+ local pref=$1; shift
+ local vsuf=$1; shift
+ local tundev=$1; shift
+ local filter=$1; shift
+
+ tc filter $add_del dev "$tundev" ingress \
+ proto ip$vsuf pref $pref \
+ flower ip_proto icmp$vsuf $filter \
+ action pass
+}
+
+icmp_capture_install()
+{
+ __icmp_capture_add_del add 100 "" "$@"
+}
+
+icmp_capture_uninstall()
+{
+ __icmp_capture_add_del del 100 "" "$@"
+}
+
+icmp6_capture_install()
+{
+ __icmp_capture_add_del add 100 v6 "$@"
+}
+
+icmp6_capture_uninstall()
+{
+ __icmp_capture_add_del del 100 v6 "$@"
+}
+
+__vlan_capture_add_del()
+{
+ local add_del=$1; shift
+ local pref=$1; shift
+ local dev=$1; shift
+ local filter=$1; shift
+
+ tc filter $add_del dev "$dev" ingress \
+ proto 802.1q pref $pref \
+ flower $filter \
+ action pass
+}
+
+vlan_capture_install()
+{
+ __vlan_capture_add_del add 100 "$@"
+}
+
+vlan_capture_uninstall()
+{
+ __vlan_capture_add_del del 100 "$@"
+}
+
+__dscp_capture_add_del()
+{
+ local add_del=$1; shift
+ local dev=$1; shift
+ local base=$1; shift
+ local dscp;
+
+ for prio in {0..7}; do
+ dscp=$((base + prio))
+ __icmp_capture_add_del $add_del $((dscp + 100)) "" $dev \
+ "skip_hw ip_tos $((dscp << 2))"
+ done
+}
+
+dscp_capture_install()
+{
+ local dev=$1; shift
+ local base=$1; shift
+
+ __dscp_capture_add_del add $dev $base
+}
+
+dscp_capture_uninstall()
+{
+ local dev=$1; shift
+ local base=$1; shift
+
+ __dscp_capture_add_del del $dev $base
+}
+
+dscp_fetch_stats()
+{
+ local dev=$1; shift
+ local base=$1; shift
+
+ for prio in {0..7}; do
+ local dscp=$((base + prio))
+ local t=$(tc_rule_stats_get $dev $((dscp + 100)))
+ echo "[$dscp]=$t "
+ done
+}
+
+matchall_sink_create()
+{
+ local dev=$1; shift
+
+ tc qdisc add dev $dev clsact
+ tc filter add dev $dev ingress \
+ pref 10000 \
+ matchall \
+ action drop
+}
+
+tests_run()
+{
+ local current_test
+
+ for current_test in ${TESTS:-$ALL_TESTS}; do
+ $current_test
+ done
+}
+
+multipath_eval()
+{
+ local desc="$1"
+ local weight_rp12=$2
+ local weight_rp13=$3
+ local packets_rp12=$4
+ local packets_rp13=$5
+ local weights_ratio packets_ratio diff
+
+ RET=0
+
+ if [[ "$weight_rp12" -gt "$weight_rp13" ]]; then
+ weights_ratio=$(echo "scale=2; $weight_rp12 / $weight_rp13" \
+ | bc -l)
+ else
+ weights_ratio=$(echo "scale=2; $weight_rp13 / $weight_rp12" \
+ | bc -l)
+ fi
+
+ if [[ "$packets_rp12" -eq "0" || "$packets_rp13" -eq "0" ]]; then
+ check_err 1 "Packet difference is 0"
+ log_test "Multipath"
+ log_info "Expected ratio $weights_ratio"
+ return
+ fi
+
+ if [[ "$weight_rp12" -gt "$weight_rp13" ]]; then
+ packets_ratio=$(echo "scale=2; $packets_rp12 / $packets_rp13" \
+ | bc -l)
+ else
+ packets_ratio=$(echo "scale=2; $packets_rp13 / $packets_rp12" \
+ | bc -l)
+ fi
+
+ diff=$(echo $weights_ratio - $packets_ratio | bc -l)
+ diff=${diff#-}
+
+ test "$(echo "$diff / $weights_ratio > 0.15" | bc -l)" -eq 0
+ check_err $? "Too large discrepancy between expected and measured ratios"
+ log_test "$desc"
+ log_info "Expected ratio $weights_ratio Measured ratio $packets_ratio"
+}
+
+##############################################################################
+# Tests
+
+ping_do()
+{
+ local if_name=$1
+ local dip=$2
+ local vrf_name
+
+ vrf_name=$(master_name_get $if_name)
+ ip vrf exec $vrf_name $PING $dip -c 10 -i 0.1 -w 2 &> /dev/null
+}
+
+ping_test()
+{
+ RET=0
+
+ ping_do $1 $2
+ check_err $?
+ log_test "ping"
+}
+
+ping6_do()
+{
+ local if_name=$1
+ local dip=$2
+ local vrf_name
+
+ vrf_name=$(master_name_get $if_name)
+ ip vrf exec $vrf_name $PING6 $dip -c 10 -i 0.1 -w 2 &> /dev/null
+}
+
+ping6_test()
+{
+ RET=0
+
+ ping6_do $1 $2
+ check_err $?
+ log_test "ping6"
+}
+
+learning_test()
+{
+ local bridge=$1
+ local br_port1=$2 # Connected to `host1_if`.
+ local host1_if=$3
+ local host2_if=$4
+ local mac=de:ad:be:ef:13:37
+ local ageing_time
+
+ RET=0
+
+ bridge -j fdb show br $bridge brport $br_port1 \
+ | jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
+ check_fail $? "Found FDB record when should not"
+
+ # Disable unknown unicast flooding on `br_port1` to make sure
+ # packets are only forwarded through the port after a matching
+ # FDB entry was installed.
+ bridge link set dev $br_port1 flood off
+
+ tc qdisc add dev $host1_if ingress
+ tc filter add dev $host1_if ingress protocol ip pref 1 handle 101 \
+ flower dst_mac $mac action drop
+
+ $MZ $host2_if -c 1 -p 64 -b $mac -t ip -q
+ sleep 1
+
+ tc -j -s filter show dev $host1_if ingress \
+ | jq -e ".[] | select(.options.handle == 101) \
+ | select(.options.actions[0].stats.packets == 1)" &> /dev/null
+ check_fail $? "Packet reached second host when should not"
+
+ $MZ $host1_if -c 1 -p 64 -a $mac -t ip -q
+ sleep 1
+
+ bridge -j fdb show br $bridge brport $br_port1 \
+ | jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
+ check_err $? "Did not find FDB record when should"
+
+ $MZ $host2_if -c 1 -p 64 -b $mac -t ip -q
+ sleep 1
+
+ tc -j -s filter show dev $host1_if ingress \
+ | jq -e ".[] | select(.options.handle == 101) \
+ | select(.options.actions[0].stats.packets == 1)" &> /dev/null
+ check_err $? "Packet did not reach second host when should"
+
+ # Wait for 10 seconds after the ageing time to make sure FDB
+ # record was aged-out.
+ ageing_time=$(bridge_ageing_time_get $bridge)
+ sleep $((ageing_time + 10))
+
+ bridge -j fdb show br $bridge brport $br_port1 \
+ | jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
+ check_fail $? "Found FDB record when should not"
+
+ bridge link set dev $br_port1 learning off
+
+ $MZ $host1_if -c 1 -p 64 -a $mac -t ip -q
+ sleep 1
+
+ bridge -j fdb show br $bridge brport $br_port1 \
+ | jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
+ check_fail $? "Found FDB record when should not"
+
+ bridge link set dev $br_port1 learning on
+
+ tc filter del dev $host1_if ingress protocol ip pref 1 handle 101 flower
+ tc qdisc del dev $host1_if ingress
+
+ bridge link set dev $br_port1 flood on
+
+ log_test "FDB learning"
+}
+
+flood_test_do()
+{
+ local should_flood=$1
+ local mac=$2
+ local ip=$3
+ local host1_if=$4
+ local host2_if=$5
+ local err=0
+
+ # Add an ACL on `host2_if` which will tell us whether the packet
+ # was flooded to it or not.
+ tc qdisc add dev $host2_if ingress
+ tc filter add dev $host2_if ingress protocol ip pref 1 handle 101 \
+ flower dst_mac $mac action drop
+
+ $MZ $host1_if -c 1 -p 64 -b $mac -B $ip -t ip -q
+ sleep 1
+
+ tc -j -s filter show dev $host2_if ingress \
+ | jq -e ".[] | select(.options.handle == 101) \
+ | select(.options.actions[0].stats.packets == 1)" &> /dev/null
+ if [[ $? -ne 0 && $should_flood == "true" || \
+ $? -eq 0 && $should_flood == "false" ]]; then
+ err=1
+ fi
+
+ tc filter del dev $host2_if ingress protocol ip pref 1 handle 101 flower
+ tc qdisc del dev $host2_if ingress
+
+ return $err
+}
+
+flood_unicast_test()
+{
+ local br_port=$1
+ local host1_if=$2
+ local host2_if=$3
+ local mac=de:ad:be:ef:13:37
+ local ip=192.0.2.100
+
+ RET=0
+
+ bridge link set dev $br_port flood off
+
+ flood_test_do false $mac $ip $host1_if $host2_if
+ check_err $? "Packet flooded when should not"
+
+ bridge link set dev $br_port flood on
+
+ flood_test_do true $mac $ip $host1_if $host2_if
+ check_err $? "Packet was not flooded when should"
+
+ log_test "Unknown unicast flood"
+}
+
+flood_multicast_test()
+{
+ local br_port=$1
+ local host1_if=$2
+ local host2_if=$3
+ local mac=01:00:5e:00:00:01
+ local ip=239.0.0.1
+
+ RET=0
+
+ bridge link set dev $br_port mcast_flood off
+
+ flood_test_do false $mac $ip $host1_if $host2_if
+ check_err $? "Packet flooded when should not"
+
+ bridge link set dev $br_port mcast_flood on
+
+ flood_test_do true $mac $ip $host1_if $host2_if
+ check_err $? "Packet was not flooded when should"
+
+ log_test "Unregistered multicast flood"
+}
+
+flood_test()
+{
+ # `br_port` is connected to `host2_if`
+ local br_port=$1
+ local host1_if=$2
+ local host2_if=$3
+
+ flood_unicast_test $br_port $host1_if $host2_if
+ flood_multicast_test $br_port $host1_if $host2_if
+}
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre.sh b/tools/testing/selftests/net/forwarding/mirror_gre.sh
new file mode 100755
index 000000000..026644360
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre.sh
@@ -0,0 +1,160 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# Test for "tc action mirred egress mirror" when the device to mirror to is a
+# gretap or ip6gretap netdevice. Expect that the packets come out encapsulated,
+# and another gretap / ip6gretap netdevice is then capable of decapsulating the
+# traffic. Test that the payload is what is expected (ICMP ping request or
+# reply, depending on test).
+
+ALL_TESTS="
+ test_gretap
+ test_ip6gretap
+ test_gretap_mac
+ test_ip6gretap_mac
+ test_two_spans
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ vrf_prepare
+ mirror_gre_topo_create
+
+ ip address add dev $swp3 192.0.2.129/28
+ ip address add dev $h3 192.0.2.130/28
+
+ ip address add dev $swp3 2001:db8:2::1/64
+ ip address add dev $h3 2001:db8:2::2/64
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ ip address del dev $h3 2001:db8:2::2/64
+ ip address del dev $swp3 2001:db8:2::1/64
+
+ ip address del dev $h3 192.0.2.130/28
+ ip address del dev $swp3 192.0.2.129/28
+
+ mirror_gre_topo_destroy
+ vrf_cleanup
+}
+
+test_span_gre_mac()
+{
+ local tundev=$1; shift
+ local direction=$1; shift
+ local what=$1; shift
+
+ case "$direction" in
+ ingress) local src_mac=$(mac_get $h1); local dst_mac=$(mac_get $h2)
+ ;;
+ egress) local src_mac=$(mac_get $h2); local dst_mac=$(mac_get $h1)
+ ;;
+ esac
+
+ RET=0
+
+ mirror_install $swp1 $direction $tundev "matchall $tcflags"
+ icmp_capture_install h3-${tundev} "src_mac $src_mac dst_mac $dst_mac"
+
+ mirror_test v$h1 192.0.2.1 192.0.2.2 h3-${tundev} 100 10
+
+ icmp_capture_uninstall h3-${tundev}
+ mirror_uninstall $swp1 $direction
+
+ log_test "$direction $what: envelope MAC ($tcflags)"
+}
+
+test_two_spans()
+{
+ RET=0
+
+ mirror_install $swp1 ingress gt4 "matchall $tcflags"
+ mirror_install $swp1 egress gt6 "matchall $tcflags"
+ quick_test_span_gre_dir gt4 ingress
+ quick_test_span_gre_dir gt6 egress
+
+ mirror_uninstall $swp1 ingress
+ fail_test_span_gre_dir gt4 ingress
+ quick_test_span_gre_dir gt6 egress
+
+ mirror_install $swp1 ingress gt4 "matchall $tcflags"
+ mirror_uninstall $swp1 egress
+ quick_test_span_gre_dir gt4 ingress
+ fail_test_span_gre_dir gt6 egress
+
+ mirror_uninstall $swp1 ingress
+ log_test "two simultaneously configured mirrors ($tcflags)"
+}
+
+test_gretap()
+{
+ full_test_span_gre_dir gt4 ingress 8 0 "mirror to gretap"
+ full_test_span_gre_dir gt4 egress 0 8 "mirror to gretap"
+}
+
+test_ip6gretap()
+{
+ full_test_span_gre_dir gt6 ingress 8 0 "mirror to ip6gretap"
+ full_test_span_gre_dir gt6 egress 0 8 "mirror to ip6gretap"
+}
+
+test_gretap_mac()
+{
+ test_span_gre_mac gt4 ingress "mirror to gretap"
+ test_span_gre_mac gt4 egress "mirror to gretap"
+}
+
+test_ip6gretap_mac()
+{
+ test_span_gre_mac gt6 ingress "mirror to ip6gretap"
+ test_span_gre_mac gt6 egress "mirror to ip6gretap"
+}
+
+test_all()
+{
+ slow_path_trap_install $swp1 ingress
+ slow_path_trap_install $swp1 egress
+
+ tests_run
+
+ slow_path_trap_uninstall $swp1 egress
+ slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+ echo "WARN: Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh
new file mode 100755
index 000000000..360ca133b
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh
@@ -0,0 +1,226 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +---------------------+ +---------------------+
+# | H1 | | H2 |
+# | + $h1 | | $h2 + |
+# | | 192.0.2.1/28 | | 192.0.2.2/28 | |
+# +-----|---------------+ +---------------|-----+
+# | |
+# +-----|-------------------------------------------------------------|-----+
+# | SW o--> mirror | |
+# | +---|-------------------------------------------------------------|---+ |
+# | | + $swp1 BR $swp2 + | |
+# | +---------------------------------------------------------------------+ |
+# | |
+# | +---------------------------------------------------------------------+ |
+# | | OL + gt6 (ip6gretap) + gt4 (gretap) | |
+# | | : loc=2001:db8:2::1 : loc=192.0.2.129 | |
+# | | : rem=2001:db8:2::2 : rem=192.0.2.130 | |
+# | | : ttl=100 : ttl=100 | |
+# | | : tos=inherit : tos=inherit | |
+# | +-------------------------:--|-------------------:--|-----------------+ |
+# | : | : | |
+# | +-------------------------:--|-------------------:--|-----------------+ |
+# | | UL : |,---------------------' | |
+# | | + $swp3 : || : | |
+# | | | 192.0.2.129/28 : vv : | |
+# | | | 2001:db8:2::1/64 : + ul (dummy) : | |
+# | +---|---------------------:----------------------:--------------------+ |
+# +-----|---------------------:----------------------:----------------------+
+# | : :
+# +-----|---------------------:----------------------:----------------------+
+# | H3 + $h3 + h3-gt6 (ip6gretap) + h3-gt4 (gretap) |
+# | 192.0.2.130/28 loc=2001:db8:2::2 loc=192.0.2.130 |
+# | 2001:db8:2::2/64 rem=2001:db8:2::1 rem=192.0.2.129 |
+# | ttl=100 ttl=100 |
+# | tos=inherit tos=inherit |
+# | |
+# +-------------------------------------------------------------------------+
+#
+# This tests mirroring to gretap and ip6gretap configured in an overlay /
+# underlay manner, i.e. with a bound dummy device that marks underlay VRF where
+# the encapsulated packed should be routed.
+
+ALL_TESTS="
+ test_gretap
+ test_ip6gretap
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/28
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/28
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/28
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2 192.0.2.2/28
+}
+
+h3_create()
+{
+ simple_if_init $h3 192.0.2.130/28 2001:db8:2::2/64
+
+ tunnel_create h3-gt4 gretap 192.0.2.130 192.0.2.129
+ ip link set h3-gt4 vrf v$h3
+ matchall_sink_create h3-gt4
+
+ tunnel_create h3-gt6 ip6gretap 2001:db8:2::2 2001:db8:2::1
+ ip link set h3-gt6 vrf v$h3
+ matchall_sink_create h3-gt6
+}
+
+h3_destroy()
+{
+ tunnel_destroy h3-gt6
+ tunnel_destroy h3-gt4
+
+ simple_if_fini $h3 192.0.2.130/28 2001:db8:2::2/64
+}
+
+switch_create()
+{
+ # Bridge between H1 and H2.
+
+ ip link add name br1 type bridge vlan_filtering 1
+ ip link set dev br1 up
+
+ ip link set dev $swp1 master br1
+ ip link set dev $swp1 up
+
+ ip link set dev $swp2 master br1
+ ip link set dev $swp2 up
+
+ tc qdisc add dev $swp1 clsact
+
+ # Underlay.
+
+ simple_if_init $swp3 192.0.2.129/28 2001:db8:2::1/64
+
+ ip link add name ul type dummy
+ ip link set dev ul master v$swp3
+ ip link set dev ul up
+
+ # Overlay.
+
+ vrf_create vrf-ol
+ ip link set dev vrf-ol up
+
+ tunnel_create gt4 gretap 192.0.2.129 192.0.2.130 \
+ ttl 100 tos inherit dev ul
+ ip link set dev gt4 master vrf-ol
+ ip link set dev gt4 up
+
+ tunnel_create gt6 ip6gretap 2001:db8:2::1 2001:db8:2::2 \
+ ttl 100 tos inherit dev ul allow-localremote
+ ip link set dev gt6 master vrf-ol
+ ip link set dev gt6 up
+}
+
+switch_destroy()
+{
+ vrf_destroy vrf-ol
+
+ tunnel_destroy gt6
+ tunnel_destroy gt4
+
+ simple_if_fini $swp3 192.0.2.129/28 2001:db8:2::1/64
+
+ ip link del dev ul
+
+ tc qdisc del dev $swp1 clsact
+
+ ip link set dev $swp1 down
+ ip link set dev $swp2 down
+ ip link del dev br1
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+ h3_create
+
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+
+ h3_destroy
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+test_gretap()
+{
+ full_test_span_gre_dir gt4 ingress 8 0 "mirror to gretap w/ UL"
+ full_test_span_gre_dir gt4 egress 0 8 "mirror to gretap w/ UL"
+}
+
+test_ip6gretap()
+{
+ full_test_span_gre_dir gt6 ingress 8 0 "mirror to ip6gretap w/ UL"
+ full_test_span_gre_dir gt6 egress 0 8 "mirror to ip6gretap w/ UL"
+}
+
+test_all()
+{
+ RET=0
+
+ slow_path_trap_install $swp1 ingress
+ slow_path_trap_install $swp1 egress
+
+ tests_run
+
+ slow_path_trap_uninstall $swp1 egress
+ slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+ echo "WARN: Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh
new file mode 100755
index 000000000..c5095da7f
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh
@@ -0,0 +1,132 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test for "tc action mirred egress mirror" when the underlay route points at a
+# bridge device without vlan filtering (802.1d).
+#
+# This test uses standard topology for testing mirror-to-gretap. See
+# mirror_gre_topo_lib.sh for more details. The full topology is as follows:
+#
+# +---------------------+ +---------------------+
+# | H1 | | H2 |
+# | + $h1 | | $h2 + |
+# | | 192.0.2.1/28 | | 192.0.2.2/28 | |
+# +-----|---------------+ +---------------|-----+
+# | |
+# +-----|-------------------------------------------------------------|-----+
+# | SW o---> mirror | |
+# | +---|-------------------------------------------------------------|---+ |
+# | | + $swp1 + br1 (802.1q bridge) $swp2 + | |
+# | +---------------------------------------------------------------------+ |
+# | |
+# | +---------------------------------------------------------------------+ |
+# | | + br2 (802.1d bridge) | |
+# | | 192.0.2.129/28 | |
+# | | + $swp3 2001:db8:2::1/64 | |
+# | +---|-----------------------------------------------------------------+ |
+# | | ^ ^ |
+# | | + gt6 (ip6gretap) | + gt4 (gretap) | |
+# | | : loc=2001:db8:2::1 | : loc=192.0.2.129 | |
+# | | : rem=2001:db8:2::2 -+ : rem=192.0.2.130 -+ |
+# | | : ttl=100 : ttl=100 |
+# | | : tos=inherit : tos=inherit |
+# +-----|---------------------:----------------------:----------------------+
+# | : :
+# +-----|---------------------:----------------------:----------------------+
+# | H3 + $h3 + h3-gt6(ip6gretap) + h3-gt4 (gretap) |
+# | 192.0.2.130/28 loc=2001:db8:2::2 loc=192.0.2.130 |
+# | 2001:db8:2::2/64 rem=2001:db8:2::1 rem=192.0.2.129 |
+# | ttl=100 ttl=100 |
+# | tos=inherit tos=inherit |
+# +-------------------------------------------------------------------------+
+
+ALL_TESTS="
+ test_gretap
+ test_ip6gretap
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ vrf_prepare
+ mirror_gre_topo_create
+
+ ip link add name br2 type bridge vlan_filtering 0
+ ip link set dev br2 up
+
+ ip link set dev $swp3 master br2
+ ip route add 192.0.2.130/32 dev br2
+ ip -6 route add 2001:db8:2::2/128 dev br2
+
+ ip address add dev br2 192.0.2.129/28
+ ip address add dev br2 2001:db8:2::1/64
+
+ ip address add dev $h3 192.0.2.130/28
+ ip address add dev $h3 2001:db8:2::2/64
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ ip address del dev $h3 2001:db8:2::2/64
+ ip address del dev $h3 192.0.2.130/28
+ ip link del dev br2
+
+ mirror_gre_topo_destroy
+ vrf_cleanup
+}
+
+test_gretap()
+{
+ full_test_span_gre_dir gt4 ingress 8 0 "mirror to gretap"
+ full_test_span_gre_dir gt4 egress 0 8 "mirror to gretap"
+}
+
+test_ip6gretap()
+{
+ full_test_span_gre_dir gt6 ingress 8 0 "mirror to ip6gretap"
+ full_test_span_gre_dir gt6 egress 0 8 "mirror to ip6gretap"
+}
+
+test_all()
+{
+ slow_path_trap_install $swp1 ingress
+ slow_path_trap_install $swp1 egress
+
+ tests_run
+
+ slow_path_trap_uninstall $swp1 egress
+ slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+ echo "WARN: Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh
new file mode 100755
index 000000000..f8cda822c
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh
@@ -0,0 +1,132 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# Test for "tc action mirred egress mirror" when the underlay route points at a
+# bridge device without vlan filtering (802.1d). The device attached to that
+# bridge is a VLAN.
+
+ALL_TESTS="
+ test_gretap
+ test_ip6gretap
+ test_gretap_stp
+ test_ip6gretap_stp
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ vrf_prepare
+ mirror_gre_topo_create
+
+ ip link add name br2 type bridge vlan_filtering 0
+ ip link set dev br2 up
+
+ vlan_create $swp3 555
+
+ ip link set dev $swp3.555 master br2
+ ip route add 192.0.2.130/32 dev br2
+ ip -6 route add 2001:db8:2::2/128 dev br2
+
+ ip address add dev br2 192.0.2.129/32
+ ip address add dev br2 2001:db8:2::1/128
+
+ vlan_create $h3 555 v$h3 192.0.2.130/28 2001:db8:2::2/64
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ vlan_destroy $h3 555
+ ip link del dev br2
+ vlan_destroy $swp3 555
+
+ mirror_gre_topo_destroy
+ vrf_cleanup
+}
+
+test_vlan_match()
+{
+ local tundev=$1; shift
+ local vlan_match=$1; shift
+ local what=$1; shift
+
+ full_test_span_gre_dir_vlan $tundev ingress "$vlan_match" 8 0 "$what"
+ full_test_span_gre_dir_vlan $tundev egress "$vlan_match" 0 8 "$what"
+}
+
+test_gretap()
+{
+ test_vlan_match gt4 'skip_hw vlan_id 555 vlan_ethtype ip' \
+ "mirror to gretap"
+}
+
+test_ip6gretap()
+{
+ test_vlan_match gt6 'skip_hw vlan_id 555 vlan_ethtype ip' \
+ "mirror to ip6gretap"
+}
+
+test_gretap_stp()
+{
+ # Sometimes after mirror installation, the neighbor's state is not valid.
+ # The reason is that there is no SW datapath activity related to the
+ # neighbor for the remote GRE address. Therefore whether the corresponding
+ # neighbor will be valid is a matter of luck, and the test is thus racy.
+ # Set the neighbor's state to permanent, so it would be always valid.
+ ip neigh replace 192.0.2.130 lladdr $(mac_get $h3) \
+ nud permanent dev br2
+ full_test_span_gre_stp gt4 $swp3.555 "mirror to gretap"
+}
+
+test_ip6gretap_stp()
+{
+ ip neigh replace 2001:db8:2::2 lladdr $(mac_get $h3) \
+ nud permanent dev br2
+ full_test_span_gre_stp gt6 $swp3.555 "mirror to ip6gretap"
+}
+
+test_all()
+{
+ slow_path_trap_install $swp1 ingress
+ slow_path_trap_install $swp1 egress
+
+ tests_run
+
+ slow_path_trap_uninstall $swp1 egress
+ slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+ echo "WARN: Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh
new file mode 100755
index 000000000..9ff22f280
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh
@@ -0,0 +1,129 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test for "tc action mirred egress mirror" when the underlay route points at a
+# bridge device with vlan filtering (802.1q).
+#
+# This test uses standard topology for testing mirror-to-gretap. See
+# mirror_gre_topo_lib.sh for more details. The full topology is as follows:
+#
+# +---------------------+ +---------------------+
+# | H1 | | H2 |
+# | + $h1 | | $h2 + |
+# | | 192.0.2.1/28 | | 192.0.2.2/28 | |
+# +-----|---------------+ +---------------|-----+
+# | |
+# +-----|---------------------------------------------------------------|-----+
+# | SW o---> mirror | |
+# | +---|---------------------------------------------------------------|---+ |
+# | | + $swp1 + br1 (802.1q bridge) $swp2 + | |
+# | | 192.0.2.129/28 | |
+# | | + $swp3 2001:db8:2::1/64 | |
+# | | | vid555 vid555[pvid,untagged] | |
+# | +---|-------------------------------------------------------------------+ |
+# | | ^ ^ |
+# | | + gt6 (ip6gretap) | + gt4 (gretap) | |
+# | | : loc=2001:db8:2::1 | : loc=192.0.2.129 | |
+# | | : rem=2001:db8:2::2 -+ : rem=192.0.2.130 -+ |
+# | | : ttl=100 : ttl=100 |
+# | | : tos=inherit : tos=inherit |
+# +-----|---------------------:------------------------:----------------------+
+# | : :
+# +-----|---------------------:------------------------:----------------------+
+# | H3 + $h3 + h3-gt6(ip6gretap) + h3-gt4 (gretap) |
+# | | loc=2001:db8:2::2 loc=192.0.2.130 |
+# | + $h3.555 rem=2001:db8:2::1 rem=192.0.2.129 |
+# | 192.0.2.130/28 ttl=100 ttl=100 |
+# | 2001:db8:2::2/64 tos=inherit tos=inherit |
+# +---------------------------------------------------------------------------+
+
+ALL_TESTS="
+ test_gretap
+ test_ip6gretap
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ vrf_prepare
+ mirror_gre_topo_create
+ # Avoid changing br1's PVID while it is operational as a L3 interface.
+ ip link set dev br1 down
+
+ ip link set dev $swp3 master br1
+ bridge vlan add dev br1 vid 555 pvid untagged self
+ ip link set dev br1 up
+ ip address add dev br1 192.0.2.129/28
+ ip address add dev br1 2001:db8:2::1/64
+
+ ip -4 route add 192.0.2.130/32 dev br1
+ ip -6 route add 2001:db8:2::2/128 dev br1
+
+ vlan_create $h3 555 v$h3 192.0.2.130/28 2001:db8:2::2/64
+ bridge vlan add dev $swp3 vid 555
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ ip link set dev $swp3 nomaster
+ vlan_destroy $h3 555
+
+ mirror_gre_topo_destroy
+ vrf_cleanup
+}
+
+test_gretap()
+{
+ full_test_span_gre_dir gt4 ingress 8 0 "mirror to gretap"
+ full_test_span_gre_dir gt4 egress 0 8 "mirror to gretap"
+}
+
+test_ip6gretap()
+{
+ full_test_span_gre_dir gt6 ingress 8 0 "mirror to ip6gretap"
+ full_test_span_gre_dir gt6 egress 0 8 "mirror to ip6gretap"
+}
+
+tests()
+{
+ slow_path_trap_install $swp1 ingress
+ slow_path_trap_install $swp1 egress
+
+ tests_run
+
+ slow_path_trap_uninstall $swp1 egress
+ slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+tests
+
+if ! tc_offload_check; then
+ echo "WARN: Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ tests
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh
new file mode 100755
index 000000000..61844caf6
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh
@@ -0,0 +1,283 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test for "tc action mirred egress mirror" when the underlay route points at a
+# bridge device with vlan filtering (802.1q), and the egress device is a team
+# device.
+#
+# +----------------------+ +----------------------+
+# | H1 | | H2 |
+# | + $h1.333 | | $h1.555 + |
+# | | 192.0.2.1/28 | | 192.0.2.18/28 | |
+# +-----|----------------+ +----------------|-----+
+# | $h1 |
+# +--------------------------------+------------------------------+
+# |
+# +--------------------------------------|------------------------------------+
+# | SW o---> mirror |
+# | | |
+# | +--------------------------------+------------------------------+ |
+# | | $swp1 | |
+# | + $swp1.333 $swp1.555 + |
+# | 192.0.2.2/28 192.0.2.17/28 |
+# | |
+# | +-----------------------------------------------------------------------+ |
+# | | BR1 (802.1q) | |
+# | | + lag (team) 192.0.2.129/28 | |
+# | | / \ 2001:db8:2::1/64 | |
+# | +---/---\---------------------------------------------------------------+ |
+# | / \ ^ |
+# | | \ + gt4 (gretap) | |
+# | | \ loc=192.0.2.129 | |
+# | | \ rem=192.0.2.130 -+ |
+# | | \ ttl=100 |
+# | | \ tos=inherit |
+# | | \ |
+# | | \_________________________________ |
+# | | \ |
+# | + $swp3 + $swp4 |
+# +---|------------------------------------------------|----------------------+
+# | |
+# +---|----------------------+ +---|----------------------+
+# | + $h3 H3 | | + $h4 H4 |
+# | 192.0.2.130/28 | | 192.0.2.130/28 |
+# | 2001:db8:2::2/64 | | 2001:db8:2::2/64 |
+# +--------------------------+ +--------------------------+
+
+ALL_TESTS="
+ test_mirror_gretap_first
+ test_mirror_gretap_second
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+
+require_command $ARPING
+
+vlan_host_create()
+{
+ local if_name=$1; shift
+ local vid=$1; shift
+ local vrf_name=$1; shift
+ local ips=("${@}")
+
+ vrf_create $vrf_name
+ ip link set dev $vrf_name up
+ vlan_create $if_name $vid $vrf_name "${ips[@]}"
+}
+
+vlan_host_destroy()
+{
+ local if_name=$1; shift
+ local vid=$1; shift
+ local vrf_name=$1; shift
+
+ vlan_destroy $if_name $vid
+ ip link set dev $vrf_name down
+ vrf_destroy $vrf_name
+}
+
+h1_create()
+{
+ vlan_host_create $h1 333 vrf-h1 192.0.2.1/28
+ ip -4 route add 192.0.2.16/28 vrf vrf-h1 nexthop via 192.0.2.2
+}
+
+h1_destroy()
+{
+ ip -4 route del 192.0.2.16/28 vrf vrf-h1
+ vlan_host_destroy $h1 333 vrf-h1
+}
+
+h2_create()
+{
+ vlan_host_create $h1 555 vrf-h2 192.0.2.18/28
+ ip -4 route add 192.0.2.0/28 vrf vrf-h2 nexthop via 192.0.2.17
+}
+
+h2_destroy()
+{
+ ip -4 route del 192.0.2.0/28 vrf vrf-h2
+ vlan_host_destroy $h1 555 vrf-h2
+}
+
+h3_create()
+{
+ simple_if_init $h3 192.0.2.130/28
+ tc qdisc add dev $h3 clsact
+}
+
+h3_destroy()
+{
+ tc qdisc del dev $h3 clsact
+ simple_if_fini $h3 192.0.2.130/28
+}
+
+h4_create()
+{
+ simple_if_init $h4 192.0.2.130/28
+ tc qdisc add dev $h4 clsact
+}
+
+h4_destroy()
+{
+ tc qdisc del dev $h4 clsact
+ simple_if_fini $h4 192.0.2.130/28
+}
+
+switch_create()
+{
+ ip link set dev $swp1 up
+ tc qdisc add dev $swp1 clsact
+ vlan_create $swp1 333 "" 192.0.2.2/28
+ vlan_create $swp1 555 "" 192.0.2.17/28
+
+ tunnel_create gt4 gretap 192.0.2.129 192.0.2.130 \
+ ttl 100 tos inherit
+
+ ip link set dev $swp3 up
+ ip link set dev $swp4 up
+
+ ip link add name br1 type bridge vlan_filtering 1
+ ip link set dev br1 up
+ __addr_add_del br1 add 192.0.2.129/32
+ ip -4 route add 192.0.2.130/32 dev br1
+
+ team_create lag loadbalance $swp3 $swp4
+ ip link set dev lag master br1
+}
+
+switch_destroy()
+{
+ ip link set dev lag nomaster
+ team_destroy lag
+
+ ip -4 route del 192.0.2.130/32 dev br1
+ __addr_add_del br1 del 192.0.2.129/32
+ ip link set dev br1 down
+ ip link del dev br1
+
+ ip link set dev $swp4 down
+ ip link set dev $swp3 down
+
+ tunnel_destroy gt4
+
+ vlan_destroy $swp1 555
+ vlan_destroy $swp1 333
+ tc qdisc del dev $swp1 clsact
+ ip link set dev $swp1 down
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp3=${NETIFS[p3]}
+ h3=${NETIFS[p4]}
+
+ swp4=${NETIFS[p5]}
+ h4=${NETIFS[p6]}
+
+ vrf_prepare
+
+ ip link set dev $h1 up
+ h1_create
+ h2_create
+ h3_create
+ h4_create
+ switch_create
+
+ trap_install $h3 ingress
+ trap_install $h4 ingress
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ trap_uninstall $h4 ingress
+ trap_uninstall $h3 ingress
+
+ switch_destroy
+ h4_destroy
+ h3_destroy
+ h2_destroy
+ h1_destroy
+ ip link set dev $h1 down
+
+ vrf_cleanup
+}
+
+test_lag_slave()
+{
+ local host_dev=$1; shift
+ local up_dev=$1; shift
+ local down_dev=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ mirror_install $swp1 ingress gt4 \
+ "proto 802.1q flower vlan_id 333 $tcflags"
+
+ # Test connectivity through $up_dev when $down_dev is set down.
+ ip link set dev $down_dev down
+ setup_wait_dev $up_dev
+ setup_wait_dev $host_dev
+ $ARPING -I br1 192.0.2.130 -qfc 1
+ sleep 2
+ mirror_test vrf-h1 192.0.2.1 192.0.2.18 $host_dev 1 10
+
+ # Test lack of connectivity when both slaves are down.
+ ip link set dev $up_dev down
+ sleep 2
+ mirror_test vrf-h1 192.0.2.1 192.0.2.18 $h3 1 0
+ mirror_test vrf-h1 192.0.2.1 192.0.2.18 $h4 1 0
+
+ ip link set dev $up_dev up
+ ip link set dev $down_dev up
+ mirror_uninstall $swp1 ingress
+
+ log_test "$what ($tcflags)"
+}
+
+test_mirror_gretap_first()
+{
+ test_lag_slave $h3 $swp3 $swp4 "mirror to gretap: LAG first slave"
+}
+
+test_mirror_gretap_second()
+{
+ test_lag_slave $h4 $swp4 $swp3 "mirror to gretap: LAG second slave"
+}
+
+test_all()
+{
+ slow_path_trap_install $swp1 ingress
+ slow_path_trap_install $swp1 egress
+
+ tests_run
+
+ slow_path_trap_uninstall $swp1 egress
+ slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+ echo "WARN: Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh b/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh
new file mode 100755
index 000000000..135902aa8
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh
@@ -0,0 +1,271 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# Test how mirrors to gretap and ip6gretap react to changes to relevant
+# configuration.
+
+ALL_TESTS="
+ test_ttl
+ test_tun_up
+ test_egress_up
+ test_remote_ip
+ test_tun_del
+ test_route_del
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ vrf_prepare
+ mirror_gre_topo_create
+
+ # This test downs $swp3, which deletes the configured IPv6 address
+ # unless this sysctl is set.
+ sysctl_set net.ipv6.conf.$swp3.keep_addr_on_down 1
+
+ ip address add dev $swp3 192.0.2.129/28
+ ip address add dev $h3 192.0.2.130/28
+
+ ip address add dev $swp3 2001:db8:2::1/64
+ ip address add dev $h3 2001:db8:2::2/64
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ ip address del dev $h3 2001:db8:2::2/64
+ ip address del dev $swp3 2001:db8:2::1/64
+
+ ip address del dev $h3 192.0.2.130/28
+ ip address del dev $swp3 192.0.2.129/28
+
+ sysctl_restore net.ipv6.conf.$swp3.keep_addr_on_down
+
+ mirror_gre_topo_destroy
+ vrf_cleanup
+}
+
+test_span_gre_ttl()
+{
+ local tundev=$1; shift
+ local type=$1; shift
+ local prot=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ mirror_install $swp1 ingress $tundev "matchall $tcflags"
+ tc filter add dev $h3 ingress pref 77 prot $prot \
+ flower ip_ttl 50 action pass
+
+ mirror_test v$h1 192.0.2.1 192.0.2.2 $h3 77 0
+
+ ip link set dev $tundev type $type ttl 50
+ mirror_test v$h1 192.0.2.1 192.0.2.2 $h3 77 10
+
+ ip link set dev $tundev type $type ttl 100
+ tc filter del dev $h3 ingress pref 77
+ mirror_uninstall $swp1 ingress
+
+ log_test "$what: TTL change ($tcflags)"
+}
+
+test_span_gre_tun_up()
+{
+ local tundev=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ ip link set dev $tundev down
+ mirror_install $swp1 ingress $tundev "matchall $tcflags"
+ fail_test_span_gre_dir $tundev ingress
+
+ ip link set dev $tundev up
+
+ quick_test_span_gre_dir $tundev ingress
+ mirror_uninstall $swp1 ingress
+
+ log_test "$what: tunnel down/up ($tcflags)"
+}
+
+test_span_gre_egress_up()
+{
+ local tundev=$1; shift
+ local remote_ip=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ ip link set dev $swp3 down
+ mirror_install $swp1 ingress $tundev "matchall $tcflags"
+ fail_test_span_gre_dir $tundev ingress
+
+ # After setting the device up, wait for neighbor to get resolved so that
+ # we can expect mirroring to work.
+ ip link set dev $swp3 up
+ setup_wait_dev $swp3
+ ping -c 1 -I $swp3 $remote_ip &>/dev/null
+
+ quick_test_span_gre_dir $tundev ingress
+ mirror_uninstall $swp1 ingress
+
+ log_test "$what: egress down/up ($tcflags)"
+}
+
+test_span_gre_remote_ip()
+{
+ local tundev=$1; shift
+ local type=$1; shift
+ local correct_ip=$1; shift
+ local wrong_ip=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ ip link set dev $tundev type $type remote $wrong_ip
+ mirror_install $swp1 ingress $tundev "matchall $tcflags"
+ fail_test_span_gre_dir $tundev ingress
+
+ ip link set dev $tundev type $type remote $correct_ip
+ quick_test_span_gre_dir $tundev ingress
+ mirror_uninstall $swp1 ingress
+
+ log_test "$what: remote address change ($tcflags)"
+}
+
+test_span_gre_tun_del()
+{
+ local tundev=$1; shift
+ local type=$1; shift
+ local flags=$1; shift
+ local local_ip=$1; shift
+ local remote_ip=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ mirror_install $swp1 ingress $tundev "matchall $tcflags"
+ quick_test_span_gre_dir $tundev ingress
+ ip link del dev $tundev
+ fail_test_span_gre_dir $tundev ingress
+
+ tunnel_create $tundev $type $local_ip $remote_ip \
+ ttl 100 tos inherit $flags
+
+ # Recreating the tunnel doesn't reestablish mirroring, so reinstall it
+ # and verify it works for the follow-up tests.
+ mirror_uninstall $swp1 ingress
+ mirror_install $swp1 ingress $tundev "matchall $tcflags"
+ quick_test_span_gre_dir $tundev ingress
+ mirror_uninstall $swp1 ingress
+
+ log_test "$what: tunnel deleted ($tcflags)"
+}
+
+test_span_gre_route_del()
+{
+ local tundev=$1; shift
+ local edev=$1; shift
+ local route=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ mirror_install $swp1 ingress $tundev "matchall $tcflags"
+ quick_test_span_gre_dir $tundev ingress
+
+ ip route del $route dev $edev
+ fail_test_span_gre_dir $tundev ingress
+
+ ip route add $route dev $edev
+ quick_test_span_gre_dir $tundev ingress
+
+ mirror_uninstall $swp1 ingress
+
+ log_test "$what: underlay route removal ($tcflags)"
+}
+
+test_ttl()
+{
+ test_span_gre_ttl gt4 gretap ip "mirror to gretap"
+ test_span_gre_ttl gt6 ip6gretap ipv6 "mirror to ip6gretap"
+}
+
+test_tun_up()
+{
+ test_span_gre_tun_up gt4 "mirror to gretap"
+ test_span_gre_tun_up gt6 "mirror to ip6gretap"
+}
+
+test_egress_up()
+{
+ test_span_gre_egress_up gt4 192.0.2.130 "mirror to gretap"
+ test_span_gre_egress_up gt6 2001:db8:2::2 "mirror to ip6gretap"
+}
+
+test_remote_ip()
+{
+ test_span_gre_remote_ip gt4 gretap 192.0.2.130 192.0.2.132 "mirror to gretap"
+ test_span_gre_remote_ip gt6 ip6gretap 2001:db8:2::2 2001:db8:2::4 "mirror to ip6gretap"
+}
+
+test_tun_del()
+{
+ test_span_gre_tun_del gt4 gretap "" \
+ 192.0.2.129 192.0.2.130 "mirror to gretap"
+ test_span_gre_tun_del gt6 ip6gretap allow-localremote \
+ 2001:db8:2::1 2001:db8:2::2 "mirror to ip6gretap"
+}
+
+test_route_del()
+{
+ test_span_gre_route_del gt4 $swp3 192.0.2.128/28 "mirror to gretap"
+ test_span_gre_route_del gt6 $swp3 2001:db8:2::/64 "mirror to ip6gretap"
+}
+
+test_all()
+{
+ slow_path_trap_install $swp1 ingress
+ slow_path_trap_install $swp1 egress
+
+ tests_run
+
+ slow_path_trap_uninstall $swp1 egress
+ slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+ echo "WARN: Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh b/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh
new file mode 100755
index 000000000..12914f406
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh
@@ -0,0 +1,137 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# This tests flower-triggered mirroring to gretap and ip6gretap netdevices. The
+# interfaces on H1 and H2 have two addresses each. Flower match on one of the
+# addresses is configured with mirror action. It is expected that when pinging
+# this address, mirroring takes place, whereas when pinging the other one,
+# there's no mirroring.
+
+ALL_TESTS="
+ test_gretap
+ test_ip6gretap
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ vrf_prepare
+ mirror_gre_topo_create
+
+ ip address add dev $swp3 192.0.2.129/28
+ ip address add dev $h3 192.0.2.130/28
+
+ ip address add dev $swp3 2001:db8:2::1/64
+ ip address add dev $h3 2001:db8:2::2/64
+
+ ip address add dev $h1 192.0.2.3/28
+ ip address add dev $h2 192.0.2.4/28
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ ip address del dev $h2 192.0.2.4/28
+ ip address del dev $h1 192.0.2.3/28
+
+ ip address del dev $h3 2001:db8:2::2/64
+ ip address del dev $swp3 2001:db8:2::1/64
+
+ ip address del dev $h3 192.0.2.130/28
+ ip address del dev $swp3 192.0.2.129/28
+
+ mirror_gre_topo_destroy
+ vrf_cleanup
+}
+
+test_span_gre_dir_acl()
+{
+ test_span_gre_dir_ips "$@" 192.0.2.3 192.0.2.4
+}
+
+fail_test_span_gre_dir_acl()
+{
+ fail_test_span_gre_dir_ips "$@" 192.0.2.3 192.0.2.4
+}
+
+full_test_span_gre_dir_acl()
+{
+ local tundev=$1; shift
+ local direction=$1; shift
+ local forward_type=$1; shift
+ local backward_type=$1; shift
+ local match_dip=$1; shift
+ local what=$1; shift
+
+ mirror_install $swp1 $direction $tundev \
+ "protocol ip flower $tcflags dst_ip $match_dip"
+ fail_test_span_gre_dir $tundev $direction
+ test_span_gre_dir_acl "$tundev" "$direction" \
+ "$forward_type" "$backward_type"
+ mirror_uninstall $swp1 $direction
+
+ # Test lack of mirroring after ACL mirror is uninstalled.
+ fail_test_span_gre_dir_acl "$tundev" "$direction"
+
+ log_test "$direction $what ($tcflags)"
+}
+
+test_gretap()
+{
+ full_test_span_gre_dir_acl gt4 ingress 8 0 192.0.2.4 "ACL mirror to gretap"
+ full_test_span_gre_dir_acl gt4 egress 0 8 192.0.2.3 "ACL mirror to gretap"
+}
+
+test_ip6gretap()
+{
+ full_test_span_gre_dir_acl gt6 ingress 8 0 192.0.2.4 "ACL mirror to ip6gretap"
+ full_test_span_gre_dir_acl gt6 egress 0 8 192.0.2.3 "ACL mirror to ip6gretap"
+}
+
+test_all()
+{
+ RET=0
+
+ slow_path_trap_install $swp1 ingress
+ slow_path_trap_install $swp1 egress
+
+ tests_run
+
+ slow_path_trap_uninstall $swp1 egress
+ slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+ echo "WARN: Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh b/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh
new file mode 100755
index 000000000..9edf4cb10
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh
@@ -0,0 +1,285 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test for "tc action mirred egress mirror" when the underlay route points at a
+# team device.
+#
+# +----------------------+ +----------------------+
+# | H1 | | H2 |
+# | + $h1.333 | | $h1.555 + |
+# | | 192.0.2.1/28 | | 192.0.2.18/28 | |
+# +----|-----------------+ +----------------|-----+
+# | $h1 |
+# +---------------------------------+------------------------------+
+# |
+# +--------------------------------------|------------------------------------+
+# | SW o---> mirror |
+# | | |
+# | +----------------------------------+------------------------------+ |
+# | | $swp1 | |
+# | + $swp1.333 $swp1.555 + |
+# | 192.0.2.2/28 192.0.2.17/28 |
+# | |
+# | |
+# | + gt4 (gretap) ,-> + lag1 (team) |
+# | loc=192.0.2.129 | | 192.0.2.129/28 |
+# | rem=192.0.2.130 --' | |
+# | ttl=100 | |
+# | tos=inherit | |
+# | _____________________|______________________ |
+# | / \ |
+# | / \ |
+# | + $swp3 + $swp4 |
+# +---|------------------------------------------------|----------------------+
+# | |
+# +---|------------------------------------------------|----------------------+
+# | + $h3 + $h4 H3 |
+# | \ / |
+# | \____________________________________________/ |
+# | | |
+# | + lag2 (team) |
+# | 192.0.2.130/28 |
+# | |
+# +---------------------------------------------------------------------------+
+
+ALL_TESTS="
+ test_mirror_gretap_first
+ test_mirror_gretap_second
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+
+require_command $ARPING
+
+vlan_host_create()
+{
+ local if_name=$1; shift
+ local vid=$1; shift
+ local vrf_name=$1; shift
+ local ips=("${@}")
+
+ vrf_create $vrf_name
+ ip link set dev $vrf_name up
+ vlan_create $if_name $vid $vrf_name "${ips[@]}"
+}
+
+vlan_host_destroy()
+{
+ local if_name=$1; shift
+ local vid=$1; shift
+ local vrf_name=$1; shift
+
+ vlan_destroy $if_name $vid
+ ip link set dev $vrf_name down
+ vrf_destroy $vrf_name
+}
+
+h1_create()
+{
+ vlan_host_create $h1 333 vrf-h1 192.0.2.1/28
+ ip -4 route add 192.0.2.16/28 vrf vrf-h1 nexthop via 192.0.2.2
+}
+
+h1_destroy()
+{
+ ip -4 route del 192.0.2.16/28 vrf vrf-h1
+ vlan_host_destroy $h1 333 vrf-h1
+}
+
+h2_create()
+{
+ vlan_host_create $h1 555 vrf-h2 192.0.2.18/28
+ ip -4 route add 192.0.2.0/28 vrf vrf-h2 nexthop via 192.0.2.17
+}
+
+h2_destroy()
+{
+ ip -4 route del 192.0.2.0/28 vrf vrf-h2
+ vlan_host_destroy $h1 555 vrf-h2
+}
+
+h3_create_team()
+{
+ team_create lag2 lacp $h3 $h4
+ __simple_if_init lag2 vrf-h3 192.0.2.130/32
+ ip -4 route add vrf vrf-h3 192.0.2.129/32 dev lag2
+}
+
+h3_destroy_team()
+{
+ ip -4 route del vrf vrf-h3 192.0.2.129/32 dev lag2
+ __simple_if_fini lag2 192.0.2.130/32
+ team_destroy lag2
+
+ ip link set dev $h3 down
+ ip link set dev $h4 down
+}
+
+h3_create()
+{
+ vrf_create vrf-h3
+ ip link set dev vrf-h3 up
+ tc qdisc add dev $h3 clsact
+ tc qdisc add dev $h4 clsact
+ h3_create_team
+}
+
+h3_destroy()
+{
+ h3_destroy_team
+ tc qdisc del dev $h4 clsact
+ tc qdisc del dev $h3 clsact
+ ip link set dev vrf-h3 down
+ vrf_destroy vrf-h3
+}
+
+switch_create()
+{
+ ip link set dev $swp1 up
+ tc qdisc add dev $swp1 clsact
+ vlan_create $swp1 333 "" 192.0.2.2/28
+ vlan_create $swp1 555 "" 192.0.2.17/28
+
+ tunnel_create gt4 gretap 192.0.2.129 192.0.2.130 \
+ ttl 100 tos inherit
+
+ ip link set dev $swp3 up
+ ip link set dev $swp4 up
+ team_create lag1 lacp $swp3 $swp4
+ __addr_add_del lag1 add 192.0.2.129/32
+ ip -4 route add 192.0.2.130/32 dev lag1
+}
+
+switch_destroy()
+{
+ ip -4 route del 192.0.2.130/32 dev lag1
+ __addr_add_del lag1 del 192.0.2.129/32
+ team_destroy lag1
+
+ ip link set dev $swp4 down
+ ip link set dev $swp3 down
+
+ tunnel_destroy gt4
+
+ vlan_destroy $swp1 555
+ vlan_destroy $swp1 333
+ tc qdisc del dev $swp1 clsact
+ ip link set dev $swp1 down
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp3=${NETIFS[p3]}
+ h3=${NETIFS[p4]}
+
+ swp4=${NETIFS[p5]}
+ h4=${NETIFS[p6]}
+
+ vrf_prepare
+
+ ip link set dev $h1 up
+ h1_create
+ h2_create
+ h3_create
+ switch_create
+
+ trap_install $h3 ingress
+ trap_install $h4 ingress
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ trap_uninstall $h4 ingress
+ trap_uninstall $h3 ingress
+
+ switch_destroy
+ h3_destroy
+ h2_destroy
+ h1_destroy
+ ip link set dev $h1 down
+
+ vrf_cleanup
+}
+
+test_lag_slave()
+{
+ local up_dev=$1; shift
+ local down_dev=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ mirror_install $swp1 ingress gt4 \
+ "proto 802.1q flower vlan_id 333 $tcflags"
+
+ # Move $down_dev away from the team. That will prompt change in
+ # txability of the connected device, without changing its upness. The
+ # driver should notice the txability change and move the traffic to the
+ # other slave.
+ ip link set dev $down_dev nomaster
+ sleep 2
+ mirror_test vrf-h1 192.0.2.1 192.0.2.18 $up_dev 1 10
+
+ # Test lack of connectivity when neither slave is txable.
+ ip link set dev $up_dev nomaster
+ sleep 2
+ mirror_test vrf-h1 192.0.2.1 192.0.2.18 $h3 1 0
+ mirror_test vrf-h1 192.0.2.1 192.0.2.18 $h4 1 0
+ mirror_uninstall $swp1 ingress
+
+ # Recreate H3's team device, because mlxsw, which this test is
+ # predominantly mean to test, requires a bottom-up construction and
+ # doesn't allow enslavement to a device that already has an upper.
+ h3_destroy_team
+ h3_create_team
+ # Wait for ${h,swp}{3,4}.
+ setup_wait
+
+ log_test "$what ($tcflags)"
+}
+
+test_mirror_gretap_first()
+{
+ test_lag_slave $h3 $h4 "mirror to gretap: LAG first slave"
+}
+
+test_mirror_gretap_second()
+{
+ test_lag_slave $h4 $h3 "mirror to gretap: LAG second slave"
+}
+
+test_all()
+{
+ slow_path_trap_install $swp1 ingress
+ slow_path_trap_install $swp1 egress
+
+ tests_run
+
+ slow_path_trap_uninstall $swp1 egress
+ slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+ echo "WARN: Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh b/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh
new file mode 100644
index 000000000..fac486178
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh
@@ -0,0 +1,130 @@
+# SPDX-License-Identifier: GPL-2.0
+
+source "$relative_path/mirror_lib.sh"
+
+quick_test_span_gre_dir_ips()
+{
+ local tundev=$1; shift
+
+ do_test_span_dir_ips 10 h3-$tundev "$@"
+}
+
+fail_test_span_gre_dir_ips()
+{
+ local tundev=$1; shift
+
+ do_test_span_dir_ips 0 h3-$tundev "$@"
+}
+
+test_span_gre_dir_ips()
+{
+ local tundev=$1; shift
+
+ test_span_dir_ips h3-$tundev "$@"
+}
+
+full_test_span_gre_dir_ips()
+{
+ local tundev=$1; shift
+ local direction=$1; shift
+ local forward_type=$1; shift
+ local backward_type=$1; shift
+ local what=$1; shift
+ local ip1=$1; shift
+ local ip2=$1; shift
+
+ RET=0
+
+ mirror_install $swp1 $direction $tundev "matchall $tcflags"
+ test_span_dir_ips "h3-$tundev" "$direction" "$forward_type" \
+ "$backward_type" "$ip1" "$ip2"
+ mirror_uninstall $swp1 $direction
+
+ log_test "$direction $what ($tcflags)"
+}
+
+full_test_span_gre_dir_vlan_ips()
+{
+ local tundev=$1; shift
+ local direction=$1; shift
+ local vlan_match=$1; shift
+ local forward_type=$1; shift
+ local backward_type=$1; shift
+ local what=$1; shift
+ local ip1=$1; shift
+ local ip2=$1; shift
+
+ RET=0
+
+ mirror_install $swp1 $direction $tundev "matchall $tcflags"
+
+ test_span_dir_ips "h3-$tundev" "$direction" "$forward_type" \
+ "$backward_type" "$ip1" "$ip2"
+
+ tc filter add dev $h3 ingress pref 77 prot 802.1q \
+ flower $vlan_match \
+ action pass
+ mirror_test v$h1 $ip1 $ip2 $h3 77 10
+ tc filter del dev $h3 ingress pref 77
+
+ mirror_uninstall $swp1 $direction
+
+ log_test "$direction $what ($tcflags)"
+}
+
+quick_test_span_gre_dir()
+{
+ quick_test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2
+}
+
+fail_test_span_gre_dir()
+{
+ fail_test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2
+}
+
+test_span_gre_dir()
+{
+ test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2
+}
+
+full_test_span_gre_dir()
+{
+ full_test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2
+}
+
+full_test_span_gre_dir_vlan()
+{
+ full_test_span_gre_dir_vlan_ips "$@" 192.0.2.1 192.0.2.2
+}
+
+full_test_span_gre_stp_ips()
+{
+ local tundev=$1; shift
+ local nbpdev=$1; shift
+ local what=$1; shift
+ local ip1=$1; shift
+ local ip2=$1; shift
+ local h3mac=$(mac_get $h3)
+
+ RET=0
+
+ mirror_install $swp1 ingress $tundev "matchall $tcflags"
+ quick_test_span_gre_dir_ips $tundev ingress $ip1 $ip2
+
+ bridge link set dev $nbpdev state disabled
+ sleep 1
+ fail_test_span_gre_dir_ips $tundev ingress $ip1 $ip2
+
+ bridge link set dev $nbpdev state forwarding
+ sleep 1
+ quick_test_span_gre_dir_ips $tundev ingress $ip1 $ip2
+
+ mirror_uninstall $swp1 ingress
+
+ log_test "$what: STP state ($tcflags)"
+}
+
+full_test_span_gre_stp()
+{
+ full_test_span_gre_stp_ips "$@" 192.0.2.1 192.0.2.2
+}
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_neigh.sh b/tools/testing/selftests/net/forwarding/mirror_gre_neigh.sh
new file mode 100755
index 000000000..fc0508e40
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_neigh.sh
@@ -0,0 +1,115 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# Test for mirroring to gretap and ip6gretap, such that the neighbor entry for
+# the tunnel remote address has invalid address at the time that the mirroring
+# is set up. Later on, the neighbor is deleted and it is expected to be
+# reinitialized using the usual ARP process, and the mirroring offload updated.
+
+ALL_TESTS="
+ test_gretap
+ test_ip6gretap
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ vrf_prepare
+ mirror_gre_topo_create
+
+ ip address add dev $swp3 192.0.2.129/28
+ ip address add dev $h3 192.0.2.130/28
+
+ ip address add dev $swp3 2001:db8:2::1/64
+ ip address add dev $h3 2001:db8:2::2/64
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ ip address del dev $h3 2001:db8:2::2/64
+ ip address del dev $swp3 2001:db8:2::1/64
+
+ ip address del dev $h3 192.0.2.130/28
+ ip address del dev $swp3 192.0.2.129/28
+
+ mirror_gre_topo_destroy
+ vrf_cleanup
+}
+
+test_span_gre_neigh()
+{
+ local addr=$1; shift
+ local tundev=$1; shift
+ local direction=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ ip neigh replace dev $swp3 $addr lladdr 00:11:22:33:44:55
+ mirror_install $swp1 $direction $tundev "matchall $tcflags"
+ fail_test_span_gre_dir $tundev ingress
+ ip neigh del dev $swp3 $addr
+ quick_test_span_gre_dir $tundev ingress
+ mirror_uninstall $swp1 $direction
+
+ log_test "$direction $what: neighbor change ($tcflags)"
+}
+
+test_gretap()
+{
+ test_span_gre_neigh 192.0.2.130 gt4 ingress "mirror to gretap"
+ test_span_gre_neigh 192.0.2.130 gt4 egress "mirror to gretap"
+}
+
+test_ip6gretap()
+{
+ test_span_gre_neigh 2001:db8:2::2 gt6 ingress "mirror to ip6gretap"
+ test_span_gre_neigh 2001:db8:2::2 gt6 egress "mirror to ip6gretap"
+}
+
+test_all()
+{
+ slow_path_trap_install $swp1 ingress
+ slow_path_trap_install $swp1 egress
+
+ tests_run
+
+ slow_path_trap_uninstall $swp1 egress
+ slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+ echo "WARN: Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh b/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh
new file mode 100755
index 000000000..6f9ef1820
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh
@@ -0,0 +1,131 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# Test that gretap and ip6gretap mirroring works when the other tunnel endpoint
+# is reachable through a next-hop route (as opposed to directly-attached route).
+
+ALL_TESTS="
+ test_gretap
+ test_ip6gretap
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ sysctl_set net.ipv4.conf.all.rp_filter 0
+ sysctl_set net.ipv4.conf.$h3.rp_filter 0
+
+ vrf_prepare
+ mirror_gre_topo_create
+
+ sysctl_set net.ipv4.conf.v$h3.rp_filter 0
+
+ ip address add dev $swp3 192.0.2.161/28
+ ip address add dev $h3 192.0.2.162/28
+ ip address add dev gt4 192.0.2.129/32
+ ip address add dev h3-gt4 192.0.2.130/32
+
+ # IPv6 route can't be added after address. Such routes are rejected due
+ # to the gateway address having been configured on the local system. It
+ # works the other way around though.
+ ip address add dev $swp3 2001:db8:4::1/64
+ ip -6 route add 2001:db8:2::2/128 via 2001:db8:4::2
+ ip address add dev $h3 2001:db8:4::2/64
+ ip address add dev gt6 2001:db8:2::1
+ ip address add dev h3-gt6 2001:db8:2::2
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ ip -6 route del 2001:db8:2::2/128 via 2001:db8:4::2
+ ip address del dev $h3 2001:db8:4::2/64
+ ip address del dev $swp3 2001:db8:4::1/64
+
+ ip address del dev $h3 192.0.2.162/28
+ ip address del dev $swp3 192.0.2.161/28
+
+ sysctl_restore net.ipv4.conf.v$h3.rp_filter 0
+
+ mirror_gre_topo_destroy
+ vrf_cleanup
+
+ sysctl_restore net.ipv4.conf.$h3.rp_filter
+ sysctl_restore net.ipv4.conf.all.rp_filter
+}
+
+test_gretap()
+{
+ RET=0
+ mirror_install $swp1 ingress gt4 "matchall $tcflags"
+
+ # For IPv4, test that there's no mirroring without the route directing
+ # the traffic to tunnel remote address. Then add it and test that
+ # mirroring starts. For IPv6 we can't test this due to the limitation
+ # that routes for locally-specified IPv6 addresses can't be added.
+ fail_test_span_gre_dir gt4 ingress
+
+ ip route add 192.0.2.130/32 via 192.0.2.162
+ quick_test_span_gre_dir gt4 ingress
+ ip route del 192.0.2.130/32 via 192.0.2.162
+
+ mirror_uninstall $swp1 ingress
+ log_test "mirror to gre with next-hop remote ($tcflags)"
+}
+
+test_ip6gretap()
+{
+ RET=0
+
+ mirror_install $swp1 ingress gt6 "matchall $tcflags"
+ quick_test_span_gre_dir gt6 ingress
+ mirror_uninstall $swp1 ingress
+
+ log_test "mirror to ip6gre with next-hop remote ($tcflags)"
+}
+
+test_all()
+{
+ slow_path_trap_install $swp1 ingress
+ slow_path_trap_install $swp1 egress
+
+ tests_run
+
+ slow_path_trap_uninstall $swp1 egress
+ slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+ echo "WARN: Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh b/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh
new file mode 100644
index 000000000..39c03e286
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh
@@ -0,0 +1,94 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# This is the standard topology for testing mirroring to gretap and ip6gretap
+# netdevices. The tests that use it tweak it in one way or another--importantly,
+# $swp3 and $h3 need to have addresses set up.
+#
+# +---------------------+ +---------------------+
+# | H1 | | H2 |
+# | + $h1 | | $h2 + |
+# | | 192.0.2.1/28 | | 192.0.2.2/28 | |
+# +-----|---------------+ +---------------|-----+
+# | |
+# +-----|-------------------------------------------------------------|-----+
+# | SW o--> mirror | |
+# | +---|-------------------------------------------------------------|---+ |
+# | | + $swp1 BR $swp2 + | |
+# | +---------------------------------------------------------------------+ |
+# | |
+# | + $swp3 + gt6 (ip6gretap) + gt4 (gretap) |
+# | | : loc=2001:db8:2::1 : loc=192.0.2.129 |
+# | | : rem=2001:db8:2::2 : rem=192.0.2.130 |
+# | | : ttl=100 : ttl=100 |
+# | | : tos=inherit : tos=inherit |
+# | | : : |
+# +-----|---------------------:----------------------:----------------------+
+# | : :
+# +-----|---------------------:----------------------:----------------------+
+# | H3 + $h3 + h3-gt6 (ip6gretap) + h3-gt4 (gretap) |
+# | loc=2001:db8:2::2 loc=192.0.2.130 |
+# | rem=2001:db8:2::1 rem=192.0.2.129 |
+# | ttl=100 ttl=100 |
+# | tos=inherit tos=inherit |
+# | |
+# +-------------------------------------------------------------------------+
+
+source "$relative_path/mirror_topo_lib.sh"
+
+mirror_gre_topo_h3_create()
+{
+ mirror_topo_h3_create
+
+ tunnel_create h3-gt4 gretap 192.0.2.130 192.0.2.129
+ ip link set h3-gt4 vrf v$h3
+ matchall_sink_create h3-gt4
+
+ tunnel_create h3-gt6 ip6gretap 2001:db8:2::2 2001:db8:2::1
+ ip link set h3-gt6 vrf v$h3
+ matchall_sink_create h3-gt6
+}
+
+mirror_gre_topo_h3_destroy()
+{
+ tunnel_destroy h3-gt6
+ tunnel_destroy h3-gt4
+
+ mirror_topo_h3_destroy
+}
+
+mirror_gre_topo_switch_create()
+{
+ mirror_topo_switch_create
+
+ tunnel_create gt4 gretap 192.0.2.129 192.0.2.130 \
+ ttl 100 tos inherit
+
+ tunnel_create gt6 ip6gretap 2001:db8:2::1 2001:db8:2::2 \
+ ttl 100 tos inherit allow-localremote
+}
+
+mirror_gre_topo_switch_destroy()
+{
+ tunnel_destroy gt6
+ tunnel_destroy gt4
+
+ mirror_topo_switch_destroy
+}
+
+mirror_gre_topo_create()
+{
+ mirror_topo_h1_create
+ mirror_topo_h2_create
+ mirror_gre_topo_h3_create
+
+ mirror_gre_topo_switch_create
+}
+
+mirror_gre_topo_destroy()
+{
+ mirror_gre_topo_switch_destroy
+
+ mirror_gre_topo_h3_destroy
+ mirror_topo_h2_destroy
+ mirror_topo_h1_destroy
+}
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh
new file mode 100755
index 000000000..88cecdb9a
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh
@@ -0,0 +1,92 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# Test for "tc action mirred egress mirror" that mirrors to a gretap netdevice
+# whose underlay route points at a vlan device.
+
+ALL_TESTS="
+ test_gretap
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ vrf_prepare
+ mirror_gre_topo_create
+
+ ip link add name $swp3.555 link $swp3 type vlan id 555
+ ip address add dev $swp3.555 192.0.2.129/32
+ ip address add dev $swp3.555 2001:db8:2::1/128
+ ip link set dev $swp3.555 up
+
+ ip route add 192.0.2.130/32 dev $swp3.555
+ ip -6 route add 2001:db8:2::2/128 dev $swp3.555
+
+ ip link add name $h3.555 link $h3 type vlan id 555
+ ip link set dev $h3.555 master v$h3
+ ip address add dev $h3.555 192.0.2.130/28
+ ip address add dev $h3.555 2001:db8:2::2/64
+ ip link set dev $h3.555 up
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ ip link del dev $h3.555
+ ip link del dev $swp3.555
+
+ mirror_gre_topo_destroy
+ vrf_cleanup
+}
+
+test_gretap()
+{
+ full_test_span_gre_dir gt4 ingress 8 0 "mirror to gretap"
+ full_test_span_gre_dir gt4 egress 0 8 "mirror to gretap"
+}
+
+test_all()
+{
+ slow_path_trap_install $swp1 ingress
+ slow_path_trap_install $swp1 egress
+
+ tests_run
+
+ slow_path_trap_uninstall $swp1 egress
+ slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+ echo "WARN: Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh
new file mode 100755
index 000000000..204b25f13
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh
@@ -0,0 +1,283 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# Test for "tc action mirred egress mirror" when the underlay route points at a
+# vlan device on top of a bridge device with vlan filtering (802.1q).
+
+ALL_TESTS="
+ test_gretap
+ test_ip6gretap
+ test_gretap_forbidden_cpu
+ test_ip6gretap_forbidden_cpu
+ test_gretap_forbidden_egress
+ test_ip6gretap_forbidden_egress
+ test_gretap_untagged_egress
+ test_ip6gretap_untagged_egress
+ test_gretap_fdb_roaming
+ test_ip6gretap_fdb_roaming
+ test_gretap_stp
+ test_ip6gretap_stp
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+require_command $ARPING
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ # gt4's remote address is at $h3.555, not $h3. Thus the packets arriving
+ # directly to $h3 for test_gretap_untagged_egress() are rejected by
+ # rp_filter and the test spuriously fails.
+ sysctl_set net.ipv4.conf.all.rp_filter 0
+ sysctl_set net.ipv4.conf.$h3.rp_filter 0
+
+ vrf_prepare
+ mirror_gre_topo_create
+
+ vlan_create br1 555 "" 192.0.2.129/32 2001:db8:2::1/128
+ bridge vlan add dev br1 vid 555 self
+ ip route rep 192.0.2.130/32 dev br1.555
+ ip -6 route rep 2001:db8:2::2/128 dev br1.555
+
+ vlan_create $h3 555 v$h3 192.0.2.130/28 2001:db8:2::2/64
+
+ ip link set dev $swp3 master br1
+ bridge vlan add dev $swp3 vid 555
+ bridge vlan add dev $swp2 vid 555
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ ip link set dev $swp2 nomaster
+ ip link set dev $swp3 nomaster
+ vlan_destroy $h3 555
+ vlan_destroy br1 555
+
+ mirror_gre_topo_destroy
+ vrf_cleanup
+
+ sysctl_restore net.ipv4.conf.$h3.rp_filter
+ sysctl_restore net.ipv4.conf.all.rp_filter
+}
+
+test_vlan_match()
+{
+ local tundev=$1; shift
+ local vlan_match=$1; shift
+ local what=$1; shift
+
+ full_test_span_gre_dir_vlan $tundev ingress "$vlan_match" 8 0 "$what"
+ full_test_span_gre_dir_vlan $tundev egress "$vlan_match" 0 8 "$what"
+}
+
+test_gretap()
+{
+ test_vlan_match gt4 'skip_hw vlan_id 555 vlan_ethtype ip' \
+ "mirror to gretap"
+}
+
+test_ip6gretap()
+{
+ test_vlan_match gt6 'skip_hw vlan_id 555 vlan_ethtype ip' \
+ "mirror to ip6gretap"
+}
+
+test_span_gre_forbidden_cpu()
+{
+ local tundev=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ # Run the pass-test first, to prime neighbor table.
+ mirror_install $swp1 ingress $tundev "matchall $tcflags"
+ quick_test_span_gre_dir $tundev ingress
+
+ # Now forbid the VLAN at the bridge and see it fail.
+ bridge vlan del dev br1 vid 555 self
+ sleep 1
+ fail_test_span_gre_dir $tundev ingress
+
+ bridge vlan add dev br1 vid 555 self
+ sleep 1
+ quick_test_span_gre_dir $tundev ingress
+
+ mirror_uninstall $swp1 ingress
+
+ log_test "$what: vlan forbidden at a bridge ($tcflags)"
+}
+
+test_gretap_forbidden_cpu()
+{
+ test_span_gre_forbidden_cpu gt4 "mirror to gretap"
+}
+
+test_ip6gretap_forbidden_cpu()
+{
+ test_span_gre_forbidden_cpu gt6 "mirror to ip6gretap"
+}
+
+test_span_gre_forbidden_egress()
+{
+ local tundev=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ mirror_install $swp1 ingress $tundev "matchall $tcflags"
+ quick_test_span_gre_dir $tundev ingress
+
+ bridge vlan del dev $swp3 vid 555
+ sleep 1
+ fail_test_span_gre_dir $tundev ingress
+
+ bridge vlan add dev $swp3 vid 555
+ # Re-prime FDB
+ $ARPING -I br1.555 192.0.2.130 -fqc 1
+ sleep 1
+ quick_test_span_gre_dir $tundev ingress
+
+ mirror_uninstall $swp1 ingress
+
+ log_test "$what: vlan forbidden at a bridge egress ($tcflags)"
+}
+
+test_gretap_forbidden_egress()
+{
+ test_span_gre_forbidden_egress gt4 "mirror to gretap"
+}
+
+test_ip6gretap_forbidden_egress()
+{
+ test_span_gre_forbidden_egress gt6 "mirror to ip6gretap"
+}
+
+test_span_gre_untagged_egress()
+{
+ local tundev=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ mirror_install $swp1 ingress $tundev "matchall $tcflags"
+
+ quick_test_span_gre_dir $tundev ingress
+ quick_test_span_vlan_dir $h3 555 ingress
+
+ bridge vlan add dev $swp3 vid 555 pvid untagged
+ sleep 1
+ quick_test_span_gre_dir $tundev ingress
+ fail_test_span_vlan_dir $h3 555 ingress
+
+ bridge vlan add dev $swp3 vid 555
+ sleep 1
+ quick_test_span_gre_dir $tundev ingress
+ quick_test_span_vlan_dir $h3 555 ingress
+
+ mirror_uninstall $swp1 ingress
+
+ log_test "$what: vlan untagged at a bridge egress ($tcflags)"
+}
+
+test_gretap_untagged_egress()
+{
+ test_span_gre_untagged_egress gt4 "mirror to gretap"
+}
+
+test_ip6gretap_untagged_egress()
+{
+ test_span_gre_untagged_egress gt6 "mirror to ip6gretap"
+}
+
+test_span_gre_fdb_roaming()
+{
+ local tundev=$1; shift
+ local what=$1; shift
+ local h3mac=$(mac_get $h3)
+
+ RET=0
+
+ mirror_install $swp1 ingress $tundev "matchall $tcflags"
+ quick_test_span_gre_dir $tundev ingress
+
+ bridge fdb del dev $swp3 $h3mac vlan 555 master
+ bridge fdb add dev $swp2 $h3mac vlan 555 master
+ sleep 1
+ fail_test_span_gre_dir $tundev ingress
+
+ bridge fdb del dev $swp2 $h3mac vlan 555 master
+ # Re-prime FDB
+ $ARPING -I br1.555 192.0.2.130 -fqc 1
+ sleep 1
+ quick_test_span_gre_dir $tundev ingress
+
+ mirror_uninstall $swp1 ingress
+
+ log_test "$what: MAC roaming ($tcflags)"
+}
+
+test_gretap_fdb_roaming()
+{
+ test_span_gre_fdb_roaming gt4 "mirror to gretap"
+}
+
+test_ip6gretap_fdb_roaming()
+{
+ test_span_gre_fdb_roaming gt6 "mirror to ip6gretap"
+}
+
+test_gretap_stp()
+{
+ full_test_span_gre_stp gt4 $swp3 "mirror to gretap"
+}
+
+test_ip6gretap_stp()
+{
+ full_test_span_gre_stp gt6 $swp3 "mirror to ip6gretap"
+}
+
+test_all()
+{
+ slow_path_trap_install $swp1 ingress
+ slow_path_trap_install $swp1 egress
+
+ tests_run
+
+ slow_path_trap_uninstall $swp1 egress
+ slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+ echo "WARN: Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_lib.sh b/tools/testing/selftests/net/forwarding/mirror_lib.sh
new file mode 100644
index 000000000..07991e102
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_lib.sh
@@ -0,0 +1,132 @@
+# SPDX-License-Identifier: GPL-2.0
+
+mirror_install()
+{
+ local from_dev=$1; shift
+ local direction=$1; shift
+ local to_dev=$1; shift
+ local filter=$1; shift
+
+ tc filter add dev $from_dev $direction \
+ pref 1000 $filter \
+ action mirred egress mirror dev $to_dev
+}
+
+mirror_uninstall()
+{
+ local from_dev=$1; shift
+ local direction=$1; shift
+
+ tc filter del dev $swp1 $direction pref 1000
+}
+
+mirror_test()
+{
+ local vrf_name=$1; shift
+ local sip=$1; shift
+ local dip=$1; shift
+ local dev=$1; shift
+ local pref=$1; shift
+ local expect=$1; shift
+
+ local t0=$(tc_rule_stats_get $dev $pref)
+ ip vrf exec $vrf_name \
+ ${PING} ${sip:+-I $sip} $dip -c 10 -i 0.1 -w 2 &> /dev/null
+ local t1=$(tc_rule_stats_get $dev $pref)
+ local delta=$((t1 - t0))
+ # Tolerate a couple stray extra packets.
+ ((expect <= delta && delta <= expect + 2))
+ check_err $? "Expected to capture $expect packets, got $delta."
+}
+
+do_test_span_dir_ips()
+{
+ local expect=$1; shift
+ local dev=$1; shift
+ local direction=$1; shift
+ local ip1=$1; shift
+ local ip2=$1; shift
+
+ icmp_capture_install $dev
+ mirror_test v$h1 $ip1 $ip2 $dev 100 $expect
+ mirror_test v$h2 $ip2 $ip1 $dev 100 $expect
+ icmp_capture_uninstall $dev
+}
+
+quick_test_span_dir_ips()
+{
+ do_test_span_dir_ips 10 "$@"
+}
+
+fail_test_span_dir_ips()
+{
+ do_test_span_dir_ips 0 "$@"
+}
+
+test_span_dir_ips()
+{
+ local dev=$1; shift
+ local direction=$1; shift
+ local forward_type=$1; shift
+ local backward_type=$1; shift
+ local ip1=$1; shift
+ local ip2=$1; shift
+
+ quick_test_span_dir_ips "$dev" "$direction" "$ip1" "$ip2"
+
+ icmp_capture_install $dev "type $forward_type"
+ mirror_test v$h1 $ip1 $ip2 $dev 100 10
+ icmp_capture_uninstall $dev
+
+ icmp_capture_install $dev "type $backward_type"
+ mirror_test v$h2 $ip2 $ip1 $dev 100 10
+ icmp_capture_uninstall $dev
+}
+
+fail_test_span_dir()
+{
+ fail_test_span_dir_ips "$@" 192.0.2.1 192.0.2.2
+}
+
+test_span_dir()
+{
+ test_span_dir_ips "$@" 192.0.2.1 192.0.2.2
+}
+
+do_test_span_vlan_dir_ips()
+{
+ local expect=$1; shift
+ local dev=$1; shift
+ local vid=$1; shift
+ local direction=$1; shift
+ local ip1=$1; shift
+ local ip2=$1; shift
+
+ # Install the capture as skip_hw to avoid double-counting of packets.
+ # The traffic is meant for local box anyway, so will be trapped to
+ # kernel.
+ vlan_capture_install $dev "skip_hw vlan_id $vid vlan_ethtype ip"
+ mirror_test v$h1 $ip1 $ip2 $dev 100 $expect
+ mirror_test v$h2 $ip2 $ip1 $dev 100 $expect
+ vlan_capture_uninstall $dev
+}
+
+quick_test_span_vlan_dir_ips()
+{
+ do_test_span_vlan_dir_ips 10 "$@"
+}
+
+fail_test_span_vlan_dir_ips()
+{
+ do_test_span_vlan_dir_ips 0 "$@"
+}
+
+quick_test_span_vlan_dir()
+{
+ quick_test_span_vlan_dir_ips "$@" 192.0.2.1 192.0.2.2
+}
+
+fail_test_span_vlan_dir()
+{
+ fail_test_span_vlan_dir_ips "$@" 192.0.2.1 192.0.2.2
+}
diff --git a/tools/testing/selftests/net/forwarding/mirror_topo_lib.sh b/tools/testing/selftests/net/forwarding/mirror_topo_lib.sh
new file mode 100644
index 000000000..04979e596
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_topo_lib.sh
@@ -0,0 +1,101 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# This is the standard topology for testing mirroring. The tests that use it
+# tweak it in one way or another--typically add more devices to the topology.
+#
+# +---------------------+ +---------------------+
+# | H1 | | H2 |
+# | + $h1 | | $h2 + |
+# | | 192.0.2.1/28 | | 192.0.2.2/28 | |
+# +-----|---------------+ +---------------|-----+
+# | |
+# +-----|-------------------------------------------------------------|-----+
+# | SW o--> mirror | |
+# | +---|-------------------------------------------------------------|---+ |
+# | | + $swp1 BR $swp2 + | |
+# | +---------------------------------------------------------------------+ |
+# | |
+# | + $swp3 |
+# +-----|-------------------------------------------------------------------+
+# |
+# +-----|-------------------------------------------------------------------+
+# | H3 + $h3 |
+# | |
+# +-------------------------------------------------------------------------+
+
+mirror_topo_h1_create()
+{
+ simple_if_init $h1 192.0.2.1/28
+}
+
+mirror_topo_h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/28
+}
+
+mirror_topo_h2_create()
+{
+ simple_if_init $h2 192.0.2.2/28
+}
+
+mirror_topo_h2_destroy()
+{
+ simple_if_fini $h2 192.0.2.2/28
+}
+
+mirror_topo_h3_create()
+{
+ simple_if_init $h3
+ tc qdisc add dev $h3 clsact
+}
+
+mirror_topo_h3_destroy()
+{
+ tc qdisc del dev $h3 clsact
+ simple_if_fini $h3
+}
+
+mirror_topo_switch_create()
+{
+ ip link set dev $swp3 up
+
+ ip link add name br1 type bridge vlan_filtering 1
+ ip link set dev br1 up
+
+ ip link set dev $swp1 master br1
+ ip link set dev $swp1 up
+
+ ip link set dev $swp2 master br1
+ ip link set dev $swp2 up
+
+ tc qdisc add dev $swp1 clsact
+}
+
+mirror_topo_switch_destroy()
+{
+ tc qdisc del dev $swp1 clsact
+
+ ip link set dev $swp1 down
+ ip link set dev $swp2 down
+ ip link del dev br1
+
+ ip link set dev $swp3 down
+}
+
+mirror_topo_create()
+{
+ mirror_topo_h1_create
+ mirror_topo_h2_create
+ mirror_topo_h3_create
+
+ mirror_topo_switch_create
+}
+
+mirror_topo_destroy()
+{
+ mirror_topo_switch_destroy
+
+ mirror_topo_h3_destroy
+ mirror_topo_h2_destroy
+ mirror_topo_h1_destroy
+}
diff --git a/tools/testing/selftests/net/forwarding/mirror_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_vlan.sh
new file mode 100755
index 000000000..9ab2ce77b
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_vlan.sh
@@ -0,0 +1,131 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing mirroring. See mirror_topo_lib.sh
+# for more details.
+#
+# Test for "tc action mirred egress mirror" that mirrors to a vlan device.
+
+ALL_TESTS="
+ test_vlan
+ test_tagged_vlan
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_topo_lib.sh
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ vrf_prepare
+ mirror_topo_create
+
+ vlan_create $swp3 555
+
+ vlan_create $h3 555 v$h3
+ matchall_sink_create $h3.555
+
+ vlan_create $h1 111 v$h1 192.0.2.17/28
+ bridge vlan add dev $swp1 vid 111
+
+ vlan_create $h2 111 v$h2 192.0.2.18/28
+ bridge vlan add dev $swp2 vid 111
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ vlan_destroy $h2 111
+ vlan_destroy $h1 111
+ vlan_destroy $h3 555
+ vlan_destroy $swp3 555
+
+ mirror_topo_destroy
+ vrf_cleanup
+}
+
+test_vlan_dir()
+{
+ local direction=$1; shift
+ local forward_type=$1; shift
+ local backward_type=$1; shift
+
+ RET=0
+
+ mirror_install $swp1 $direction $swp3.555 "matchall $tcflags"
+ test_span_dir "$h3.555" "$direction" "$forward_type" "$backward_type"
+ mirror_uninstall $swp1 $direction
+
+ log_test "$direction mirror to vlan ($tcflags)"
+}
+
+test_vlan()
+{
+ test_vlan_dir ingress 8 0
+ test_vlan_dir egress 0 8
+}
+
+test_tagged_vlan_dir()
+{
+ local direction=$1; shift
+ local forward_type=$1; shift
+ local backward_type=$1; shift
+
+ RET=0
+
+ mirror_install $swp1 $direction $swp3.555 "matchall $tcflags"
+ do_test_span_vlan_dir_ips 10 "$h3.555" 111 "$direction" \
+ 192.0.2.17 192.0.2.18
+ do_test_span_vlan_dir_ips 0 "$h3.555" 555 "$direction" \
+ 192.0.2.17 192.0.2.18
+ mirror_uninstall $swp1 $direction
+
+ log_test "$direction mirror tagged to vlan ($tcflags)"
+}
+
+test_tagged_vlan()
+{
+ test_tagged_vlan_dir ingress 8 0
+ test_tagged_vlan_dir egress 0 8
+}
+
+test_all()
+{
+ slow_path_trap_install $swp1 ingress
+ slow_path_trap_install $swp1 egress
+ trap_install $h3 ingress
+
+ tests_run
+
+ trap_uninstall $h3 ingress
+ slow_path_trap_uninstall $swp1 egress
+ slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+ echo "WARN: Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router.sh b/tools/testing/selftests/net/forwarding/router.sh
new file mode 100755
index 000000000..a75cb51cc
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router.sh
@@ -0,0 +1,135 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="ping_ipv4 ping_ipv6"
+NUM_NETIFS=4
+source lib.sh
+
+h1_create()
+{
+ vrf_create "vrf-h1"
+ ip link set dev $h1 master vrf-h1
+
+ ip link set dev vrf-h1 up
+ ip link set dev $h1 up
+
+ ip address add 192.0.2.2/24 dev $h1
+ ip address add 2001:db8:1::2/64 dev $h1
+
+ ip route add 198.51.100.0/24 vrf vrf-h1 nexthop via 192.0.2.1
+ ip route add 2001:db8:2::/64 vrf vrf-h1 nexthop via 2001:db8:1::1
+}
+
+h1_destroy()
+{
+ ip route del 2001:db8:2::/64 vrf vrf-h1
+ ip route del 198.51.100.0/24 vrf vrf-h1
+
+ ip address del 2001:db8:1::2/64 dev $h1
+ ip address del 192.0.2.2/24 dev $h1
+
+ ip link set dev $h1 down
+ vrf_destroy "vrf-h1"
+}
+
+h2_create()
+{
+ vrf_create "vrf-h2"
+ ip link set dev $h2 master vrf-h2
+
+ ip link set dev vrf-h2 up
+ ip link set dev $h2 up
+
+ ip address add 198.51.100.2/24 dev $h2
+ ip address add 2001:db8:2::2/64 dev $h2
+
+ ip route add 192.0.2.0/24 vrf vrf-h2 nexthop via 198.51.100.1
+ ip route add 2001:db8:1::/64 vrf vrf-h2 nexthop via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+ ip route del 2001:db8:1::/64 vrf vrf-h2
+ ip route del 192.0.2.0/24 vrf vrf-h2
+
+ ip address del 2001:db8:2::2/64 dev $h2
+ ip address del 198.51.100.2/24 dev $h2
+
+ ip link set dev $h2 down
+ vrf_destroy "vrf-h2"
+}
+
+router_create()
+{
+ ip link set dev $rp1 up
+ ip link set dev $rp2 up
+
+ ip address add 192.0.2.1/24 dev $rp1
+ ip address add 2001:db8:1::1/64 dev $rp1
+
+ ip address add 198.51.100.1/24 dev $rp2
+ ip address add 2001:db8:2::1/64 dev $rp2
+}
+
+router_destroy()
+{
+ ip address del 2001:db8:2::1/64 dev $rp2
+ ip address del 198.51.100.1/24 dev $rp2
+
+ ip address del 2001:db8:1::1/64 dev $rp1
+ ip address del 192.0.2.1/24 dev $rp1
+
+ ip link set dev $rp2 down
+ ip link set dev $rp1 down
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ rp1=${NETIFS[p2]}
+
+ rp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ router_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ router_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 198.51.100.2
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:2::2
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router_bridge.sh b/tools/testing/selftests/net/forwarding/router_bridge.sh
new file mode 100755
index 000000000..ebc596a27
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router_bridge.sh
@@ -0,0 +1,113 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+ ping_ipv4
+ ping_ipv6
+"
+NUM_NETIFS=4
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64
+ ip -4 route add 192.0.2.128/28 vrf v$h1 nexthop via 192.0.2.2
+ ip -6 route add 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::2
+}
+
+h1_destroy()
+{
+ ip -6 route del 2001:db8:2::/64 vrf v$h1
+ ip -4 route del 192.0.2.128/28 vrf v$h1
+ simple_if_fini $h1 192.0.2.1/28 2001:db8:1::1/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.130/28 2001:db8:2::2/64
+ ip -4 route add 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.129
+ ip -6 route add 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+ ip -6 route del 2001:db8:1::/64 vrf v$h2
+ ip -4 route del 192.0.2.0/28 vrf v$h2
+ simple_if_fini $h2 192.0.2.130/28 2001:db8:2::2/64
+}
+
+router_create()
+{
+ ip link add name br1 type bridge vlan_filtering 1
+ ip link set dev br1 up
+
+ ip link set dev $swp1 master br1
+ ip link set dev $swp1 up
+ __addr_add_del br1 add 192.0.2.2/28 2001:db8:1::2/64
+
+ ip link set dev $swp2 up
+ __addr_add_del $swp2 add 192.0.2.129/28 2001:db8:2::1/64
+}
+
+router_destroy()
+{
+ __addr_add_del $swp2 del 192.0.2.129/28 2001:db8:2::1/64
+ ip link set dev $swp2 down
+
+ __addr_add_del br1 del 192.0.2.2/28 2001:db8:1::2/64
+ ip link set dev $swp1 down
+ ip link set dev $swp1 nomaster
+
+ ip link del dev br1
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ router_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ router_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 192.0.2.130
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:2::2
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router_bridge_vlan.sh b/tools/testing/selftests/net/forwarding/router_bridge_vlan.sh
new file mode 100755
index 000000000..fa6a88c50
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router_bridge_vlan.sh
@@ -0,0 +1,132 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+ ping_ipv4
+ ping_ipv6
+ vlan
+"
+NUM_NETIFS=4
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1
+ vlan_create $h1 555 v$h1 192.0.2.1/28 2001:db8:1::1/64
+ ip -4 route add 192.0.2.128/28 vrf v$h1 nexthop via 192.0.2.2
+ ip -6 route add 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::2
+}
+
+h1_destroy()
+{
+ ip -6 route del 2001:db8:2::/64 vrf v$h1
+ ip -4 route del 192.0.2.128/28 vrf v$h1
+ vlan_destroy $h1 555
+ simple_if_fini $h1
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.130/28 2001:db8:2::2/64
+ ip -4 route add 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.129
+ ip -6 route add 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+ ip -6 route del 2001:db8:1::/64 vrf v$h2
+ ip -4 route del 192.0.2.0/28 vrf v$h2
+ simple_if_fini $h2 192.0.2.130/28 2001:db8:2::2/64
+}
+
+router_create()
+{
+ ip link add name br1 type bridge vlan_filtering 1
+ ip link set dev br1 up
+
+ ip link set dev $swp1 master br1
+ ip link set dev $swp1 up
+
+ bridge vlan add dev br1 vid 555 self pvid untagged
+ bridge vlan add dev $swp1 vid 555
+
+ __addr_add_del br1 add 192.0.2.2/28 2001:db8:1::2/64
+
+ ip link set dev $swp2 up
+ __addr_add_del $swp2 add 192.0.2.129/28 2001:db8:2::1/64
+}
+
+router_destroy()
+{
+ __addr_add_del $swp2 del 192.0.2.129/28 2001:db8:2::1/64
+ ip link set dev $swp2 down
+
+ __addr_add_del br1 del 192.0.2.2/28 2001:db8:1::2/64
+ ip link set dev $swp1 down
+ ip link set dev $swp1 nomaster
+
+ ip link del dev br1
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ router_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ router_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+vlan()
+{
+ RET=0
+
+ bridge vlan add dev br1 vid 333 self
+ check_err $? "Can't add a non-PVID VLAN"
+ bridge vlan del dev br1 vid 333 self
+ check_err $? "Can't remove a non-PVID VLAN"
+
+ log_test "vlan"
+}
+
+ping_ipv4()
+{
+ ping_test $h1 192.0.2.130
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:2::2
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router_broadcast.sh b/tools/testing/selftests/net/forwarding/router_broadcast.sh
new file mode 100755
index 000000000..7bd2ebb6e
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router_broadcast.sh
@@ -0,0 +1,233 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="ping_ipv4"
+NUM_NETIFS=6
+source lib.sh
+
+h1_create()
+{
+ vrf_create "vrf-h1"
+ ip link set dev $h1 master vrf-h1
+
+ ip link set dev vrf-h1 up
+ ip link set dev $h1 up
+
+ ip address add 192.0.2.2/24 dev $h1
+
+ ip route add 198.51.100.0/24 vrf vrf-h1 nexthop via 192.0.2.1
+ ip route add 198.51.200.0/24 vrf vrf-h1 nexthop via 192.0.2.1
+}
+
+h1_destroy()
+{
+ ip route del 198.51.200.0/24 vrf vrf-h1
+ ip route del 198.51.100.0/24 vrf vrf-h1
+
+ ip address del 192.0.2.2/24 dev $h1
+
+ ip link set dev $h1 down
+ vrf_destroy "vrf-h1"
+}
+
+h2_create()
+{
+ vrf_create "vrf-h2"
+ ip link set dev $h2 master vrf-h2
+
+ ip link set dev vrf-h2 up
+ ip link set dev $h2 up
+
+ ip address add 198.51.100.2/24 dev $h2
+
+ ip route add 192.0.2.0/24 vrf vrf-h2 nexthop via 198.51.100.1
+ ip route add 198.51.200.0/24 vrf vrf-h2 nexthop via 198.51.100.1
+}
+
+h2_destroy()
+{
+ ip route del 198.51.200.0/24 vrf vrf-h2
+ ip route del 192.0.2.0/24 vrf vrf-h2
+
+ ip address del 198.51.100.2/24 dev $h2
+
+ ip link set dev $h2 down
+ vrf_destroy "vrf-h2"
+}
+
+h3_create()
+{
+ vrf_create "vrf-h3"
+ ip link set dev $h3 master vrf-h3
+
+ ip link set dev vrf-h3 up
+ ip link set dev $h3 up
+
+ ip address add 198.51.200.2/24 dev $h3
+
+ ip route add 192.0.2.0/24 vrf vrf-h3 nexthop via 198.51.200.1
+ ip route add 198.51.100.0/24 vrf vrf-h3 nexthop via 198.51.200.1
+}
+
+h3_destroy()
+{
+ ip route del 198.51.100.0/24 vrf vrf-h3
+ ip route del 192.0.2.0/24 vrf vrf-h3
+
+ ip address del 198.51.200.2/24 dev $h3
+
+ ip link set dev $h3 down
+ vrf_destroy "vrf-h3"
+}
+
+router_create()
+{
+ ip link set dev $rp1 up
+ ip link set dev $rp2 up
+ ip link set dev $rp3 up
+
+ ip address add 192.0.2.1/24 dev $rp1
+
+ ip address add 198.51.100.1/24 dev $rp2
+ ip address add 198.51.200.1/24 dev $rp3
+}
+
+router_destroy()
+{
+ ip address del 198.51.200.1/24 dev $rp3
+ ip address del 198.51.100.1/24 dev $rp2
+
+ ip address del 192.0.2.1/24 dev $rp1
+
+ ip link set dev $rp3 down
+ ip link set dev $rp2 down
+ ip link set dev $rp1 down
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ rp1=${NETIFS[p2]}
+
+ rp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ rp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+ h3_create
+
+ router_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ router_destroy
+
+ h3_destroy
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+bc_forwarding_disable()
+{
+ sysctl_set net.ipv4.conf.all.bc_forwarding 0
+ sysctl_set net.ipv4.conf.$rp1.bc_forwarding 0
+}
+
+bc_forwarding_enable()
+{
+ sysctl_set net.ipv4.conf.all.bc_forwarding 1
+ sysctl_set net.ipv4.conf.$rp1.bc_forwarding 1
+}
+
+bc_forwarding_restore()
+{
+ sysctl_restore net.ipv4.conf.$rp1.bc_forwarding
+ sysctl_restore net.ipv4.conf.all.bc_forwarding
+}
+
+ping_test_from()
+{
+ local oif=$1
+ local dip=$2
+ local from=$3
+ local fail=${4:-0}
+
+ RET=0
+
+ log_info "ping $dip, expected reply from $from"
+ ip vrf exec $(master_name_get $oif) \
+ $PING -I $oif $dip -c 10 -i 0.1 -w 2 -b 2>&1 | grep $from &> /dev/null
+ check_err_fail $fail $?
+}
+
+ping_ipv4()
+{
+ sysctl_set net.ipv4.icmp_echo_ignore_broadcasts 0
+
+ bc_forwarding_disable
+ log_info "bc_forwarding disabled on r1 =>"
+ ping_test_from $h1 198.51.100.255 192.0.2.1
+ log_test "h1 -> net2: reply from r1 (not forwarding)"
+ ping_test_from $h1 198.51.200.255 192.0.2.1
+ log_test "h1 -> net3: reply from r1 (not forwarding)"
+ ping_test_from $h1 192.0.2.255 192.0.2.1
+ log_test "h1 -> net1: reply from r1 (not dropping)"
+ ping_test_from $h1 255.255.255.255 192.0.2.1
+ log_test "h1 -> 255.255.255.255: reply from r1 (not forwarding)"
+
+ ping_test_from $h2 192.0.2.255 198.51.100.1
+ log_test "h2 -> net1: reply from r1 (not forwarding)"
+ ping_test_from $h2 198.51.200.255 198.51.100.1
+ log_test "h2 -> net3: reply from r1 (not forwarding)"
+ ping_test_from $h2 198.51.100.255 198.51.100.1
+ log_test "h2 -> net2: reply from r1 (not dropping)"
+ ping_test_from $h2 255.255.255.255 198.51.100.1
+ log_test "h2 -> 255.255.255.255: reply from r1 (not forwarding)"
+ bc_forwarding_restore
+
+ bc_forwarding_enable
+ log_info "bc_forwarding enabled on r1 =>"
+ ping_test_from $h1 198.51.100.255 198.51.100.2
+ log_test "h1 -> net2: reply from h2 (forwarding)"
+ ping_test_from $h1 198.51.200.255 198.51.200.2
+ log_test "h1 -> net3: reply from h3 (forwarding)"
+ ping_test_from $h1 192.0.2.255 192.0.2.1 1
+ log_test "h1 -> net1: no reply (dropping)"
+ ping_test_from $h1 255.255.255.255 192.0.2.1
+ log_test "h1 -> 255.255.255.255: reply from r1 (not forwarding)"
+
+ ping_test_from $h2 192.0.2.255 192.0.2.2
+ log_test "h2 -> net1: reply from h1 (forwarding)"
+ ping_test_from $h2 198.51.200.255 198.51.200.2
+ log_test "h2 -> net3: reply from h3 (forwarding)"
+ ping_test_from $h2 198.51.100.255 198.51.100.1 1
+ log_test "h2 -> net2: no reply (dropping)"
+ ping_test_from $h2 255.255.255.255 198.51.100.1
+ log_test "h2 -> 255.255.255.255: reply from r1 (not forwarding)"
+ bc_forwarding_restore
+
+ sysctl_restore net.ipv4.icmp_echo_ignore_broadcasts
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router_multipath.sh b/tools/testing/selftests/net/forwarding/router_multipath.sh
new file mode 100755
index 000000000..79a209927
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router_multipath.sh
@@ -0,0 +1,342 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="ping_ipv4 ping_ipv6 multipath_test"
+NUM_NETIFS=8
+source lib.sh
+
+h1_create()
+{
+ vrf_create "vrf-h1"
+ ip link set dev $h1 master vrf-h1
+
+ ip link set dev vrf-h1 up
+ ip link set dev $h1 up
+
+ ip address add 192.0.2.2/24 dev $h1
+ ip address add 2001:db8:1::2/64 dev $h1
+
+ ip route add 198.51.100.0/24 vrf vrf-h1 nexthop via 192.0.2.1
+ ip route add 2001:db8:2::/64 vrf vrf-h1 nexthop via 2001:db8:1::1
+}
+
+h1_destroy()
+{
+ ip route del 2001:db8:2::/64 vrf vrf-h1
+ ip route del 198.51.100.0/24 vrf vrf-h1
+
+ ip address del 2001:db8:1::2/64 dev $h1
+ ip address del 192.0.2.2/24 dev $h1
+
+ ip link set dev $h1 down
+ vrf_destroy "vrf-h1"
+}
+
+h2_create()
+{
+ vrf_create "vrf-h2"
+ ip link set dev $h2 master vrf-h2
+
+ ip link set dev vrf-h2 up
+ ip link set dev $h2 up
+
+ ip address add 198.51.100.2/24 dev $h2
+ ip address add 2001:db8:2::2/64 dev $h2
+
+ ip route add 192.0.2.0/24 vrf vrf-h2 nexthop via 198.51.100.1
+ ip route add 2001:db8:1::/64 vrf vrf-h2 nexthop via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+ ip route del 2001:db8:1::/64 vrf vrf-h2
+ ip route del 192.0.2.0/24 vrf vrf-h2
+
+ ip address del 2001:db8:2::2/64 dev $h2
+ ip address del 198.51.100.2/24 dev $h2
+
+ ip link set dev $h2 down
+ vrf_destroy "vrf-h2"
+}
+
+router1_create()
+{
+ vrf_create "vrf-r1"
+ ip link set dev $rp11 master vrf-r1
+ ip link set dev $rp12 master vrf-r1
+ ip link set dev $rp13 master vrf-r1
+
+ ip link set dev vrf-r1 up
+ ip link set dev $rp11 up
+ ip link set dev $rp12 up
+ ip link set dev $rp13 up
+
+ ip address add 192.0.2.1/24 dev $rp11
+ ip address add 2001:db8:1::1/64 dev $rp11
+
+ ip address add 169.254.2.12/24 dev $rp12
+ ip address add fe80:2::12/64 dev $rp12
+
+ ip address add 169.254.3.13/24 dev $rp13
+ ip address add fe80:3::13/64 dev $rp13
+
+ ip route add 198.51.100.0/24 vrf vrf-r1 \
+ nexthop via 169.254.2.22 dev $rp12 \
+ nexthop via 169.254.3.23 dev $rp13
+ ip route add 2001:db8:2::/64 vrf vrf-r1 \
+ nexthop via fe80:2::22 dev $rp12 \
+ nexthop via fe80:3::23 dev $rp13
+}
+
+router1_destroy()
+{
+ ip route del 2001:db8:2::/64 vrf vrf-r1
+ ip route del 198.51.100.0/24 vrf vrf-r1
+
+ ip address del fe80:3::13/64 dev $rp13
+ ip address del 169.254.3.13/24 dev $rp13
+
+ ip address del fe80:2::12/64 dev $rp12
+ ip address del 169.254.2.12/24 dev $rp12
+
+ ip address del 2001:db8:1::1/64 dev $rp11
+ ip address del 192.0.2.1/24 dev $rp11
+
+ ip link set dev $rp13 down
+ ip link set dev $rp12 down
+ ip link set dev $rp11 down
+
+ vrf_destroy "vrf-r1"
+}
+
+router2_create()
+{
+ vrf_create "vrf-r2"
+ ip link set dev $rp21 master vrf-r2
+ ip link set dev $rp22 master vrf-r2
+ ip link set dev $rp23 master vrf-r2
+
+ ip link set dev vrf-r2 up
+ ip link set dev $rp21 up
+ ip link set dev $rp22 up
+ ip link set dev $rp23 up
+
+ ip address add 198.51.100.1/24 dev $rp21
+ ip address add 2001:db8:2::1/64 dev $rp21
+
+ ip address add 169.254.2.22/24 dev $rp22
+ ip address add fe80:2::22/64 dev $rp22
+
+ ip address add 169.254.3.23/24 dev $rp23
+ ip address add fe80:3::23/64 dev $rp23
+
+ ip route add 192.0.2.0/24 vrf vrf-r2 \
+ nexthop via 169.254.2.12 dev $rp22 \
+ nexthop via 169.254.3.13 dev $rp23
+ ip route add 2001:db8:1::/64 vrf vrf-r2 \
+ nexthop via fe80:2::12 dev $rp22 \
+ nexthop via fe80:3::13 dev $rp23
+}
+
+router2_destroy()
+{
+ ip route del 2001:db8:1::/64 vrf vrf-r2
+ ip route del 192.0.2.0/24 vrf vrf-r2
+
+ ip address del fe80:3::23/64 dev $rp23
+ ip address del 169.254.3.23/24 dev $rp23
+
+ ip address del fe80:2::22/64 dev $rp22
+ ip address del 169.254.2.22/24 dev $rp22
+
+ ip address del 2001:db8:2::1/64 dev $rp21
+ ip address del 198.51.100.1/24 dev $rp21
+
+ ip link set dev $rp23 down
+ ip link set dev $rp22 down
+ ip link set dev $rp21 down
+
+ vrf_destroy "vrf-r2"
+}
+
+multipath4_test()
+{
+ local desc="$1"
+ local weight_rp12=$2
+ local weight_rp13=$3
+ local t0_rp12 t0_rp13 t1_rp12 t1_rp13
+ local packets_rp12 packets_rp13
+
+ # Transmit multiple flows from h1 to h2 and make sure they are
+ # distributed between both multipath links (rp12 and rp13)
+ # according to the configured weights.
+ sysctl_set net.ipv4.fib_multipath_hash_policy 1
+ ip route replace 198.51.100.0/24 vrf vrf-r1 \
+ nexthop via 169.254.2.22 dev $rp12 weight $weight_rp12 \
+ nexthop via 169.254.3.23 dev $rp13 weight $weight_rp13
+
+ t0_rp12=$(link_stats_tx_packets_get $rp12)
+ t0_rp13=$(link_stats_tx_packets_get $rp13)
+
+ ip vrf exec vrf-h1 $MZ -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \
+ -d 1msec -t udp "sp=1024,dp=0-32768"
+
+ t1_rp12=$(link_stats_tx_packets_get $rp12)
+ t1_rp13=$(link_stats_tx_packets_get $rp13)
+
+ let "packets_rp12 = $t1_rp12 - $t0_rp12"
+ let "packets_rp13 = $t1_rp13 - $t0_rp13"
+ multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+
+ # Restore settings.
+ ip route replace 198.51.100.0/24 vrf vrf-r1 \
+ nexthop via 169.254.2.22 dev $rp12 \
+ nexthop via 169.254.3.23 dev $rp13
+ sysctl_restore net.ipv4.fib_multipath_hash_policy
+}
+
+multipath6_l4_test()
+{
+ local desc="$1"
+ local weight_rp12=$2
+ local weight_rp13=$3
+ local t0_rp12 t0_rp13 t1_rp12 t1_rp13
+ local packets_rp12 packets_rp13
+
+ # Transmit multiple flows from h1 to h2 and make sure they are
+ # distributed between both multipath links (rp12 and rp13)
+ # according to the configured weights.
+ sysctl_set net.ipv6.fib_multipath_hash_policy 1
+
+ ip route replace 2001:db8:2::/64 vrf vrf-r1 \
+ nexthop via fe80:2::22 dev $rp12 weight $weight_rp12 \
+ nexthop via fe80:3::23 dev $rp13 weight $weight_rp13
+
+ t0_rp12=$(link_stats_tx_packets_get $rp12)
+ t0_rp13=$(link_stats_tx_packets_get $rp13)
+
+ $MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
+ -d 1msec -t udp "sp=1024,dp=0-32768"
+
+ t1_rp12=$(link_stats_tx_packets_get $rp12)
+ t1_rp13=$(link_stats_tx_packets_get $rp13)
+
+ let "packets_rp12 = $t1_rp12 - $t0_rp12"
+ let "packets_rp13 = $t1_rp13 - $t0_rp13"
+ multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+
+ ip route replace 2001:db8:2::/64 vrf vrf-r1 \
+ nexthop via fe80:2::22 dev $rp12 \
+ nexthop via fe80:3::23 dev $rp13
+
+ sysctl_restore net.ipv6.fib_multipath_hash_policy
+}
+
+multipath6_test()
+{
+ local desc="$1"
+ local weight_rp12=$2
+ local weight_rp13=$3
+ local t0_rp12 t0_rp13 t1_rp12 t1_rp13
+ local packets_rp12 packets_rp13
+
+ ip route replace 2001:db8:2::/64 vrf vrf-r1 \
+ nexthop via fe80:2::22 dev $rp12 weight $weight_rp12 \
+ nexthop via fe80:3::23 dev $rp13 weight $weight_rp13
+
+ t0_rp12=$(link_stats_tx_packets_get $rp12)
+ t0_rp13=$(link_stats_tx_packets_get $rp13)
+
+ # Generate 16384 echo requests, each with a random flow label.
+ for _ in $(seq 1 16384); do
+ ip vrf exec vrf-h1 $PING6 2001:db8:2::2 -F 0 -c 1 -q &> /dev/null
+ done
+
+ t1_rp12=$(link_stats_tx_packets_get $rp12)
+ t1_rp13=$(link_stats_tx_packets_get $rp13)
+
+ let "packets_rp12 = $t1_rp12 - $t0_rp12"
+ let "packets_rp13 = $t1_rp13 - $t0_rp13"
+ multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+
+ ip route replace 2001:db8:2::/64 vrf vrf-r1 \
+ nexthop via fe80:2::22 dev $rp12 \
+ nexthop via fe80:3::23 dev $rp13
+}
+
+multipath_test()
+{
+ log_info "Running IPv4 multipath tests"
+ multipath4_test "ECMP" 1 1
+ multipath4_test "Weighted MP 2:1" 2 1
+ multipath4_test "Weighted MP 11:45" 11 45
+
+ log_info "Running IPv6 multipath tests"
+ multipath6_test "ECMP" 1 1
+ multipath6_test "Weighted MP 2:1" 2 1
+ multipath6_test "Weighted MP 11:45" 11 45
+
+ log_info "Running IPv6 L4 hash multipath tests"
+ multipath6_l4_test "ECMP" 1 1
+ multipath6_l4_test "Weighted MP 2:1" 2 1
+ multipath6_l4_test "Weighted MP 11:45" 11 45
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ rp11=${NETIFS[p2]}
+
+ rp12=${NETIFS[p3]}
+ rp22=${NETIFS[p4]}
+
+ rp13=${NETIFS[p5]}
+ rp23=${NETIFS[p6]}
+
+ rp21=${NETIFS[p7]}
+ h2=${NETIFS[p8]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ router1_create
+ router2_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ router2_destroy
+ router1_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 198.51.100.2
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:2::2
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh
new file mode 100755
index 000000000..813d02d19
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_actions.sh
@@ -0,0 +1,213 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="gact_drop_and_ok_test mirred_egress_redirect_test \
+ mirred_egress_mirror_test gact_trap_test"
+NUM_NETIFS=4
+source tc_common.sh
+source lib.sh
+
+tcflags="skip_hw"
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/24
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/24
+ tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+ tc qdisc del dev $h2 clsact
+ simple_if_fini $h2 192.0.2.2/24
+}
+
+switch_create()
+{
+ simple_if_init $swp1 192.0.2.2/24
+ tc qdisc add dev $swp1 clsact
+
+ simple_if_init $swp2 192.0.2.1/24
+}
+
+switch_destroy()
+{
+ simple_if_fini $swp2 192.0.2.1/24
+
+ tc qdisc del dev $swp1 clsact
+ simple_if_fini $swp1 192.0.2.2/24
+}
+
+mirred_egress_test()
+{
+ local action=$1
+
+ RET=0
+
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags dst_ip 192.0.2.2 action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_fail $? "Matched without redirect rule inserted"
+
+ tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags dst_ip 192.0.2.2 action mirred egress $action \
+ dev $swp2
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_err $? "Did not match incoming $action packet"
+
+ tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+
+ log_test "mirred egress $action ($tcflags)"
+}
+
+gact_drop_and_ok_test()
+{
+ RET=0
+
+ tc filter add dev $swp1 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags dst_ip 192.0.2.2 action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $swp1 ingress" 102 1
+ check_err $? "Packet was not dropped"
+
+ tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags dst_ip 192.0.2.2 action ok
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $swp1 ingress" 101 1
+ check_err $? "Did not see passed packet"
+
+ tc_check_packets "dev $swp1 ingress" 102 2
+ check_fail $? "Packet was dropped and it should not reach here"
+
+ tc filter del dev $swp1 ingress protocol ip pref 2 handle 102 flower
+ tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+ log_test "gact drop and ok ($tcflags)"
+}
+
+gact_trap_test()
+{
+ RET=0
+
+ if [[ "$tcflags" != "skip_sw" ]]; then
+ return 0;
+ fi
+
+ tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+ skip_hw dst_ip 192.0.2.2 action drop
+ tc filter add dev $swp1 ingress protocol ip pref 3 handle 103 flower \
+ $tcflags dst_ip 192.0.2.2 action mirred egress redirect \
+ dev $swp2
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $swp1 ingress" 101 1
+ check_fail $? "Saw packet without trap rule inserted"
+
+ tc filter add dev $swp1 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags dst_ip 192.0.2.2 action trap
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $swp1 ingress" 102 1
+ check_err $? "Packet was not trapped"
+
+ tc_check_packets "dev $swp1 ingress" 101 1
+ check_err $? "Did not see trapped packet"
+
+ tc filter del dev $swp1 ingress protocol ip pref 3 handle 103 flower
+ tc filter del dev $swp1 ingress protocol ip pref 2 handle 102 flower
+ tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+ log_test "trap ($tcflags)"
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ h1mac=$(mac_get $h1)
+ h2mac=$(mac_get $h2)
+
+ swp1origmac=$(mac_get $swp1)
+ swp2origmac=$(mac_get $swp2)
+ ip link set $swp1 address $h2mac
+ ip link set $swp2 address $h1mac
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+
+ ip link set $swp2 address $swp2origmac
+ ip link set $swp1 address $swp1origmac
+}
+
+mirred_egress_redirect_test()
+{
+ mirred_egress_test "redirect"
+}
+
+mirred_egress_mirror_test()
+{
+ mirred_egress_test "mirror"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+ log_info "Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ tests_run
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_chains.sh b/tools/testing/selftests/net/forwarding/tc_chains.sh
new file mode 100755
index 000000000..2934fb5ed
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_chains.sh
@@ -0,0 +1,205 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="unreachable_chain_test gact_goto_chain_test create_destroy_chain \
+ template_filter_fits"
+NUM_NETIFS=2
+source tc_common.sh
+source lib.sh
+
+tcflags="skip_hw"
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/24
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/24
+ tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+ tc qdisc del dev $h2 clsact
+ simple_if_fini $h2 192.0.2.2/24
+}
+
+unreachable_chain_test()
+{
+ RET=0
+
+ tc filter add dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+ flower $tcflags dst_mac $h2mac action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 1101 1
+ check_fail $? "matched on filter in unreachable chain"
+
+ tc filter del dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+ flower
+
+ log_test "unreachable chain ($tcflags)"
+}
+
+gact_goto_chain_test()
+{
+ RET=0
+
+ tc filter add dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+ flower $tcflags dst_mac $h2mac action drop
+ tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags dst_mac $h2mac action drop
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags dst_mac $h2mac action goto chain 1
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_fail $? "Matched on a wrong filter"
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_err $? "Did not match on correct filter with goto chain action"
+
+ tc_check_packets "dev $h2 ingress" 1101 1
+ check_err $? "Did not match on correct filter in chain 1"
+
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+ tc filter del dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+ flower
+
+ log_test "gact goto chain ($tcflags)"
+}
+
+create_destroy_chain()
+{
+ RET=0
+
+ tc chain add dev $h2 ingress
+ check_err $? "Failed to create default chain"
+
+ output="$(tc -j chain get dev $h2 ingress)"
+ check_err $? "Failed to get default chain"
+
+ echo $output | jq -e ".[] | select(.chain == 0)" &> /dev/null
+ check_err $? "Unexpected output for default chain"
+
+ tc chain add dev $h2 ingress chain 1
+ check_err $? "Failed to create chain 1"
+
+ output="$(tc -j chain get dev $h2 ingress chain 1)"
+ check_err $? "Failed to get chain 1"
+
+ echo $output | jq -e ".[] | select(.chain == 1)" &> /dev/null
+ check_err $? "Unexpected output for chain 1"
+
+ output="$(tc -j chain show dev $h2 ingress)"
+ check_err $? "Failed to dump chains"
+
+ echo $output | jq -e ".[] | select(.chain == 0)" &> /dev/null
+ check_err $? "Can't find default chain in dump"
+
+ echo $output | jq -e ".[] | select(.chain == 1)" &> /dev/null
+ check_err $? "Can't find chain 1 in dump"
+
+ tc chain del dev $h2 ingress
+ check_err $? "Failed to destroy default chain"
+
+ tc chain del dev $h2 ingress chain 1
+ check_err $? "Failed to destroy chain 1"
+
+ log_test "create destroy chain"
+}
+
+template_filter_fits()
+{
+ RET=0
+
+ tc chain add dev $h2 ingress protocol ip \
+ flower dst_mac 00:00:00:00:00:00/FF:FF:FF:FF:FF:FF &> /dev/null
+ tc chain add dev $h2 ingress chain 1 protocol ip \
+ flower src_mac 00:00:00:00:00:00/FF:FF:FF:FF:FF:FF &> /dev/null
+
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 1101 \
+ flower dst_mac $h2mac action drop
+ check_err $? "Failed to insert filter which fits template"
+
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 1102 \
+ flower src_mac $h2mac action drop &> /dev/null
+ check_fail $? "Incorrectly succeded to insert filter which does not template"
+
+ tc filter add dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+ flower src_mac $h2mac action drop
+ check_err $? "Failed to insert filter which fits template"
+
+ tc filter add dev $h2 ingress chain 1 protocol ip pref 1 handle 1102 \
+ flower dst_mac $h2mac action drop &> /dev/null
+ check_fail $? "Incorrectly succeded to insert filter which does not template"
+
+ tc filter del dev $h2 ingress chain 1 protocol ip pref 1 handle 1102 \
+ flower &> /dev/null
+ tc filter del dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+ flower &> /dev/null
+
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 1102 \
+ flower &> /dev/null
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 1101 \
+ flower &> /dev/null
+
+ tc chain del dev $h2 ingress chain 1
+ tc chain del dev $h2 ingress
+
+ log_test "template filter fits"
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ h2=${NETIFS[p2]}
+ h1mac=$(mac_get $h1)
+ h2mac=$(mac_get $h2)
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+check_tc_chain_support
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+ log_info "Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ tests_run
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_common.sh b/tools/testing/selftests/net/forwarding/tc_common.sh
new file mode 100644
index 000000000..9d3b64a2a
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_common.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+CHECK_TC="yes"
+
+tc_check_packets()
+{
+ local id=$1
+ local handle=$2
+ local count=$3
+ local ret
+
+ output="$(tc -j -s filter show $id)"
+ # workaround the jq bug which causes jq to return 0 in case input is ""
+ ret=$?
+ if [[ $ret -ne 0 ]]; then
+ return $ret
+ fi
+ echo $output | \
+ jq -e ".[] \
+ | select(.options.handle == $handle) \
+ | select(.options.actions[0].stats.packets == $count)" \
+ &> /dev/null
+ return $?
+}
diff --git a/tools/testing/selftests/net/forwarding/tc_flower.sh b/tools/testing/selftests/net/forwarding/tc_flower.sh
new file mode 100755
index 000000000..20d1077e5
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_flower.sh
@@ -0,0 +1,260 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="match_dst_mac_test match_src_mac_test match_dst_ip_test \
+ match_src_ip_test match_ip_flags_test"
+NUM_NETIFS=2
+source tc_common.sh
+source lib.sh
+
+tcflags="skip_hw"
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24 198.51.100.1/24
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/24 198.51.100.1/24
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/24 198.51.100.2/24
+ tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+ tc qdisc del dev $h2 clsact
+ simple_if_fini $h2 192.0.2.2/24 198.51.100.2/24
+}
+
+match_dst_mac_test()
+{
+ local dummy_mac=de:ad:be:ef:aa:aa
+
+ RET=0
+
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags dst_mac $dummy_mac action drop
+ tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags dst_mac $h2mac action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_fail $? "Matched on a wrong filter"
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_err $? "Did not match on correct filter"
+
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+ log_test "dst_mac match ($tcflags)"
+}
+
+match_src_mac_test()
+{
+ local dummy_mac=de:ad:be:ef:aa:aa
+
+ RET=0
+
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags src_mac $dummy_mac action drop
+ tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags src_mac $h1mac action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_fail $? "Matched on a wrong filter"
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_err $? "Did not match on correct filter"
+
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+ log_test "src_mac match ($tcflags)"
+}
+
+match_dst_ip_test()
+{
+ RET=0
+
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags dst_ip 198.51.100.2 action drop
+ tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags dst_ip 192.0.2.2 action drop
+ tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+ $tcflags dst_ip 192.0.2.0/24 action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_fail $? "Matched on a wrong filter"
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_err $? "Did not match on correct filter"
+
+ tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 103 1
+ check_err $? "Did not match on correct filter with mask"
+
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+
+ log_test "dst_ip match ($tcflags)"
+}
+
+match_src_ip_test()
+{
+ RET=0
+
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags src_ip 198.51.100.1 action drop
+ tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags src_ip 192.0.2.1 action drop
+ tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+ $tcflags src_ip 192.0.2.0/24 action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_fail $? "Matched on a wrong filter"
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_err $? "Did not match on correct filter"
+
+ tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 103 1
+ check_err $? "Did not match on correct filter with mask"
+
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+
+ log_test "src_ip match ($tcflags)"
+}
+
+match_ip_flags_test()
+{
+ RET=0
+
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags ip_flags frag action continue
+ tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags ip_flags firstfrag action continue
+ tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+ $tcflags ip_flags nofirstfrag action continue
+ tc filter add dev $h2 ingress protocol ip pref 4 handle 104 flower \
+ $tcflags ip_flags nofrag action drop
+
+ $MZ $h1 -c 1 -p 1000 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip "frag=0" -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_fail $? "Matched on wrong frag filter (nofrag)"
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_fail $? "Matched on wrong firstfrag filter (nofrag)"
+
+ tc_check_packets "dev $h2 ingress" 103 1
+ check_err $? "Did not match on nofirstfrag filter (nofrag) "
+
+ tc_check_packets "dev $h2 ingress" 104 1
+ check_err $? "Did not match on nofrag filter (nofrag)"
+
+ $MZ $h1 -c 1 -p 1000 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip "frag=0,mf" -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_err $? "Did not match on frag filter (1stfrag)"
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_err $? "Did not match fistfrag filter (1stfrag)"
+
+ tc_check_packets "dev $h2 ingress" 103 1
+ check_err $? "Matched on wrong nofirstfrag filter (1stfrag)"
+
+ tc_check_packets "dev $h2 ingress" 104 1
+ check_err $? "Match on wrong nofrag filter (1stfrag)"
+
+ $MZ $h1 -c 1 -p 1000 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip "frag=256,mf" -q
+ $MZ $h1 -c 1 -p 1000 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip "frag=256" -q
+
+ tc_check_packets "dev $h2 ingress" 101 3
+ check_err $? "Did not match on frag filter (no1stfrag)"
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_err $? "Matched on wrong firstfrag filter (no1stfrag)"
+
+ tc_check_packets "dev $h2 ingress" 103 3
+ check_err $? "Did not match on nofirstfrag filter (no1stfrag)"
+
+ tc_check_packets "dev $h2 ingress" 104 1
+ check_err $? "Matched on nofrag filter (no1stfrag)"
+
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+ tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+ tc filter del dev $h2 ingress protocol ip pref 4 handle 104 flower
+
+ log_test "ip_flags match ($tcflags)"
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ h2=${NETIFS[p2]}
+ h1mac=$(mac_get $h1)
+ h2mac=$(mac_get $h2)
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+ log_info "Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ tests_run
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_shblocks.sh b/tools/testing/selftests/net/forwarding/tc_shblocks.sh
new file mode 100755
index 000000000..9826a446e
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_shblocks.sh
@@ -0,0 +1,125 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="shared_block_test"
+NUM_NETIFS=4
+source tc_common.sh
+source lib.sh
+
+tcflags="skip_hw"
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/24
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.1/24
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2 192.0.2.1/24
+}
+
+switch_create()
+{
+ simple_if_init $swp1 192.0.2.2/24
+ tc qdisc add dev $swp1 ingress_block 22 egress_block 23 clsact
+
+ simple_if_init $swp2 192.0.2.2/24
+ tc qdisc add dev $swp2 ingress_block 22 egress_block 23 clsact
+}
+
+switch_destroy()
+{
+ tc qdisc del dev $swp2 clsact
+ simple_if_fini $swp2 192.0.2.2/24
+
+ tc qdisc del dev $swp1 clsact
+ simple_if_fini $swp1 192.0.2.2/24
+}
+
+shared_block_test()
+{
+ RET=0
+
+ tc filter add block 22 protocol ip pref 1 handle 101 flower \
+ $tcflags dst_ip 192.0.2.2 action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $swmac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "block 22" 101 1
+ check_err $? "Did not match first incoming packet on a block"
+
+ $MZ $h2 -c 1 -p 64 -a $h2mac -b $swmac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "block 22" 101 2
+ check_err $? "Did not match second incoming packet on a block"
+
+ tc filter del block 22 protocol ip pref 1 handle 101 flower
+
+ log_test "shared block ($tcflags)"
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ h1mac=$(mac_get $h1)
+ h2mac=$(mac_get $h2)
+
+ swmac=$(mac_get $swp1)
+ swp2origmac=$(mac_get $swp2)
+ ip link set $swp2 address $swmac
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+
+ ip link set $swp2 address $swp2origmac
+}
+
+check_tc_shblock_support
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+ log_info "Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ tests_run
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/in_netns.sh b/tools/testing/selftests/net/in_netns.sh
new file mode 100755
index 000000000..88795b510
--- /dev/null
+++ b/tools/testing/selftests/net/in_netns.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Execute a subprocess in a network namespace
+
+set -e
+
+readonly NETNS="ns-$(mktemp -u XXXXXX)"
+
+setup() {
+ ip netns add "${NETNS}"
+ ip -netns "${NETNS}" link set lo up
+}
+
+cleanup() {
+ ip netns del "${NETNS}"
+}
+
+trap cleanup EXIT
+setup
+
+ip netns exec "${NETNS}" "$@"
+exit "$?"
diff --git a/tools/testing/selftests/net/ip6_gre_headroom.sh b/tools/testing/selftests/net/ip6_gre_headroom.sh
new file mode 100755
index 000000000..5b41e8bb6
--- /dev/null
+++ b/tools/testing/selftests/net/ip6_gre_headroom.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test that enough headroom is reserved for the first packet passing through an
+# IPv6 GRE-like netdevice.
+
+setup_prepare()
+{
+ ip link add h1 type veth peer name swp1
+ ip link add h3 type veth peer name swp3
+
+ ip link set dev h1 up
+ ip address add 192.0.2.1/28 dev h1
+
+ ip link add dev vh3 type vrf table 20
+ ip link set dev h3 master vh3
+ ip link set dev vh3 up
+ ip link set dev h3 up
+
+ ip link set dev swp3 up
+ ip address add dev swp3 2001:db8:2::1/64
+ ip address add dev swp3 2001:db8:2::3/64
+
+ ip link set dev swp1 up
+ tc qdisc add dev swp1 clsact
+
+ ip link add name er6 type ip6erspan \
+ local 2001:db8:2::1 remote 2001:db8:2::2 oseq okey 123
+ ip link set dev er6 up
+
+ ip link add name gt6 type ip6gretap \
+ local 2001:db8:2::3 remote 2001:db8:2::4
+ ip link set dev gt6 up
+
+ sleep 1
+}
+
+cleanup()
+{
+ ip link del dev gt6
+ ip link del dev er6
+ ip link del dev swp1
+ ip link del dev swp3
+ ip link del dev vh3
+}
+
+test_headroom()
+{
+ local type=$1; shift
+ local tundev=$1; shift
+
+ tc filter add dev swp1 ingress pref 1000 matchall skip_hw \
+ action mirred egress mirror dev $tundev
+ ping -I h1 192.0.2.2 -c 1 -w 2 &> /dev/null
+ tc filter del dev swp1 ingress pref 1000
+
+ # If it doesn't panic, it passes.
+ printf "TEST: %-60s [PASS]\n" "$type headroom"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+test_headroom ip6gretap gt6
+test_headroom ip6erspan er6
diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c
new file mode 100644
index 000000000..c53959193
--- /dev/null
+++ b/tools/testing/selftests/net/msg_zerocopy.c
@@ -0,0 +1,810 @@
+/* Evaluate MSG_ZEROCOPY
+ *
+ * Send traffic between two processes over one of the supported
+ * protocols and modes:
+ *
+ * PF_INET/PF_INET6
+ * - SOCK_STREAM
+ * - SOCK_DGRAM
+ * - SOCK_DGRAM with UDP_CORK
+ * - SOCK_RAW
+ * - SOCK_RAW with IP_HDRINCL
+ *
+ * PF_PACKET
+ * - SOCK_DGRAM
+ * - SOCK_RAW
+ *
+ * PF_RDS
+ * - SOCK_SEQPACKET
+ *
+ * Start this program on two connected hosts, one in send mode and
+ * the other with option '-r' to put it in receiver mode.
+ *
+ * If zerocopy mode ('-z') is enabled, the sender will verify that
+ * the kernel queues completions on the error queue for all zerocopy
+ * transfers.
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <error.h>
+#include <errno.h>
+#include <limits.h>
+#include <linux/errqueue.h>
+#include <linux/if_packet.h>
+#include <linux/ipv6.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <linux/rds.h>
+
+#ifndef SO_EE_ORIGIN_ZEROCOPY
+#define SO_EE_ORIGIN_ZEROCOPY 5
+#endif
+
+#ifndef SO_ZEROCOPY
+#define SO_ZEROCOPY 60
+#endif
+
+#ifndef SO_EE_CODE_ZEROCOPY_COPIED
+#define SO_EE_CODE_ZEROCOPY_COPIED 1
+#endif
+
+#ifndef MSG_ZEROCOPY
+#define MSG_ZEROCOPY 0x4000000
+#endif
+
+static int cfg_cork;
+static bool cfg_cork_mixed;
+static int cfg_cpu = -1; /* default: pin to last cpu */
+static int cfg_family = PF_UNSPEC;
+static int cfg_ifindex = 1;
+static int cfg_payload_len;
+static int cfg_port = 8000;
+static bool cfg_rx;
+static int cfg_runtime_ms = 4200;
+static int cfg_verbose;
+static int cfg_waittime_ms = 500;
+static bool cfg_zerocopy;
+
+static socklen_t cfg_alen;
+static struct sockaddr_storage cfg_dst_addr;
+static struct sockaddr_storage cfg_src_addr;
+
+static char payload[IP_MAXPACKET];
+static long packets, bytes, completions, expected_completions;
+static int zerocopied = -1;
+static uint32_t next_completion;
+
+static unsigned long gettimeofday_ms(void)
+{
+ struct timeval tv;
+
+ gettimeofday(&tv, NULL);
+ return (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
+}
+
+static uint16_t get_ip_csum(const uint16_t *start, int num_words)
+{
+ unsigned long sum = 0;
+ int i;
+
+ for (i = 0; i < num_words; i++)
+ sum += start[i];
+
+ while (sum >> 16)
+ sum = (sum & 0xFFFF) + (sum >> 16);
+
+ return ~sum;
+}
+
+static int do_setcpu(int cpu)
+{
+ cpu_set_t mask;
+
+ CPU_ZERO(&mask);
+ CPU_SET(cpu, &mask);
+ if (sched_setaffinity(0, sizeof(mask), &mask))
+ fprintf(stderr, "cpu: unable to pin, may increase variance.\n");
+ else if (cfg_verbose)
+ fprintf(stderr, "cpu: %u\n", cpu);
+
+ return 0;
+}
+
+static void do_setsockopt(int fd, int level, int optname, int val)
+{
+ if (setsockopt(fd, level, optname, &val, sizeof(val)))
+ error(1, errno, "setsockopt %d.%d: %d", level, optname, val);
+}
+
+static int do_poll(int fd, int events)
+{
+ struct pollfd pfd;
+ int ret;
+
+ pfd.events = events;
+ pfd.revents = 0;
+ pfd.fd = fd;
+
+ ret = poll(&pfd, 1, cfg_waittime_ms);
+ if (ret == -1)
+ error(1, errno, "poll");
+
+ return ret && (pfd.revents & events);
+}
+
+static int do_accept(int fd)
+{
+ int fda = fd;
+
+ fd = accept(fda, NULL, NULL);
+ if (fd == -1)
+ error(1, errno, "accept");
+ if (close(fda))
+ error(1, errno, "close listen sock");
+
+ return fd;
+}
+
+static void add_zcopy_cookie(struct msghdr *msg, uint32_t cookie)
+{
+ struct cmsghdr *cm;
+
+ if (!msg->msg_control)
+ error(1, errno, "NULL cookie");
+ cm = (void *)msg->msg_control;
+ cm->cmsg_len = CMSG_LEN(sizeof(cookie));
+ cm->cmsg_level = SOL_RDS;
+ cm->cmsg_type = RDS_CMSG_ZCOPY_COOKIE;
+ memcpy(CMSG_DATA(cm), &cookie, sizeof(cookie));
+}
+
+static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy, int domain)
+{
+ int ret, len, i, flags;
+ static uint32_t cookie;
+ char ckbuf[CMSG_SPACE(sizeof(cookie))];
+
+ len = 0;
+ for (i = 0; i < msg->msg_iovlen; i++)
+ len += msg->msg_iov[i].iov_len;
+
+ flags = MSG_DONTWAIT;
+ if (do_zerocopy) {
+ flags |= MSG_ZEROCOPY;
+ if (domain == PF_RDS) {
+ memset(&msg->msg_control, 0, sizeof(msg->msg_control));
+ msg->msg_controllen = CMSG_SPACE(sizeof(cookie));
+ msg->msg_control = (struct cmsghdr *)ckbuf;
+ add_zcopy_cookie(msg, ++cookie);
+ }
+ }
+
+ ret = sendmsg(fd, msg, flags);
+ if (ret == -1 && errno == EAGAIN)
+ return false;
+ if (ret == -1)
+ error(1, errno, "send");
+ if (cfg_verbose && ret != len)
+ fprintf(stderr, "send: ret=%u != %u\n", ret, len);
+
+ if (len) {
+ packets++;
+ bytes += ret;
+ if (do_zerocopy && ret)
+ expected_completions++;
+ }
+ if (do_zerocopy && domain == PF_RDS) {
+ msg->msg_control = NULL;
+ msg->msg_controllen = 0;
+ }
+
+ return true;
+}
+
+static void do_sendmsg_corked(int fd, struct msghdr *msg)
+{
+ bool do_zerocopy = cfg_zerocopy;
+ int i, payload_len, extra_len;
+
+ /* split up the packet. for non-multiple, make first buffer longer */
+ payload_len = cfg_payload_len / cfg_cork;
+ extra_len = cfg_payload_len - (cfg_cork * payload_len);
+
+ do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 1);
+
+ for (i = 0; i < cfg_cork; i++) {
+
+ /* in mixed-frags mode, alternate zerocopy and copy frags
+ * start with non-zerocopy, to ensure attach later works
+ */
+ if (cfg_cork_mixed)
+ do_zerocopy = (i & 1);
+
+ msg->msg_iov[0].iov_len = payload_len + extra_len;
+ extra_len = 0;
+
+ do_sendmsg(fd, msg, do_zerocopy,
+ (cfg_dst_addr.ss_family == AF_INET ?
+ PF_INET : PF_INET6));
+ }
+
+ do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 0);
+}
+
+static int setup_iph(struct iphdr *iph, uint16_t payload_len)
+{
+ struct sockaddr_in *daddr = (void *) &cfg_dst_addr;
+ struct sockaddr_in *saddr = (void *) &cfg_src_addr;
+
+ memset(iph, 0, sizeof(*iph));
+
+ iph->version = 4;
+ iph->tos = 0;
+ iph->ihl = 5;
+ iph->ttl = 2;
+ iph->saddr = saddr->sin_addr.s_addr;
+ iph->daddr = daddr->sin_addr.s_addr;
+ iph->protocol = IPPROTO_EGP;
+ iph->tot_len = htons(sizeof(*iph) + payload_len);
+ iph->check = get_ip_csum((void *) iph, iph->ihl << 1);
+
+ return sizeof(*iph);
+}
+
+static int setup_ip6h(struct ipv6hdr *ip6h, uint16_t payload_len)
+{
+ struct sockaddr_in6 *daddr = (void *) &cfg_dst_addr;
+ struct sockaddr_in6 *saddr = (void *) &cfg_src_addr;
+
+ memset(ip6h, 0, sizeof(*ip6h));
+
+ ip6h->version = 6;
+ ip6h->payload_len = htons(payload_len);
+ ip6h->nexthdr = IPPROTO_EGP;
+ ip6h->hop_limit = 2;
+ ip6h->saddr = saddr->sin6_addr;
+ ip6h->daddr = daddr->sin6_addr;
+
+ return sizeof(*ip6h);
+}
+
+
+static void setup_sockaddr(int domain, const char *str_addr,
+ struct sockaddr_storage *sockaddr)
+{
+ struct sockaddr_in6 *addr6 = (void *) sockaddr;
+ struct sockaddr_in *addr4 = (void *) sockaddr;
+
+ switch (domain) {
+ case PF_INET:
+ memset(addr4, 0, sizeof(*addr4));
+ addr4->sin_family = AF_INET;
+ addr4->sin_port = htons(cfg_port);
+ if (str_addr &&
+ inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1)
+ error(1, 0, "ipv4 parse error: %s", str_addr);
+ break;
+ case PF_INET6:
+ memset(addr6, 0, sizeof(*addr6));
+ addr6->sin6_family = AF_INET6;
+ addr6->sin6_port = htons(cfg_port);
+ if (str_addr &&
+ inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1)
+ error(1, 0, "ipv6 parse error: %s", str_addr);
+ break;
+ default:
+ error(1, 0, "illegal domain");
+ }
+}
+
+static int do_setup_tx(int domain, int type, int protocol)
+{
+ int fd;
+
+ fd = socket(domain, type, protocol);
+ if (fd == -1)
+ error(1, errno, "socket t");
+
+ do_setsockopt(fd, SOL_SOCKET, SO_SNDBUF, 1 << 21);
+ if (cfg_zerocopy)
+ do_setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, 1);
+
+ if (domain != PF_PACKET && domain != PF_RDS)
+ if (connect(fd, (void *) &cfg_dst_addr, cfg_alen))
+ error(1, errno, "connect");
+
+ if (domain == PF_RDS) {
+ if (bind(fd, (void *) &cfg_src_addr, cfg_alen))
+ error(1, errno, "bind");
+ }
+
+ return fd;
+}
+
+static uint32_t do_process_zerocopy_cookies(struct rds_zcopy_cookies *ck)
+{
+ int i;
+
+ if (ck->num > RDS_MAX_ZCOOKIES)
+ error(1, 0, "Returned %d cookies, max expected %d\n",
+ ck->num, RDS_MAX_ZCOOKIES);
+ for (i = 0; i < ck->num; i++)
+ if (cfg_verbose >= 2)
+ fprintf(stderr, "%d\n", ck->cookies[i]);
+ return ck->num;
+}
+
+static bool do_recvmsg_completion(int fd)
+{
+ char cmsgbuf[CMSG_SPACE(sizeof(struct rds_zcopy_cookies))];
+ struct rds_zcopy_cookies *ck;
+ struct cmsghdr *cmsg;
+ struct msghdr msg;
+ bool ret = false;
+
+ memset(&msg, 0, sizeof(msg));
+ msg.msg_control = cmsgbuf;
+ msg.msg_controllen = sizeof(cmsgbuf);
+
+ if (recvmsg(fd, &msg, MSG_DONTWAIT))
+ return ret;
+
+ if (msg.msg_flags & MSG_CTRUNC)
+ error(1, errno, "recvmsg notification: truncated");
+
+ for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
+ if (cmsg->cmsg_level == SOL_RDS &&
+ cmsg->cmsg_type == RDS_CMSG_ZCOPY_COMPLETION) {
+
+ ck = (struct rds_zcopy_cookies *)CMSG_DATA(cmsg);
+ completions += do_process_zerocopy_cookies(ck);
+ ret = true;
+ break;
+ }
+ error(0, 0, "ignoring cmsg at level %d type %d\n",
+ cmsg->cmsg_level, cmsg->cmsg_type);
+ }
+ return ret;
+}
+
+static bool do_recv_completion(int fd, int domain)
+{
+ struct sock_extended_err *serr;
+ struct msghdr msg = {};
+ struct cmsghdr *cm;
+ uint32_t hi, lo, range;
+ int ret, zerocopy;
+ char control[100];
+
+ if (domain == PF_RDS)
+ return do_recvmsg_completion(fd);
+
+ msg.msg_control = control;
+ msg.msg_controllen = sizeof(control);
+
+ ret = recvmsg(fd, &msg, MSG_ERRQUEUE);
+ if (ret == -1 && errno == EAGAIN)
+ return false;
+ if (ret == -1)
+ error(1, errno, "recvmsg notification");
+ if (msg.msg_flags & MSG_CTRUNC)
+ error(1, errno, "recvmsg notification: truncated");
+
+ cm = CMSG_FIRSTHDR(&msg);
+ if (!cm)
+ error(1, 0, "cmsg: no cmsg");
+ if (!((cm->cmsg_level == SOL_IP && cm->cmsg_type == IP_RECVERR) ||
+ (cm->cmsg_level == SOL_IPV6 && cm->cmsg_type == IPV6_RECVERR) ||
+ (cm->cmsg_level == SOL_PACKET && cm->cmsg_type == PACKET_TX_TIMESTAMP)))
+ error(1, 0, "serr: wrong type: %d.%d",
+ cm->cmsg_level, cm->cmsg_type);
+
+ serr = (void *) CMSG_DATA(cm);
+
+ if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY)
+ error(1, 0, "serr: wrong origin: %u", serr->ee_origin);
+ if (serr->ee_errno != 0)
+ error(1, 0, "serr: wrong error code: %u", serr->ee_errno);
+
+ hi = serr->ee_data;
+ lo = serr->ee_info;
+ range = hi - lo + 1;
+
+ /* Detect notification gaps. These should not happen often, if at all.
+ * Gaps can occur due to drops, reordering and retransmissions.
+ */
+ if (lo != next_completion)
+ fprintf(stderr, "gap: %u..%u does not append to %u\n",
+ lo, hi, next_completion);
+ next_completion = hi + 1;
+
+ zerocopy = !(serr->ee_code & SO_EE_CODE_ZEROCOPY_COPIED);
+ if (zerocopied == -1)
+ zerocopied = zerocopy;
+ else if (zerocopied != zerocopy) {
+ fprintf(stderr, "serr: inconsistent\n");
+ zerocopied = zerocopy;
+ }
+
+ if (cfg_verbose >= 2)
+ fprintf(stderr, "completed: %u (h=%u l=%u)\n",
+ range, hi, lo);
+
+ completions += range;
+ return true;
+}
+
+/* Read all outstanding messages on the errqueue */
+static void do_recv_completions(int fd, int domain)
+{
+ while (do_recv_completion(fd, domain)) {}
+}
+
+/* Wait for all remaining completions on the errqueue */
+static void do_recv_remaining_completions(int fd, int domain)
+{
+ int64_t tstop = gettimeofday_ms() + cfg_waittime_ms;
+
+ while (completions < expected_completions &&
+ gettimeofday_ms() < tstop) {
+ if (do_poll(fd, domain == PF_RDS ? POLLIN : POLLERR))
+ do_recv_completions(fd, domain);
+ }
+
+ if (completions < expected_completions)
+ fprintf(stderr, "missing notifications: %lu < %lu\n",
+ completions, expected_completions);
+}
+
+static void do_tx(int domain, int type, int protocol)
+{
+ struct iovec iov[3] = { {0} };
+ struct sockaddr_ll laddr;
+ struct msghdr msg = {0};
+ struct ethhdr eth;
+ union {
+ struct ipv6hdr ip6h;
+ struct iphdr iph;
+ } nh;
+ uint64_t tstop;
+ int fd;
+
+ fd = do_setup_tx(domain, type, protocol);
+
+ if (domain == PF_PACKET) {
+ uint16_t proto = cfg_family == PF_INET ? ETH_P_IP : ETH_P_IPV6;
+
+ /* sock_raw passes ll header as data */
+ if (type == SOCK_RAW) {
+ memset(eth.h_dest, 0x06, ETH_ALEN);
+ memset(eth.h_source, 0x02, ETH_ALEN);
+ eth.h_proto = htons(proto);
+ iov[0].iov_base = &eth;
+ iov[0].iov_len = sizeof(eth);
+ msg.msg_iovlen++;
+ }
+
+ /* both sock_raw and sock_dgram expect name */
+ memset(&laddr, 0, sizeof(laddr));
+ laddr.sll_family = AF_PACKET;
+ laddr.sll_ifindex = cfg_ifindex;
+ laddr.sll_protocol = htons(proto);
+ laddr.sll_halen = ETH_ALEN;
+
+ memset(laddr.sll_addr, 0x06, ETH_ALEN);
+
+ msg.msg_name = &laddr;
+ msg.msg_namelen = sizeof(laddr);
+ }
+
+ /* packet and raw sockets with hdrincl must pass network header */
+ if (domain == PF_PACKET || protocol == IPPROTO_RAW) {
+ if (cfg_family == PF_INET)
+ iov[1].iov_len = setup_iph(&nh.iph, cfg_payload_len);
+ else
+ iov[1].iov_len = setup_ip6h(&nh.ip6h, cfg_payload_len);
+
+ iov[1].iov_base = (void *) &nh;
+ msg.msg_iovlen++;
+ }
+
+ if (domain == PF_RDS) {
+ msg.msg_name = &cfg_dst_addr;
+ msg.msg_namelen = (cfg_dst_addr.ss_family == AF_INET ?
+ sizeof(struct sockaddr_in) :
+ sizeof(struct sockaddr_in6));
+ }
+
+ iov[2].iov_base = payload;
+ iov[2].iov_len = cfg_payload_len;
+ msg.msg_iovlen++;
+ msg.msg_iov = &iov[3 - msg.msg_iovlen];
+
+ tstop = gettimeofday_ms() + cfg_runtime_ms;
+ do {
+ if (cfg_cork)
+ do_sendmsg_corked(fd, &msg);
+ else
+ do_sendmsg(fd, &msg, cfg_zerocopy, domain);
+
+ while (!do_poll(fd, POLLOUT)) {
+ if (cfg_zerocopy)
+ do_recv_completions(fd, domain);
+ }
+
+ } while (gettimeofday_ms() < tstop);
+
+ if (cfg_zerocopy)
+ do_recv_remaining_completions(fd, domain);
+
+ if (close(fd))
+ error(1, errno, "close");
+
+ fprintf(stderr, "tx=%lu (%lu MB) txc=%lu zc=%c\n",
+ packets, bytes >> 20, completions,
+ zerocopied == 1 ? 'y' : 'n');
+}
+
+static int do_setup_rx(int domain, int type, int protocol)
+{
+ int fd;
+
+ /* If tx over PF_PACKET, rx over PF_INET(6)/SOCK_RAW,
+ * to recv the only copy of the packet, not a clone
+ */
+ if (domain == PF_PACKET)
+ error(1, 0, "Use PF_INET/SOCK_RAW to read");
+
+ if (type == SOCK_RAW && protocol == IPPROTO_RAW)
+ error(1, 0, "IPPROTO_RAW: not supported on Rx");
+
+ fd = socket(domain, type, protocol);
+ if (fd == -1)
+ error(1, errno, "socket r");
+
+ do_setsockopt(fd, SOL_SOCKET, SO_RCVBUF, 1 << 21);
+ do_setsockopt(fd, SOL_SOCKET, SO_RCVLOWAT, 1 << 16);
+ do_setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, 1);
+
+ if (bind(fd, (void *) &cfg_dst_addr, cfg_alen))
+ error(1, errno, "bind");
+
+ if (type == SOCK_STREAM) {
+ if (listen(fd, 1))
+ error(1, errno, "listen");
+ fd = do_accept(fd);
+ }
+
+ return fd;
+}
+
+/* Flush all outstanding bytes for the tcp receive queue */
+static void do_flush_tcp(int fd)
+{
+ int ret;
+
+ /* MSG_TRUNC flushes up to len bytes */
+ ret = recv(fd, NULL, 1 << 21, MSG_TRUNC | MSG_DONTWAIT);
+ if (ret == -1 && errno == EAGAIN)
+ return;
+ if (ret == -1)
+ error(1, errno, "flush");
+ if (!ret)
+ return;
+
+ packets++;
+ bytes += ret;
+}
+
+/* Flush all outstanding datagrams. Verify first few bytes of each. */
+static void do_flush_datagram(int fd, int type)
+{
+ int ret, off = 0;
+ char buf[64];
+
+ /* MSG_TRUNC will return full datagram length */
+ ret = recv(fd, buf, sizeof(buf), MSG_DONTWAIT | MSG_TRUNC);
+ if (ret == -1 && errno == EAGAIN)
+ return;
+
+ /* raw ipv4 return with header, raw ipv6 without */
+ if (cfg_family == PF_INET && type == SOCK_RAW) {
+ off += sizeof(struct iphdr);
+ ret -= sizeof(struct iphdr);
+ }
+
+ if (ret == -1)
+ error(1, errno, "recv");
+ if (ret != cfg_payload_len)
+ error(1, 0, "recv: ret=%u != %u", ret, cfg_payload_len);
+ if (ret > sizeof(buf) - off)
+ ret = sizeof(buf) - off;
+ if (memcmp(buf + off, payload, ret))
+ error(1, 0, "recv: data mismatch");
+
+ packets++;
+ bytes += cfg_payload_len;
+}
+
+static void do_rx(int domain, int type, int protocol)
+{
+ uint64_t tstop;
+ int fd;
+
+ fd = do_setup_rx(domain, type, protocol);
+
+ tstop = gettimeofday_ms() + cfg_runtime_ms;
+ do {
+ if (type == SOCK_STREAM)
+ do_flush_tcp(fd);
+ else
+ do_flush_datagram(fd, type);
+
+ do_poll(fd, POLLIN);
+
+ } while (gettimeofday_ms() < tstop);
+
+ if (close(fd))
+ error(1, errno, "close");
+
+ fprintf(stderr, "rx=%lu (%lu MB)\n", packets, bytes >> 20);
+}
+
+static void do_test(int domain, int type, int protocol)
+{
+ int i;
+
+ if (cfg_cork && (domain == PF_PACKET || type != SOCK_DGRAM))
+ error(1, 0, "can only cork udp sockets");
+
+ do_setcpu(cfg_cpu);
+
+ for (i = 0; i < IP_MAXPACKET; i++)
+ payload[i] = 'a' + (i % 26);
+
+ if (cfg_rx)
+ do_rx(domain, type, protocol);
+ else
+ do_tx(domain, type, protocol);
+}
+
+static void usage(const char *filepath)
+{
+ error(1, 0, "Usage: %s [options] <test>", filepath);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ const int max_payload_len = sizeof(payload) -
+ sizeof(struct ipv6hdr) -
+ sizeof(struct tcphdr) -
+ 40 /* max tcp options */;
+ int c;
+ char *daddr = NULL, *saddr = NULL;
+ char *cfg_test;
+
+ cfg_payload_len = max_payload_len;
+
+ while ((c = getopt(argc, argv, "46c:C:D:i:mp:rs:S:t:vz")) != -1) {
+ switch (c) {
+ case '4':
+ if (cfg_family != PF_UNSPEC)
+ error(1, 0, "Pass one of -4 or -6");
+ cfg_family = PF_INET;
+ cfg_alen = sizeof(struct sockaddr_in);
+ break;
+ case '6':
+ if (cfg_family != PF_UNSPEC)
+ error(1, 0, "Pass one of -4 or -6");
+ cfg_family = PF_INET6;
+ cfg_alen = sizeof(struct sockaddr_in6);
+ break;
+ case 'c':
+ cfg_cork = strtol(optarg, NULL, 0);
+ break;
+ case 'C':
+ cfg_cpu = strtol(optarg, NULL, 0);
+ break;
+ case 'D':
+ daddr = optarg;
+ break;
+ case 'i':
+ cfg_ifindex = if_nametoindex(optarg);
+ if (cfg_ifindex == 0)
+ error(1, errno, "invalid iface: %s", optarg);
+ break;
+ case 'm':
+ cfg_cork_mixed = true;
+ break;
+ case 'p':
+ cfg_port = strtoul(optarg, NULL, 0);
+ break;
+ case 'r':
+ cfg_rx = true;
+ break;
+ case 's':
+ cfg_payload_len = strtoul(optarg, NULL, 0);
+ break;
+ case 'S':
+ saddr = optarg;
+ break;
+ case 't':
+ cfg_runtime_ms = 200 + strtoul(optarg, NULL, 10) * 1000;
+ break;
+ case 'v':
+ cfg_verbose++;
+ break;
+ case 'z':
+ cfg_zerocopy = true;
+ break;
+ }
+ }
+
+ cfg_test = argv[argc - 1];
+ if (strcmp(cfg_test, "rds") == 0) {
+ if (!daddr)
+ error(1, 0, "-D <server addr> required for PF_RDS\n");
+ if (!cfg_rx && !saddr)
+ error(1, 0, "-S <client addr> required for PF_RDS\n");
+ }
+ setup_sockaddr(cfg_family, daddr, &cfg_dst_addr);
+ setup_sockaddr(cfg_family, saddr, &cfg_src_addr);
+
+ if (cfg_payload_len > max_payload_len)
+ error(1, 0, "-s: payload exceeds max (%d)", max_payload_len);
+ if (cfg_cork_mixed && (!cfg_zerocopy || !cfg_cork))
+ error(1, 0, "-m: cork_mixed requires corking and zerocopy");
+
+ if (optind != argc - 1)
+ usage(argv[0]);
+}
+
+int main(int argc, char **argv)
+{
+ const char *cfg_test;
+
+ parse_opts(argc, argv);
+
+ cfg_test = argv[argc - 1];
+
+ if (!strcmp(cfg_test, "packet"))
+ do_test(PF_PACKET, SOCK_RAW, 0);
+ else if (!strcmp(cfg_test, "packet_dgram"))
+ do_test(PF_PACKET, SOCK_DGRAM, 0);
+ else if (!strcmp(cfg_test, "raw"))
+ do_test(cfg_family, SOCK_RAW, IPPROTO_EGP);
+ else if (!strcmp(cfg_test, "raw_hdrincl"))
+ do_test(cfg_family, SOCK_RAW, IPPROTO_RAW);
+ else if (!strcmp(cfg_test, "tcp"))
+ do_test(cfg_family, SOCK_STREAM, 0);
+ else if (!strcmp(cfg_test, "udp"))
+ do_test(cfg_family, SOCK_DGRAM, 0);
+ else if (!strcmp(cfg_test, "rds"))
+ do_test(PF_RDS, SOCK_SEQPACKET, 0);
+ else
+ error(1, 0, "unknown cfg_test %s", cfg_test);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/net/msg_zerocopy.sh b/tools/testing/selftests/net/msg_zerocopy.sh
new file mode 100755
index 000000000..c43c6debd
--- /dev/null
+++ b/tools/testing/selftests/net/msg_zerocopy.sh
@@ -0,0 +1,120 @@
+#!/bin/bash
+#
+# Send data between two processes across namespaces
+# Run twice: once without and once with zerocopy
+
+set -e
+
+readonly DEV="veth0"
+readonly DEV_MTU=65535
+readonly BIN="./msg_zerocopy"
+
+readonly RAND="$(mktemp -u XXXXXX)"
+readonly NSPREFIX="ns-${RAND}"
+readonly NS1="${NSPREFIX}1"
+readonly NS2="${NSPREFIX}2"
+
+readonly SADDR4='192.168.1.1'
+readonly DADDR4='192.168.1.2'
+readonly SADDR6='fd::1'
+readonly DADDR6='fd::2'
+
+readonly path_sysctl_mem="net.core.optmem_max"
+
+# No arguments: automated test
+if [[ "$#" -eq "0" ]]; then
+ $0 4 tcp -t 1
+ $0 6 tcp -t 1
+ echo "OK. All tests passed"
+ exit 0
+fi
+
+# Argument parsing
+if [[ "$#" -lt "2" ]]; then
+ echo "Usage: $0 [4|6] [tcp|udp|raw|raw_hdrincl|packet|packet_dgram] <args>"
+ exit 1
+fi
+
+readonly IP="$1"
+shift
+readonly TXMODE="$1"
+shift
+readonly EXTRA_ARGS="$@"
+
+# Argument parsing: configure addresses
+if [[ "${IP}" == "4" ]]; then
+ readonly SADDR="${SADDR4}"
+ readonly DADDR="${DADDR4}"
+elif [[ "${IP}" == "6" ]]; then
+ readonly SADDR="${SADDR6}"
+ readonly DADDR="${DADDR6}"
+else
+ echo "Invalid IP version ${IP}"
+ exit 1
+fi
+
+# Argument parsing: select receive mode
+#
+# This differs from send mode for
+# - packet: use raw recv, because packet receives skb clones
+# - raw_hdrinc: use raw recv, because hdrincl is a tx-only option
+case "${TXMODE}" in
+'packet' | 'packet_dgram' | 'raw_hdrincl')
+ RXMODE='raw'
+ ;;
+*)
+ RXMODE="${TXMODE}"
+ ;;
+esac
+
+# Start of state changes: install cleanup handler
+save_sysctl_mem="$(sysctl -n ${path_sysctl_mem})"
+
+cleanup() {
+ ip netns del "${NS2}"
+ ip netns del "${NS1}"
+ sysctl -w -q "${path_sysctl_mem}=${save_sysctl_mem}"
+}
+
+trap cleanup EXIT
+
+# Configure system settings
+sysctl -w -q "${path_sysctl_mem}=1000000"
+
+# Create virtual ethernet pair between network namespaces
+ip netns add "${NS1}"
+ip netns add "${NS2}"
+
+ip link add "${DEV}" mtu "${DEV_MTU}" netns "${NS1}" type veth \
+ peer name "${DEV}" mtu "${DEV_MTU}" netns "${NS2}"
+
+# Bring the devices up
+ip -netns "${NS1}" link set "${DEV}" up
+ip -netns "${NS2}" link set "${DEV}" up
+
+# Set fixed MAC addresses on the devices
+ip -netns "${NS1}" link set dev "${DEV}" address 02:02:02:02:02:02
+ip -netns "${NS2}" link set dev "${DEV}" address 06:06:06:06:06:06
+
+# Add fixed IP addresses to the devices
+ip -netns "${NS1}" addr add 192.168.1.1/24 dev "${DEV}"
+ip -netns "${NS2}" addr add 192.168.1.2/24 dev "${DEV}"
+ip -netns "${NS1}" addr add fd::1/64 dev "${DEV}" nodad
+ip -netns "${NS2}" addr add fd::2/64 dev "${DEV}" nodad
+
+# Optionally disable sg or csum offload to test edge cases
+# ip netns exec "${NS1}" ethtool -K "${DEV}" sg off
+
+do_test() {
+ local readonly ARGS="$1"
+
+ echo "ipv${IP} ${TXMODE} ${ARGS}"
+ ip netns exec "${NS2}" "${BIN}" "-${IP}" -i "${DEV}" -t 2 -C 2 -S "${SADDR}" -D "${DADDR}" ${ARGS} -r "${RXMODE}" &
+ sleep 0.2
+ ip netns exec "${NS1}" "${BIN}" "-${IP}" -i "${DEV}" -t 1 -C 3 -S "${SADDR}" -D "${DADDR}" ${ARGS} "${TXMODE}"
+ wait
+}
+
+do_test "${EXTRA_ARGS}"
+do_test "-z ${EXTRA_ARGS}"
+echo ok
diff --git a/tools/testing/selftests/net/netdevice.sh b/tools/testing/selftests/net/netdevice.sh
new file mode 100755
index 000000000..e3afcb424
--- /dev/null
+++ b/tools/testing/selftests/net/netdevice.sh
@@ -0,0 +1,205 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test is for checking network interface
+# For the moment it tests only ethernet interface (but wifi could be easily added)
+#
+# We assume that all network driver are loaded
+# if not they probably have failed earlier in the boot process and their logged error will be catched by another test
+#
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+# this function will try to up the interface
+# if already up, nothing done
+# arg1: network interface name
+kci_net_start()
+{
+ netdev=$1
+
+ ip link show "$netdev" |grep -q UP
+ if [ $? -eq 0 ];then
+ echo "SKIP: $netdev: interface already up"
+ return $ksft_skip
+ fi
+
+ ip link set "$netdev" up
+ if [ $? -ne 0 ];then
+ echo "FAIL: $netdev: Fail to up interface"
+ return 1
+ else
+ echo "PASS: $netdev: set interface up"
+ NETDEV_STARTED=1
+ fi
+ return 0
+}
+
+# this function will try to setup an IP and MAC address on a network interface
+# Doing nothing if the interface was already up
+# arg1: network interface name
+kci_net_setup()
+{
+ netdev=$1
+
+ # do nothing if the interface was already up
+ if [ $NETDEV_STARTED -eq 0 ];then
+ return 0
+ fi
+
+ MACADDR='02:03:04:05:06:07'
+ ip link set dev $netdev address "$MACADDR"
+ if [ $? -ne 0 ];then
+ echo "FAIL: $netdev: Cannot set MAC address"
+ else
+ ip link show $netdev |grep -q "$MACADDR"
+ if [ $? -eq 0 ];then
+ echo "PASS: $netdev: set MAC address"
+ else
+ echo "FAIL: $netdev: Cannot set MAC address"
+ fi
+ fi
+
+ #check that the interface did not already have an IP
+ ip address show "$netdev" |grep '^[[:space:]]*inet'
+ if [ $? -eq 0 ];then
+ echo "SKIP: $netdev: already have an IP"
+ return $ksft_skip
+ fi
+
+ # TODO what ipaddr to set ? DHCP ?
+ echo "SKIP: $netdev: set IP address"
+ return $ksft_skip
+}
+
+# test an ethtool command
+# arg1: return code for not supported (see ethtool code source)
+# arg2: summary of the command
+# arg3: command to execute
+kci_netdev_ethtool_test()
+{
+ if [ $# -le 2 ];then
+ echo "SKIP: $netdev: ethtool: invalid number of arguments"
+ return 1
+ fi
+ $3 >/dev/null
+ ret=$?
+ if [ $ret -ne 0 ];then
+ if [ $ret -eq "$1" ];then
+ echo "SKIP: $netdev: ethtool $2 not supported"
+ return $ksft_skip
+ else
+ echo "FAIL: $netdev: ethtool $2"
+ return 1
+ fi
+ else
+ echo "PASS: $netdev: ethtool $2"
+ fi
+ return 0
+}
+
+# test ethtool commands
+# arg1: network interface name
+kci_netdev_ethtool()
+{
+ netdev=$1
+
+ #check presence of ethtool
+ ethtool --version 2>/dev/null >/dev/null
+ if [ $? -ne 0 ];then
+ echo "SKIP: ethtool not present"
+ return $ksft_skip
+ fi
+
+ TMP_ETHTOOL_FEATURES="$(mktemp)"
+ if [ ! -e "$TMP_ETHTOOL_FEATURES" ];then
+ echo "SKIP: Cannot create a tmp file"
+ return 1
+ fi
+
+ ethtool -k "$netdev" > "$TMP_ETHTOOL_FEATURES"
+ if [ $? -ne 0 ];then
+ echo "FAIL: $netdev: ethtool list features"
+ rm "$TMP_ETHTOOL_FEATURES"
+ return 1
+ fi
+ echo "PASS: $netdev: ethtool list features"
+ #TODO for each non fixed features, try to turn them on/off
+ rm "$TMP_ETHTOOL_FEATURES"
+
+ kci_netdev_ethtool_test 74 'dump' "ethtool -d $netdev"
+ kci_netdev_ethtool_test 94 'stats' "ethtool -S $netdev"
+ return 0
+}
+
+# stop a netdev
+# arg1: network interface name
+kci_netdev_stop()
+{
+ netdev=$1
+
+ if [ $NETDEV_STARTED -eq 0 ];then
+ echo "SKIP: $netdev: interface kept up"
+ return 0
+ fi
+
+ ip link set "$netdev" down
+ if [ $? -ne 0 ];then
+ echo "FAIL: $netdev: stop interface"
+ return 1
+ fi
+ echo "PASS: $netdev: stop interface"
+ return 0
+}
+
+# run all test on a netdev
+# arg1: network interface name
+kci_test_netdev()
+{
+ NETDEV_STARTED=0
+ IFACE_TO_UPDOWN="$1"
+ IFACE_TO_TEST="$1"
+ #check for VLAN interface
+ MASTER_IFACE="$(echo $1 | cut -d@ -f2)"
+ if [ ! -z "$MASTER_IFACE" ];then
+ IFACE_TO_UPDOWN="$MASTER_IFACE"
+ IFACE_TO_TEST="$(echo $1 | cut -d@ -f1)"
+ fi
+
+ NETDEV_STARTED=0
+ kci_net_start "$IFACE_TO_UPDOWN"
+
+ kci_net_setup "$IFACE_TO_TEST"
+
+ kci_netdev_ethtool "$IFACE_TO_TEST"
+
+ kci_netdev_stop "$IFACE_TO_UPDOWN"
+ return 0
+}
+
+#check for needed privileges
+if [ "$(id -u)" -ne 0 ];then
+ echo "SKIP: Need root privileges"
+ exit $ksft_skip
+fi
+
+ip link show 2>/dev/null >/dev/null
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without the ip tool"
+ exit $ksft_skip
+fi
+
+TMP_LIST_NETDEV="$(mktemp)"
+if [ ! -e "$TMP_LIST_NETDEV" ];then
+ echo "FAIL: Cannot create a tmp file"
+ exit 1
+fi
+
+ip link show |grep '^[0-9]' | grep -oE '[[:space:]].*eth[0-9]*:|[[:space:]].*enp[0-9]s[0-9]:' | cut -d\ -f2 | cut -d: -f1> "$TMP_LIST_NETDEV"
+while read netdev
+do
+ kci_test_netdev "$netdev"
+done < "$TMP_LIST_NETDEV"
+
+rm "$TMP_LIST_NETDEV"
+exit 0
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
new file mode 100755
index 000000000..0ab9423d0
--- /dev/null
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -0,0 +1,477 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Check that route PMTU values match expectations, and that initial device MTU
+# values are assigned correctly
+#
+# Tests currently implemented:
+#
+# - pmtu_vti4_exception
+# Set up vti tunnel on top of veth, with xfrm states and policies, in two
+# namespaces with matching endpoints. Check that route exception is not
+# created if link layer MTU is not exceeded, then exceed it and check that
+# exception is created with the expected PMTU. The approach described
+# below for IPv6 doesn't apply here, because, on IPv4, administrative MTU
+# changes alone won't affect PMTU
+#
+# - pmtu_vti6_exception
+# Set up vti6 tunnel on top of veth, with xfrm states and policies, in two
+# namespaces with matching endpoints. Check that route exception is
+# created by exceeding link layer MTU with ping to other endpoint. Then
+# decrease and increase MTU of tunnel, checking that route exception PMTU
+# changes accordingly
+#
+# - pmtu_vti4_default_mtu
+# Set up vti4 tunnel on top of veth, in two namespaces with matching
+# endpoints. Check that MTU assigned to vti interface is the MTU of the
+# lower layer (veth) minus additional lower layer headers (zero, for veth)
+# minus IPv4 header length
+#
+# - pmtu_vti6_default_mtu
+# Same as above, for IPv6
+#
+# - pmtu_vti4_link_add_mtu
+# Set up vti4 interface passing MTU value at link creation, check MTU is
+# configured, and that link is not created with invalid MTU values
+#
+# - pmtu_vti6_link_add_mtu
+# Same as above, for IPv6
+#
+# - pmtu_vti6_link_change_mtu
+# Set up two dummy interfaces with different MTUs, create a vti6 tunnel
+# and check that configured MTU is used on link creation and changes, and
+# that MTU is properly calculated instead when MTU is not configured from
+# userspace
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+# Some systems don't have a ping6 binary anymore
+which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping)
+
+tests="
+ pmtu_vti6_exception vti6: PMTU exceptions
+ pmtu_vti4_exception vti4: PMTU exceptions
+ pmtu_vti4_default_mtu vti4: default MTU assignment
+ pmtu_vti6_default_mtu vti6: default MTU assignment
+ pmtu_vti4_link_add_mtu vti4: MTU setting on link creation
+ pmtu_vti6_link_add_mtu vti6: MTU setting on link creation
+ pmtu_vti6_link_change_mtu vti6: MTU changes on link changes"
+
+NS_A="ns-$(mktemp -u XXXXXX)"
+NS_B="ns-$(mktemp -u XXXXXX)"
+ns_a="ip netns exec ${NS_A}"
+ns_b="ip netns exec ${NS_B}"
+
+veth4_a_addr="192.168.1.1"
+veth4_b_addr="192.168.1.2"
+veth4_mask="24"
+veth6_a_addr="fd00:1::a"
+veth6_b_addr="fd00:1::b"
+veth6_mask="64"
+
+vti4_a_addr="192.168.2.1"
+vti4_b_addr="192.168.2.2"
+vti4_mask="24"
+vti6_a_addr="fd00:2::a"
+vti6_b_addr="fd00:2::b"
+vti6_mask="64"
+
+dummy6_0_addr="fc00:1000::0"
+dummy6_1_addr="fc00:1001::0"
+dummy6_mask="64"
+
+cleanup_done=1
+err_buf=
+
+err() {
+ err_buf="${err_buf}${1}
+"
+}
+
+err_flush() {
+ echo -n "${err_buf}"
+ err_buf=
+}
+
+setup_namespaces() {
+ ip netns add ${NS_A} || return 1
+ ip netns add ${NS_B}
+}
+
+setup_veth() {
+ ${ns_a} ip link add veth_a type veth peer name veth_b || return 1
+ ${ns_a} ip link set veth_b netns ${NS_B}
+
+ ${ns_a} ip addr add ${veth4_a_addr}/${veth4_mask} dev veth_a
+ ${ns_b} ip addr add ${veth4_b_addr}/${veth4_mask} dev veth_b
+
+ ${ns_a} ip addr add ${veth6_a_addr}/${veth6_mask} dev veth_a
+ ${ns_b} ip addr add ${veth6_b_addr}/${veth6_mask} dev veth_b
+
+ ${ns_a} ip link set veth_a up
+ ${ns_b} ip link set veth_b up
+}
+
+setup_vti() {
+ proto=${1}
+ veth_a_addr="${2}"
+ veth_b_addr="${3}"
+ vti_a_addr="${4}"
+ vti_b_addr="${5}"
+ vti_mask=${6}
+
+ [ ${proto} -eq 6 ] && vti_type="vti6" || vti_type="vti"
+
+ ${ns_a} ip link add vti${proto}_a type ${vti_type} local ${veth_a_addr} remote ${veth_b_addr} key 10 || return 1
+ ${ns_b} ip link add vti${proto}_b type ${vti_type} local ${veth_b_addr} remote ${veth_a_addr} key 10
+
+ ${ns_a} ip addr add ${vti_a_addr}/${vti_mask} dev vti${proto}_a
+ ${ns_b} ip addr add ${vti_b_addr}/${vti_mask} dev vti${proto}_b
+
+ ${ns_a} ip link set vti${proto}_a up
+ ${ns_b} ip link set vti${proto}_b up
+
+ sleep 1
+}
+
+setup_vti4() {
+ setup_vti 4 ${veth4_a_addr} ${veth4_b_addr} ${vti4_a_addr} ${vti4_b_addr} ${vti4_mask}
+}
+
+setup_vti6() {
+ setup_vti 6 ${veth6_a_addr} ${veth6_b_addr} ${vti6_a_addr} ${vti6_b_addr} ${vti6_mask}
+}
+
+setup_xfrm() {
+ proto=${1}
+ veth_a_addr="${2}"
+ veth_b_addr="${3}"
+
+ ${ns_a} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel || return 1
+ ${ns_a} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
+ ${ns_a} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel
+ ${ns_a} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel
+
+ ${ns_b} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
+ ${ns_b} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
+ ${ns_b} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel
+ ${ns_b} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel
+}
+
+setup_xfrm4() {
+ setup_xfrm 4 ${veth4_a_addr} ${veth4_b_addr}
+}
+
+setup_xfrm6() {
+ setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr}
+}
+
+setup() {
+ [ "$(id -u)" -ne 0 ] && echo " need to run as root" && return $ksft_skip
+
+ cleanup_done=0
+ for arg do
+ eval setup_${arg} || { echo " ${arg} not supported"; return 1; }
+ done
+}
+
+cleanup() {
+ [ ${cleanup_done} -eq 1 ] && return
+ ip netns del ${NS_A} 2> /dev/null
+ ip netns del ${NS_B} 2> /dev/null
+ cleanup_done=1
+}
+
+mtu() {
+ ns_cmd="${1}"
+ dev="${2}"
+ mtu="${3}"
+
+ ${ns_cmd} ip link set dev ${dev} mtu ${mtu}
+}
+
+mtu_parse() {
+ input="${1}"
+
+ next=0
+ for i in ${input}; do
+ [ ${next} -eq 1 ] && echo "${i}" && return
+ [ "${i}" = "mtu" ] && next=1
+ done
+}
+
+link_get() {
+ ns_cmd="${1}"
+ name="${2}"
+
+ ${ns_cmd} ip link show dev "${name}"
+}
+
+link_get_mtu() {
+ ns_cmd="${1}"
+ name="${2}"
+
+ mtu_parse "$(link_get "${ns_cmd}" ${name})"
+}
+
+route_get_dst_exception() {
+ ns_cmd="${1}"
+ dst="${2}"
+
+ ${ns_cmd} ip route get "${dst}"
+}
+
+route_get_dst_pmtu_from_exception() {
+ ns_cmd="${1}"
+ dst="${2}"
+
+ mtu_parse "$(route_get_dst_exception "${ns_cmd}" ${dst})"
+}
+
+test_pmtu_vti4_exception() {
+ setup namespaces veth vti4 xfrm4 || return 2
+
+ veth_mtu=1500
+ vti_mtu=$((veth_mtu - 20))
+
+ # SPI SN IV ICV pad length next header
+ esp_payload_rfc4106=$((vti_mtu - 4 - 4 - 8 - 16 - 1 - 1))
+ ping_payload=$((esp_payload_rfc4106 - 28))
+
+ mtu "${ns_a}" veth_a ${veth_mtu}
+ mtu "${ns_b}" veth_b ${veth_mtu}
+ mtu "${ns_a}" vti4_a ${vti_mtu}
+ mtu "${ns_b}" vti4_b ${vti_mtu}
+
+ # Send DF packet without exceeding link layer MTU, check that no
+ # exception is created
+ ${ns_a} ping -q -M want -i 0.1 -w 2 -s ${ping_payload} ${vti4_b_addr} > /dev/null
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti4_b_addr})"
+ if [ "${pmtu}" != "" ]; then
+ err " unexpected exception created with PMTU ${pmtu} for IP payload length ${esp_payload_rfc4106}"
+ return 1
+ fi
+
+ # Now exceed link layer MTU by one byte, check that exception is created
+ ${ns_a} ping -q -M want -i 0.1 -w 2 -s $((ping_payload + 1)) ${vti4_b_addr} > /dev/null
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti4_b_addr})"
+ if [ "${pmtu}" = "" ]; then
+ err " exception not created for IP payload length $((esp_payload_rfc4106 + 1))"
+ return 1
+ fi
+
+ # ...with the right PMTU value
+ if [ ${pmtu} -ne ${esp_payload_rfc4106} ]; then
+ err " wrong PMTU ${pmtu} in exception, expected: ${esp_payload_rfc4106}"
+ return 1
+ fi
+}
+
+test_pmtu_vti6_exception() {
+ setup namespaces veth vti6 xfrm6 || return 2
+ fail=0
+
+ # Create route exception by exceeding link layer MTU
+ mtu "${ns_a}" veth_a 4000
+ mtu "${ns_b}" veth_b 4000
+ mtu "${ns_a}" vti6_a 5000
+ mtu "${ns_b}" vti6_b 5000
+ ${ns_a} ${ping6} -q -i 0.1 -w 2 -s 60000 ${vti6_b_addr} > /dev/null
+
+ # Check that exception was created
+ if [ "$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})" = "" ]; then
+ err " tunnel exceeding link layer MTU didn't create route exception"
+ return 1
+ fi
+
+ # Decrease tunnel MTU, check for PMTU decrease in route exception
+ mtu "${ns_a}" vti6_a 3000
+
+ if [ "$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})" -ne 3000 ]; then
+ err " decreasing tunnel MTU didn't decrease route exception PMTU"
+ fail=1
+ fi
+
+ # Increase tunnel MTU, check for PMTU increase in route exception
+ mtu "${ns_a}" vti6_a 9000
+ if [ "$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})" -ne 9000 ]; then
+ err " increasing tunnel MTU didn't increase route exception PMTU"
+ fail=1
+ fi
+
+ return ${fail}
+}
+
+test_pmtu_vti4_default_mtu() {
+ setup namespaces veth vti4 || return 2
+
+ # Check that MTU of vti device is MTU of veth minus IPv4 header length
+ veth_mtu="$(link_get_mtu "${ns_a}" veth_a)"
+ vti4_mtu="$(link_get_mtu "${ns_a}" vti4_a)"
+ if [ $((veth_mtu - vti4_mtu)) -ne 20 ]; then
+ err " vti MTU ${vti4_mtu} is not veth MTU ${veth_mtu} minus IPv4 header length"
+ return 1
+ fi
+}
+
+test_pmtu_vti6_default_mtu() {
+ setup namespaces veth vti6 || return 2
+
+ # Check that MTU of vti device is MTU of veth minus IPv6 header length
+ veth_mtu="$(link_get_mtu "${ns_a}" veth_a)"
+ vti6_mtu="$(link_get_mtu "${ns_a}" vti6_a)"
+ if [ $((veth_mtu - vti6_mtu)) -ne 40 ]; then
+ err " vti MTU ${vti6_mtu} is not veth MTU ${veth_mtu} minus IPv6 header length"
+ return 1
+ fi
+}
+
+test_pmtu_vti4_link_add_mtu() {
+ setup namespaces || return 2
+
+ ${ns_a} ip link add vti4_a type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10
+ [ $? -ne 0 ] && err " vti not supported" && return 2
+ ${ns_a} ip link del vti4_a
+
+ fail=0
+
+ min=68
+ max=$((65535 - 20))
+ # Check invalid values first
+ for v in $((min - 1)) $((max + 1)); do
+ ${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10 2>/dev/null
+ # This can fail, or MTU can be adjusted to a proper value
+ [ $? -ne 0 ] && continue
+ mtu="$(link_get_mtu "${ns_a}" vti4_a)"
+ if [ ${mtu} -lt ${min} -o ${mtu} -gt ${max} ]; then
+ err " vti tunnel created with invalid MTU ${mtu}"
+ fail=1
+ fi
+ ${ns_a} ip link del vti4_a
+ done
+
+ # Now check valid values
+ for v in ${min} 1300 ${max}; do
+ ${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10
+ mtu="$(link_get_mtu "${ns_a}" vti4_a)"
+ ${ns_a} ip link del vti4_a
+ if [ "${mtu}" != "${v}" ]; then
+ err " vti MTU ${mtu} doesn't match configured value ${v}"
+ fail=1
+ fi
+ done
+
+ return ${fail}
+}
+
+test_pmtu_vti6_link_add_mtu() {
+ setup namespaces || return 2
+
+ ${ns_a} ip link add vti6_a type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
+ [ $? -ne 0 ] && err " vti6 not supported" && return 2
+ ${ns_a} ip link del vti6_a
+
+ fail=0
+
+ min=68 # vti6 can carry IPv4 packets too
+ max=$((65535 - 40))
+ # Check invalid values first
+ for v in $((min - 1)) $((max + 1)); do
+ ${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10 2>/dev/null
+ # This can fail, or MTU can be adjusted to a proper value
+ [ $? -ne 0 ] && continue
+ mtu="$(link_get_mtu "${ns_a}" vti6_a)"
+ if [ ${mtu} -lt ${min} -o ${mtu} -gt ${max} ]; then
+ err " vti6 tunnel created with invalid MTU ${v}"
+ fail=1
+ fi
+ ${ns_a} ip link del vti6_a
+ done
+
+ # Now check valid values
+ for v in 68 1280 1300 $((65535 - 40)); do
+ ${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
+ mtu="$(link_get_mtu "${ns_a}" vti6_a)"
+ ${ns_a} ip link del vti6_a
+ if [ "${mtu}" != "${v}" ]; then
+ err " vti6 MTU ${mtu} doesn't match configured value ${v}"
+ fail=1
+ fi
+ done
+
+ return ${fail}
+}
+
+test_pmtu_vti6_link_change_mtu() {
+ setup namespaces || return 2
+
+ ${ns_a} ip link add dummy0 mtu 1500 type dummy
+ [ $? -ne 0 ] && err " dummy not supported" && return 2
+ ${ns_a} ip link add dummy1 mtu 3000 type dummy
+ ${ns_a} ip link set dummy0 up
+ ${ns_a} ip link set dummy1 up
+
+ ${ns_a} ip addr add ${dummy6_0_addr}/${dummy6_mask} dev dummy0
+ ${ns_a} ip addr add ${dummy6_1_addr}/${dummy6_mask} dev dummy1
+
+ fail=0
+
+ # Create vti6 interface bound to device, passing MTU, check it
+ ${ns_a} ip link add vti6_a mtu 1300 type vti6 remote ${dummy6_0_addr} local ${dummy6_0_addr}
+ mtu="$(link_get_mtu "${ns_a}" vti6_a)"
+ if [ ${mtu} -ne 1300 ]; then
+ err " vti6 MTU ${mtu} doesn't match configured value 1300"
+ fail=1
+ fi
+
+ # Move to another device with different MTU, without passing MTU, check
+ # MTU is adjusted
+ ${ns_a} ip link set vti6_a type vti6 remote ${dummy6_1_addr} local ${dummy6_1_addr}
+ mtu="$(link_get_mtu "${ns_a}" vti6_a)"
+ if [ ${mtu} -ne $((3000 - 40)) ]; then
+ err " vti MTU ${mtu} is not dummy MTU 3000 minus IPv6 header length"
+ fail=1
+ fi
+
+ # Move it back, passing MTU, check MTU is not overridden
+ ${ns_a} ip link set vti6_a mtu 1280 type vti6 remote ${dummy6_0_addr} local ${dummy6_0_addr}
+ mtu="$(link_get_mtu "${ns_a}" vti6_a)"
+ if [ ${mtu} -ne 1280 ]; then
+ err " vti6 MTU ${mtu} doesn't match configured value 1280"
+ fail=1
+ fi
+
+ return ${fail}
+}
+
+trap cleanup EXIT
+
+exitcode=0
+desc=0
+IFS="
+"
+for t in ${tests}; do
+ [ $desc -eq 0 ] && name="${t}" && desc=1 && continue || desc=0
+
+ (
+ unset IFS
+ eval test_${name}
+ ret=$?
+ cleanup
+
+ if [ $ret -eq 0 ]; then
+ printf "TEST: %-60s [ OK ]\n" "${t}"
+ elif [ $ret -eq 1 ]; then
+ printf "TEST: %-60s [FAIL]\n" "${t}"
+ err_flush
+ exit 1
+ elif [ $ret -eq 2 ]; then
+ printf "TEST: %-60s [SKIP]\n" "${t}"
+ err_flush
+ fi
+ )
+ [ $? -ne 0 ] && exitcode=1
+done
+
+exit ${exitcode}
diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c
new file mode 100644
index 000000000..f496ba3b1
--- /dev/null
+++ b/tools/testing/selftests/net/psock_fanout.c
@@ -0,0 +1,486 @@
+/*
+ * Copyright 2013 Google Inc.
+ * Author: Willem de Bruijn (willemb@google.com)
+ *
+ * A basic test of packet socket fanout behavior.
+ *
+ * Control:
+ * - create fanout fails as expected with illegal flag combinations
+ * - join fanout fails as expected with diverging types or flags
+ *
+ * Datapath:
+ * Open a pair of packet sockets and a pair of INET sockets, send a known
+ * number of packets across the two INET sockets and count the number of
+ * packets enqueued onto the two packet sockets.
+ *
+ * The test currently runs for
+ * - PACKET_FANOUT_HASH
+ * - PACKET_FANOUT_HASH with PACKET_FANOUT_FLAG_ROLLOVER
+ * - PACKET_FANOUT_LB
+ * - PACKET_FANOUT_CPU
+ * - PACKET_FANOUT_ROLLOVER
+ * - PACKET_FANOUT_CBPF
+ * - PACKET_FANOUT_EBPF
+ *
+ * Todo:
+ * - functionality: PACKET_FANOUT_FLAG_DEFRAG
+ *
+ * License (GPLv2):
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#define _GNU_SOURCE /* for sched_setaffinity */
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/unistd.h> /* for __NR_bpf */
+#include <linux/filter.h>
+#include <linux/bpf.h>
+#include <linux/if_packet.h>
+#include <net/if.h>
+#include <net/ethernet.h>
+#include <netinet/ip.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <sched.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "psock_lib.h"
+
+#define RING_NUM_FRAMES 20
+
+/* Open a socket in a given fanout mode.
+ * @return -1 if mode is bad, a valid socket otherwise */
+static int sock_fanout_open(uint16_t typeflags, uint16_t group_id)
+{
+ struct sockaddr_ll addr = {0};
+ int fd, val;
+
+ fd = socket(PF_PACKET, SOCK_RAW, 0);
+ if (fd < 0) {
+ perror("socket packet");
+ exit(1);
+ }
+
+ pair_udp_setfilter(fd);
+
+ addr.sll_family = AF_PACKET;
+ addr.sll_protocol = htons(ETH_P_IP);
+ addr.sll_ifindex = if_nametoindex("lo");
+ if (addr.sll_ifindex == 0) {
+ perror("if_nametoindex");
+ exit(1);
+ }
+ if (bind(fd, (void *) &addr, sizeof(addr))) {
+ perror("bind packet");
+ exit(1);
+ }
+
+ val = (((int) typeflags) << 16) | group_id;
+ if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &val, sizeof(val))) {
+ if (close(fd)) {
+ perror("close packet");
+ exit(1);
+ }
+ return -1;
+ }
+
+ return fd;
+}
+
+static void sock_fanout_set_cbpf(int fd)
+{
+ struct sock_filter bpf_filter[] = {
+ BPF_STMT(BPF_LD+BPF_B+BPF_ABS, 80), /* ldb [80] */
+ BPF_STMT(BPF_RET+BPF_A, 0), /* ret A */
+ };
+ struct sock_fprog bpf_prog;
+
+ bpf_prog.filter = bpf_filter;
+ bpf_prog.len = sizeof(bpf_filter) / sizeof(struct sock_filter);
+
+ if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT_DATA, &bpf_prog,
+ sizeof(bpf_prog))) {
+ perror("fanout data cbpf");
+ exit(1);
+ }
+}
+
+static void sock_fanout_getopts(int fd, uint16_t *typeflags, uint16_t *group_id)
+{
+ int sockopt;
+ socklen_t sockopt_len = sizeof(sockopt);
+
+ if (getsockopt(fd, SOL_PACKET, PACKET_FANOUT,
+ &sockopt, &sockopt_len)) {
+ perror("failed to getsockopt");
+ exit(1);
+ }
+ *typeflags = sockopt >> 16;
+ *group_id = sockopt & 0xfffff;
+}
+
+static void sock_fanout_set_ebpf(int fd)
+{
+ static char log_buf[65536];
+
+ const int len_off = __builtin_offsetof(struct __sk_buff, len);
+ struct bpf_insn prog[] = {
+ { BPF_ALU64 | BPF_MOV | BPF_X, 6, 1, 0, 0 },
+ { BPF_LDX | BPF_W | BPF_MEM, 0, 6, len_off, 0 },
+ { BPF_JMP | BPF_JGE | BPF_K, 0, 0, 1, DATA_LEN },
+ { BPF_JMP | BPF_JA | BPF_K, 0, 0, 4, 0 },
+ { BPF_LD | BPF_B | BPF_ABS, 0, 0, 0, 0x50 },
+ { BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 2, DATA_CHAR },
+ { BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 1, DATA_CHAR_1 },
+ { BPF_ALU | BPF_MOV | BPF_K, 0, 0, 0, 0 },
+ { BPF_JMP | BPF_EXIT, 0, 0, 0, 0 }
+ };
+ union bpf_attr attr;
+ int pfd;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+ attr.insns = (unsigned long) prog;
+ attr.insn_cnt = sizeof(prog) / sizeof(prog[0]);
+ attr.license = (unsigned long) "GPL";
+ attr.log_buf = (unsigned long) log_buf,
+ attr.log_size = sizeof(log_buf),
+ attr.log_level = 1,
+
+ pfd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
+ if (pfd < 0) {
+ perror("bpf");
+ fprintf(stderr, "bpf verifier:\n%s\n", log_buf);
+ exit(1);
+ }
+
+ if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT_DATA, &pfd, sizeof(pfd))) {
+ perror("fanout data ebpf");
+ exit(1);
+ }
+
+ if (close(pfd)) {
+ perror("close ebpf");
+ exit(1);
+ }
+}
+
+static char *sock_fanout_open_ring(int fd)
+{
+ struct tpacket_req req = {
+ .tp_block_size = getpagesize(),
+ .tp_frame_size = getpagesize(),
+ .tp_block_nr = RING_NUM_FRAMES,
+ .tp_frame_nr = RING_NUM_FRAMES,
+ };
+ char *ring;
+ int val = TPACKET_V2;
+
+ if (setsockopt(fd, SOL_PACKET, PACKET_VERSION, (void *) &val,
+ sizeof(val))) {
+ perror("packetsock ring setsockopt version");
+ exit(1);
+ }
+ if (setsockopt(fd, SOL_PACKET, PACKET_RX_RING, (void *) &req,
+ sizeof(req))) {
+ perror("packetsock ring setsockopt");
+ exit(1);
+ }
+
+ ring = mmap(0, req.tp_block_size * req.tp_block_nr,
+ PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ if (ring == MAP_FAILED) {
+ perror("packetsock ring mmap");
+ exit(1);
+ }
+
+ return ring;
+}
+
+static int sock_fanout_read_ring(int fd, void *ring)
+{
+ struct tpacket2_hdr *header = ring;
+ int count = 0;
+
+ while (count < RING_NUM_FRAMES && header->tp_status & TP_STATUS_USER) {
+ count++;
+ header = ring + (count * getpagesize());
+ }
+
+ return count;
+}
+
+static int sock_fanout_read(int fds[], char *rings[], const int expect[])
+{
+ int ret[2];
+
+ ret[0] = sock_fanout_read_ring(fds[0], rings[0]);
+ ret[1] = sock_fanout_read_ring(fds[1], rings[1]);
+
+ fprintf(stderr, "info: count=%d,%d, expect=%d,%d\n",
+ ret[0], ret[1], expect[0], expect[1]);
+
+ if ((!(ret[0] == expect[0] && ret[1] == expect[1])) &&
+ (!(ret[0] == expect[1] && ret[1] == expect[0]))) {
+ fprintf(stderr, "warning: incorrect queue lengths\n");
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Test illegal mode + flag combination */
+static void test_control_single(void)
+{
+ fprintf(stderr, "test: control single socket\n");
+
+ if (sock_fanout_open(PACKET_FANOUT_ROLLOVER |
+ PACKET_FANOUT_FLAG_ROLLOVER, 0) != -1) {
+ fprintf(stderr, "ERROR: opened socket with dual rollover\n");
+ exit(1);
+ }
+}
+
+/* Test illegal group with different modes or flags */
+static void test_control_group(void)
+{
+ int fds[2];
+
+ fprintf(stderr, "test: control multiple sockets\n");
+
+ fds[0] = sock_fanout_open(PACKET_FANOUT_HASH, 0);
+ if (fds[0] == -1) {
+ fprintf(stderr, "ERROR: failed to open HASH socket\n");
+ exit(1);
+ }
+ if (sock_fanout_open(PACKET_FANOUT_HASH |
+ PACKET_FANOUT_FLAG_DEFRAG, 0) != -1) {
+ fprintf(stderr, "ERROR: joined group with wrong flag defrag\n");
+ exit(1);
+ }
+ if (sock_fanout_open(PACKET_FANOUT_HASH |
+ PACKET_FANOUT_FLAG_ROLLOVER, 0) != -1) {
+ fprintf(stderr, "ERROR: joined group with wrong flag ro\n");
+ exit(1);
+ }
+ if (sock_fanout_open(PACKET_FANOUT_CPU, 0) != -1) {
+ fprintf(stderr, "ERROR: joined group with wrong mode\n");
+ exit(1);
+ }
+ fds[1] = sock_fanout_open(PACKET_FANOUT_HASH, 0);
+ if (fds[1] == -1) {
+ fprintf(stderr, "ERROR: failed to join group\n");
+ exit(1);
+ }
+ if (close(fds[1]) || close(fds[0])) {
+ fprintf(stderr, "ERROR: closing sockets\n");
+ exit(1);
+ }
+}
+
+/* Test creating a unique fanout group ids */
+static void test_unique_fanout_group_ids(void)
+{
+ int fds[3];
+ uint16_t typeflags, first_group_id, second_group_id;
+
+ fprintf(stderr, "test: unique ids\n");
+
+ fds[0] = sock_fanout_open(PACKET_FANOUT_HASH |
+ PACKET_FANOUT_FLAG_UNIQUEID, 0);
+ if (fds[0] == -1) {
+ fprintf(stderr, "ERROR: failed to create a unique id group.\n");
+ exit(1);
+ }
+
+ sock_fanout_getopts(fds[0], &typeflags, &first_group_id);
+ if (typeflags != PACKET_FANOUT_HASH) {
+ fprintf(stderr, "ERROR: unexpected typeflags %x\n", typeflags);
+ exit(1);
+ }
+
+ if (sock_fanout_open(PACKET_FANOUT_CPU, first_group_id) != -1) {
+ fprintf(stderr, "ERROR: joined group with wrong type.\n");
+ exit(1);
+ }
+
+ fds[1] = sock_fanout_open(PACKET_FANOUT_HASH, first_group_id);
+ if (fds[1] == -1) {
+ fprintf(stderr,
+ "ERROR: failed to join previously created group.\n");
+ exit(1);
+ }
+
+ fds[2] = sock_fanout_open(PACKET_FANOUT_HASH |
+ PACKET_FANOUT_FLAG_UNIQUEID, 0);
+ if (fds[2] == -1) {
+ fprintf(stderr,
+ "ERROR: failed to create a second unique id group.\n");
+ exit(1);
+ }
+
+ sock_fanout_getopts(fds[2], &typeflags, &second_group_id);
+ if (sock_fanout_open(PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_UNIQUEID,
+ second_group_id) != -1) {
+ fprintf(stderr,
+ "ERROR: specified a group id when requesting unique id\n");
+ exit(1);
+ }
+
+ if (close(fds[0]) || close(fds[1]) || close(fds[2])) {
+ fprintf(stderr, "ERROR: closing sockets\n");
+ exit(1);
+ }
+}
+
+static int test_datapath(uint16_t typeflags, int port_off,
+ const int expect1[], const int expect2[])
+{
+ const int expect0[] = { 0, 0 };
+ char *rings[2];
+ uint8_t type = typeflags & 0xFF;
+ int fds[2], fds_udp[2][2], ret;
+
+ fprintf(stderr, "\ntest: datapath 0x%hx ports %hu,%hu\n",
+ typeflags, (uint16_t)PORT_BASE,
+ (uint16_t)(PORT_BASE + port_off));
+
+ fds[0] = sock_fanout_open(typeflags, 0);
+ fds[1] = sock_fanout_open(typeflags, 0);
+ if (fds[0] == -1 || fds[1] == -1) {
+ fprintf(stderr, "ERROR: failed open\n");
+ exit(1);
+ }
+ if (type == PACKET_FANOUT_CBPF)
+ sock_fanout_set_cbpf(fds[0]);
+ else if (type == PACKET_FANOUT_EBPF)
+ sock_fanout_set_ebpf(fds[0]);
+
+ rings[0] = sock_fanout_open_ring(fds[0]);
+ rings[1] = sock_fanout_open_ring(fds[1]);
+ pair_udp_open(fds_udp[0], PORT_BASE);
+ pair_udp_open(fds_udp[1], PORT_BASE + port_off);
+ sock_fanout_read(fds, rings, expect0);
+
+ /* Send data, but not enough to overflow a queue */
+ pair_udp_send(fds_udp[0], 15);
+ pair_udp_send_char(fds_udp[1], 5, DATA_CHAR_1);
+ ret = sock_fanout_read(fds, rings, expect1);
+
+ /* Send more data, overflow the queue */
+ pair_udp_send_char(fds_udp[0], 15, DATA_CHAR_1);
+ /* TODO: ensure consistent order between expect1 and expect2 */
+ ret |= sock_fanout_read(fds, rings, expect2);
+
+ if (munmap(rings[1], RING_NUM_FRAMES * getpagesize()) ||
+ munmap(rings[0], RING_NUM_FRAMES * getpagesize())) {
+ fprintf(stderr, "close rings\n");
+ exit(1);
+ }
+ if (close(fds_udp[1][1]) || close(fds_udp[1][0]) ||
+ close(fds_udp[0][1]) || close(fds_udp[0][0]) ||
+ close(fds[1]) || close(fds[0])) {
+ fprintf(stderr, "close datapath\n");
+ exit(1);
+ }
+
+ return ret;
+}
+
+static int set_cpuaffinity(int cpuid)
+{
+ cpu_set_t mask;
+
+ CPU_ZERO(&mask);
+ CPU_SET(cpuid, &mask);
+ if (sched_setaffinity(0, sizeof(mask), &mask)) {
+ if (errno != EINVAL) {
+ fprintf(stderr, "setaffinity %d\n", cpuid);
+ exit(1);
+ }
+ return 1;
+ }
+
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ const int expect_hash[2][2] = { { 15, 5 }, { 20, 5 } };
+ const int expect_hash_rb[2][2] = { { 15, 5 }, { 20, 15 } };
+ const int expect_lb[2][2] = { { 10, 10 }, { 18, 17 } };
+ const int expect_rb[2][2] = { { 15, 5 }, { 20, 15 } };
+ const int expect_cpu0[2][2] = { { 20, 0 }, { 20, 0 } };
+ const int expect_cpu1[2][2] = { { 0, 20 }, { 0, 20 } };
+ const int expect_bpf[2][2] = { { 15, 5 }, { 15, 20 } };
+ const int expect_uniqueid[2][2] = { { 20, 20}, { 20, 20 } };
+ int port_off = 2, tries = 20, ret;
+
+ test_control_single();
+ test_control_group();
+ test_unique_fanout_group_ids();
+
+ /* find a set of ports that do not collide onto the same socket */
+ ret = test_datapath(PACKET_FANOUT_HASH, port_off,
+ expect_hash[0], expect_hash[1]);
+ while (ret) {
+ fprintf(stderr, "info: trying alternate ports (%d)\n", tries);
+ ret = test_datapath(PACKET_FANOUT_HASH, ++port_off,
+ expect_hash[0], expect_hash[1]);
+ if (!--tries) {
+ fprintf(stderr, "too many collisions\n");
+ return 1;
+ }
+ }
+
+ ret |= test_datapath(PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_ROLLOVER,
+ port_off, expect_hash_rb[0], expect_hash_rb[1]);
+ ret |= test_datapath(PACKET_FANOUT_LB,
+ port_off, expect_lb[0], expect_lb[1]);
+ ret |= test_datapath(PACKET_FANOUT_ROLLOVER,
+ port_off, expect_rb[0], expect_rb[1]);
+
+ ret |= test_datapath(PACKET_FANOUT_CBPF,
+ port_off, expect_bpf[0], expect_bpf[1]);
+ ret |= test_datapath(PACKET_FANOUT_EBPF,
+ port_off, expect_bpf[0], expect_bpf[1]);
+
+ set_cpuaffinity(0);
+ ret |= test_datapath(PACKET_FANOUT_CPU, port_off,
+ expect_cpu0[0], expect_cpu0[1]);
+ if (!set_cpuaffinity(1))
+ /* TODO: test that choice alternates with previous */
+ ret |= test_datapath(PACKET_FANOUT_CPU, port_off,
+ expect_cpu1[0], expect_cpu1[1]);
+
+ ret |= test_datapath(PACKET_FANOUT_FLAG_UNIQUEID, port_off,
+ expect_uniqueid[0], expect_uniqueid[1]);
+
+ if (ret)
+ return 1;
+
+ printf("OK. All tests passed\n");
+ return 0;
+}
diff --git a/tools/testing/selftests/net/psock_lib.h b/tools/testing/selftests/net/psock_lib.h
new file mode 100644
index 000000000..7d990d6c8
--- /dev/null
+++ b/tools/testing/selftests/net/psock_lib.h
@@ -0,0 +1,158 @@
+/*
+ * Copyright 2013 Google Inc.
+ * Author: Willem de Bruijn <willemb@google.com>
+ * Daniel Borkmann <dborkman@redhat.com>
+ *
+ * License (GPLv2):
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef PSOCK_LIB_H
+#define PSOCK_LIB_H
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <string.h>
+#include <arpa/inet.h>
+#include <unistd.h>
+
+#define DATA_LEN 100
+#define DATA_CHAR 'a'
+#define DATA_CHAR_1 'b'
+
+#define PORT_BASE 8000
+
+#ifndef __maybe_unused
+# define __maybe_unused __attribute__ ((__unused__))
+#endif
+
+static __maybe_unused void pair_udp_setfilter(int fd)
+{
+ /* the filter below checks for all of the following conditions that
+ * are based on the contents of create_payload()
+ * ether type 0x800 and
+ * ip proto udp and
+ * skb->len == DATA_LEN and
+ * udp[38] == 'a' or udp[38] == 'b'
+ * It can be generated from the following bpf_asm input:
+ * ldh [12]
+ * jne #0x800, drop ; ETH_P_IP
+ * ldb [23]
+ * jneq #17, drop ; IPPROTO_UDP
+ * ld len ; ld skb->len
+ * jlt #100, drop ; DATA_LEN
+ * ldb [80]
+ * jeq #97, pass ; DATA_CHAR
+ * jne #98, drop ; DATA_CHAR_1
+ * pass:
+ * ret #-1
+ * drop:
+ * ret #0
+ */
+ struct sock_filter bpf_filter[] = {
+ { 0x28, 0, 0, 0x0000000c },
+ { 0x15, 0, 8, 0x00000800 },
+ { 0x30, 0, 0, 0x00000017 },
+ { 0x15, 0, 6, 0x00000011 },
+ { 0x80, 0, 0, 0000000000 },
+ { 0x35, 0, 4, 0x00000064 },
+ { 0x30, 0, 0, 0x00000050 },
+ { 0x15, 1, 0, 0x00000061 },
+ { 0x15, 0, 1, 0x00000062 },
+ { 0x06, 0, 0, 0xffffffff },
+ { 0x06, 0, 0, 0000000000 },
+ };
+ struct sock_fprog bpf_prog;
+
+ bpf_prog.filter = bpf_filter;
+ bpf_prog.len = sizeof(bpf_filter) / sizeof(struct sock_filter);
+
+ if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &bpf_prog,
+ sizeof(bpf_prog))) {
+ perror("setsockopt SO_ATTACH_FILTER");
+ exit(1);
+ }
+}
+
+static __maybe_unused void pair_udp_open(int fds[], uint16_t port)
+{
+ struct sockaddr_in saddr, daddr;
+
+ fds[0] = socket(PF_INET, SOCK_DGRAM, 0);
+ fds[1] = socket(PF_INET, SOCK_DGRAM, 0);
+ if (fds[0] == -1 || fds[1] == -1) {
+ fprintf(stderr, "ERROR: socket dgram\n");
+ exit(1);
+ }
+
+ memset(&saddr, 0, sizeof(saddr));
+ saddr.sin_family = AF_INET;
+ saddr.sin_port = htons(port);
+ saddr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+
+ memset(&daddr, 0, sizeof(daddr));
+ daddr.sin_family = AF_INET;
+ daddr.sin_port = htons(port + 1);
+ daddr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+
+ /* must bind both to get consistent hash result */
+ if (bind(fds[1], (void *) &daddr, sizeof(daddr))) {
+ perror("bind");
+ exit(1);
+ }
+ if (bind(fds[0], (void *) &saddr, sizeof(saddr))) {
+ perror("bind");
+ exit(1);
+ }
+ if (connect(fds[0], (void *) &daddr, sizeof(daddr))) {
+ perror("connect");
+ exit(1);
+ }
+}
+
+static __maybe_unused void pair_udp_send_char(int fds[], int num, char payload)
+{
+ char buf[DATA_LEN], rbuf[DATA_LEN];
+
+ memset(buf, payload, sizeof(buf));
+ while (num--) {
+ /* Should really handle EINTR and EAGAIN */
+ if (write(fds[0], buf, sizeof(buf)) != sizeof(buf)) {
+ fprintf(stderr, "ERROR: send failed left=%d\n", num);
+ exit(1);
+ }
+ if (read(fds[1], rbuf, sizeof(rbuf)) != sizeof(rbuf)) {
+ fprintf(stderr, "ERROR: recv failed left=%d\n", num);
+ exit(1);
+ }
+ if (memcmp(buf, rbuf, sizeof(buf))) {
+ fprintf(stderr, "ERROR: data failed left=%d\n", num);
+ exit(1);
+ }
+ }
+}
+
+static __maybe_unused void pair_udp_send(int fds[], int num)
+{
+ return pair_udp_send_char(fds, num, DATA_CHAR);
+}
+
+static __maybe_unused void pair_udp_close(int fds[])
+{
+ close(fds[0]);
+ close(fds[1]);
+}
+
+#endif /* PSOCK_LIB_H */
diff --git a/tools/testing/selftests/net/psock_snd.c b/tools/testing/selftests/net/psock_snd.c
new file mode 100644
index 000000000..7d15e10a9
--- /dev/null
+++ b/tools/testing/selftests/net/psock_snd.c
@@ -0,0 +1,397 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <linux/filter.h>
+#include <linux/bpf.h>
+#include <linux/if_packet.h>
+#include <linux/if_vlan.h>
+#include <linux/virtio_net.h>
+#include <net/if.h>
+#include <net/ethernet.h>
+#include <netinet/ip.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "psock_lib.h"
+
+static bool cfg_use_bind;
+static bool cfg_use_csum_off;
+static bool cfg_use_csum_off_bad;
+static bool cfg_use_dgram;
+static bool cfg_use_gso;
+static bool cfg_use_qdisc_bypass;
+static bool cfg_use_vlan;
+static bool cfg_use_vnet;
+
+static char *cfg_ifname = "lo";
+static int cfg_mtu = 1500;
+static int cfg_payload_len = DATA_LEN;
+static int cfg_truncate_len = INT_MAX;
+static uint16_t cfg_port = 8000;
+
+/* test sending up to max mtu + 1 */
+#define TEST_SZ (sizeof(struct virtio_net_hdr) + ETH_HLEN + ETH_MAX_MTU + 1)
+
+static char tbuf[TEST_SZ], rbuf[TEST_SZ];
+
+static unsigned long add_csum_hword(const uint16_t *start, int num_u16)
+{
+ unsigned long sum = 0;
+ int i;
+
+ for (i = 0; i < num_u16; i++)
+ sum += start[i];
+
+ return sum;
+}
+
+static uint16_t build_ip_csum(const uint16_t *start, int num_u16,
+ unsigned long sum)
+{
+ sum += add_csum_hword(start, num_u16);
+
+ while (sum >> 16)
+ sum = (sum & 0xffff) + (sum >> 16);
+
+ return ~sum;
+}
+
+static int build_vnet_header(void *header)
+{
+ struct virtio_net_hdr *vh = header;
+
+ vh->hdr_len = ETH_HLEN + sizeof(struct iphdr) + sizeof(struct udphdr);
+
+ if (cfg_use_csum_off) {
+ vh->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM;
+ vh->csum_start = ETH_HLEN + sizeof(struct iphdr);
+ vh->csum_offset = __builtin_offsetof(struct udphdr, check);
+
+ /* position check field exactly one byte beyond end of packet */
+ if (cfg_use_csum_off_bad)
+ vh->csum_start += sizeof(struct udphdr) + cfg_payload_len -
+ vh->csum_offset - 1;
+ }
+
+ if (cfg_use_gso) {
+ vh->gso_type = VIRTIO_NET_HDR_GSO_UDP;
+ vh->gso_size = cfg_mtu - sizeof(struct iphdr);
+ }
+
+ return sizeof(*vh);
+}
+
+static int build_eth_header(void *header)
+{
+ struct ethhdr *eth = header;
+
+ if (cfg_use_vlan) {
+ uint16_t *tag = header + ETH_HLEN;
+
+ eth->h_proto = htons(ETH_P_8021Q);
+ tag[1] = htons(ETH_P_IP);
+ return ETH_HLEN + 4;
+ }
+
+ eth->h_proto = htons(ETH_P_IP);
+ return ETH_HLEN;
+}
+
+static int build_ipv4_header(void *header, int payload_len)
+{
+ struct iphdr *iph = header;
+
+ iph->ihl = 5;
+ iph->version = 4;
+ iph->ttl = 8;
+ iph->tot_len = htons(sizeof(*iph) + sizeof(struct udphdr) + payload_len);
+ iph->id = htons(1337);
+ iph->protocol = IPPROTO_UDP;
+ iph->saddr = htonl((172 << 24) | (17 << 16) | 2);
+ iph->daddr = htonl((172 << 24) | (17 << 16) | 1);
+ iph->check = build_ip_csum((void *) iph, iph->ihl << 1, 0);
+
+ return iph->ihl << 2;
+}
+
+static int build_udp_header(void *header, int payload_len)
+{
+ const int alen = sizeof(uint32_t);
+ struct udphdr *udph = header;
+ int len = sizeof(*udph) + payload_len;
+
+ udph->source = htons(9);
+ udph->dest = htons(cfg_port);
+ udph->len = htons(len);
+
+ if (cfg_use_csum_off)
+ udph->check = build_ip_csum(header - (2 * alen), alen,
+ htons(IPPROTO_UDP) + udph->len);
+ else
+ udph->check = 0;
+
+ return sizeof(*udph);
+}
+
+static int build_packet(int payload_len)
+{
+ int off = 0;
+
+ off += build_vnet_header(tbuf);
+ off += build_eth_header(tbuf + off);
+ off += build_ipv4_header(tbuf + off, payload_len);
+ off += build_udp_header(tbuf + off, payload_len);
+
+ if (off + payload_len > sizeof(tbuf))
+ error(1, 0, "payload length exceeds max");
+
+ memset(tbuf + off, DATA_CHAR, payload_len);
+
+ return off + payload_len;
+}
+
+static void do_bind(int fd)
+{
+ struct sockaddr_ll laddr = {0};
+
+ laddr.sll_family = AF_PACKET;
+ laddr.sll_protocol = htons(ETH_P_IP);
+ laddr.sll_ifindex = if_nametoindex(cfg_ifname);
+ if (!laddr.sll_ifindex)
+ error(1, errno, "if_nametoindex");
+
+ if (bind(fd, (void *)&laddr, sizeof(laddr)))
+ error(1, errno, "bind");
+}
+
+static void do_send(int fd, char *buf, int len)
+{
+ int ret;
+
+ if (!cfg_use_vnet) {
+ buf += sizeof(struct virtio_net_hdr);
+ len -= sizeof(struct virtio_net_hdr);
+ }
+ if (cfg_use_dgram) {
+ buf += ETH_HLEN;
+ len -= ETH_HLEN;
+ }
+
+ if (cfg_use_bind) {
+ ret = write(fd, buf, len);
+ } else {
+ struct sockaddr_ll laddr = {0};
+
+ laddr.sll_protocol = htons(ETH_P_IP);
+ laddr.sll_ifindex = if_nametoindex(cfg_ifname);
+ if (!laddr.sll_ifindex)
+ error(1, errno, "if_nametoindex");
+
+ ret = sendto(fd, buf, len, 0, (void *)&laddr, sizeof(laddr));
+ }
+
+ if (ret == -1)
+ error(1, errno, "write");
+ if (ret != len)
+ error(1, 0, "write: %u %u", ret, len);
+
+ fprintf(stderr, "tx: %u\n", ret);
+}
+
+static int do_tx(void)
+{
+ const int one = 1;
+ int fd, len;
+
+ fd = socket(PF_PACKET, cfg_use_dgram ? SOCK_DGRAM : SOCK_RAW, 0);
+ if (fd == -1)
+ error(1, errno, "socket t");
+
+ if (cfg_use_bind)
+ do_bind(fd);
+
+ if (cfg_use_qdisc_bypass &&
+ setsockopt(fd, SOL_PACKET, PACKET_QDISC_BYPASS, &one, sizeof(one)))
+ error(1, errno, "setsockopt qdisc bypass");
+
+ if (cfg_use_vnet &&
+ setsockopt(fd, SOL_PACKET, PACKET_VNET_HDR, &one, sizeof(one)))
+ error(1, errno, "setsockopt vnet");
+
+ len = build_packet(cfg_payload_len);
+
+ if (cfg_truncate_len < len)
+ len = cfg_truncate_len;
+
+ do_send(fd, tbuf, len);
+
+ if (close(fd))
+ error(1, errno, "close t");
+
+ return len;
+}
+
+static int setup_rx(void)
+{
+ struct timeval tv = { .tv_usec = 100 * 1000 };
+ struct sockaddr_in raddr = {0};
+ int fd;
+
+ fd = socket(PF_INET, SOCK_DGRAM, 0);
+ if (fd == -1)
+ error(1, errno, "socket r");
+
+ if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))
+ error(1, errno, "setsockopt rcv timeout");
+
+ raddr.sin_family = AF_INET;
+ raddr.sin_port = htons(cfg_port);
+ raddr.sin_addr.s_addr = htonl(INADDR_ANY);
+
+ if (bind(fd, (void *)&raddr, sizeof(raddr)))
+ error(1, errno, "bind r");
+
+ return fd;
+}
+
+static void do_rx(int fd, int expected_len, char *expected)
+{
+ int ret;
+
+ ret = recv(fd, rbuf, sizeof(rbuf), 0);
+ if (ret == -1)
+ error(1, errno, "recv");
+ if (ret != expected_len)
+ error(1, 0, "recv: %u != %u", ret, expected_len);
+
+ if (memcmp(rbuf, expected, ret))
+ error(1, 0, "recv: data mismatch");
+
+ fprintf(stderr, "rx: %u\n", ret);
+}
+
+static int setup_sniffer(void)
+{
+ struct timeval tv = { .tv_usec = 100 * 1000 };
+ int fd;
+
+ fd = socket(PF_PACKET, SOCK_RAW, 0);
+ if (fd == -1)
+ error(1, errno, "socket p");
+
+ if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))
+ error(1, errno, "setsockopt rcv timeout");
+
+ pair_udp_setfilter(fd);
+ do_bind(fd);
+
+ return fd;
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ int c;
+
+ while ((c = getopt(argc, argv, "bcCdgl:qt:vV")) != -1) {
+ switch (c) {
+ case 'b':
+ cfg_use_bind = true;
+ break;
+ case 'c':
+ cfg_use_csum_off = true;
+ break;
+ case 'C':
+ cfg_use_csum_off_bad = true;
+ break;
+ case 'd':
+ cfg_use_dgram = true;
+ break;
+ case 'g':
+ cfg_use_gso = true;
+ break;
+ case 'l':
+ cfg_payload_len = strtoul(optarg, NULL, 0);
+ break;
+ case 'q':
+ cfg_use_qdisc_bypass = true;
+ break;
+ case 't':
+ cfg_truncate_len = strtoul(optarg, NULL, 0);
+ break;
+ case 'v':
+ cfg_use_vnet = true;
+ break;
+ case 'V':
+ cfg_use_vlan = true;
+ break;
+ default:
+ error(1, 0, "%s: parse error", argv[0]);
+ }
+ }
+
+ if (cfg_use_vlan && cfg_use_dgram)
+ error(1, 0, "option vlan (-V) conflicts with dgram (-d)");
+
+ if (cfg_use_csum_off && !cfg_use_vnet)
+ error(1, 0, "option csum offload (-c) requires vnet (-v)");
+
+ if (cfg_use_csum_off_bad && !cfg_use_csum_off)
+ error(1, 0, "option csum bad (-C) requires csum offload (-c)");
+
+ if (cfg_use_gso && !cfg_use_csum_off)
+ error(1, 0, "option gso (-g) requires csum offload (-c)");
+}
+
+static void run_test(void)
+{
+ int fdr, fds, total_len;
+
+ fdr = setup_rx();
+ fds = setup_sniffer();
+
+ total_len = do_tx();
+
+ /* BPF filter accepts only this length, vlan changes MAC */
+ if (cfg_payload_len == DATA_LEN && !cfg_use_vlan)
+ do_rx(fds, total_len - sizeof(struct virtio_net_hdr),
+ tbuf + sizeof(struct virtio_net_hdr));
+
+ do_rx(fdr, cfg_payload_len, tbuf + total_len - cfg_payload_len);
+
+ if (close(fds))
+ error(1, errno, "close s");
+ if (close(fdr))
+ error(1, errno, "close r");
+}
+
+int main(int argc, char **argv)
+{
+ parse_opts(argc, argv);
+
+ if (system("ip link set dev lo mtu 1500"))
+ error(1, errno, "ip link set mtu");
+ if (system("ip addr add dev lo 172.17.0.1/24"))
+ error(1, errno, "ip addr add");
+
+ run_test();
+
+ fprintf(stderr, "OK\n\n");
+ return 0;
+}
diff --git a/tools/testing/selftests/net/psock_snd.sh b/tools/testing/selftests/net/psock_snd.sh
new file mode 100755
index 000000000..6331d91b8
--- /dev/null
+++ b/tools/testing/selftests/net/psock_snd.sh
@@ -0,0 +1,98 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Run a series of packet socket send regression tests
+
+set -e
+
+readonly mtu=1500
+readonly iphlen=20
+readonly udphlen=8
+
+readonly vnet_hlen=10
+readonly eth_hlen=14
+
+readonly mss="$((${mtu} - ${iphlen} - ${udphlen}))"
+readonly mss_exceeds="$((${mss} + 1))"
+
+readonly max_mtu=65535
+readonly max_mss="$((${max_mtu} - ${iphlen} - ${udphlen}))"
+readonly max_mss_exceeds="$((${max_mss} + 1))"
+
+# functional checks (not a full cross-product)
+
+echo "dgram"
+./in_netns.sh ./psock_snd -d
+
+echo "dgram bind"
+./in_netns.sh ./psock_snd -d -b
+
+echo "raw"
+./in_netns.sh ./psock_snd
+
+echo "raw bind"
+./in_netns.sh ./psock_snd -b
+
+echo "raw qdisc bypass"
+./in_netns.sh ./psock_snd -q
+
+echo "raw vlan"
+./in_netns.sh ./psock_snd -V
+
+echo "raw vnet hdr"
+./in_netns.sh ./psock_snd -v
+
+echo "raw csum_off"
+./in_netns.sh ./psock_snd -v -c
+
+echo "raw csum_off with bad offset (fails)"
+(! ./in_netns.sh ./psock_snd -v -c -C)
+
+
+# bounds check: send {max, max + 1, min, min - 1} lengths
+
+echo "raw min size"
+./in_netns.sh ./psock_snd -l 0
+
+echo "raw mtu size"
+./in_netns.sh ./psock_snd -l "${mss}"
+
+echo "raw mtu size + 1 (fails)"
+(! ./in_netns.sh ./psock_snd -l "${mss_exceeds}")
+
+# fails due to ARPHRD_ETHER check in packet_extra_vlan_len_allowed
+#
+# echo "raw vlan mtu size"
+# ./in_netns.sh ./psock_snd -V -l "${mss}"
+
+echo "raw vlan mtu size + 1 (fails)"
+(! ./in_netns.sh ./psock_snd -V -l "${mss_exceeds}")
+
+echo "dgram mtu size"
+./in_netns.sh ./psock_snd -d -l "${mss}"
+
+echo "dgram mtu size + 1 (fails)"
+(! ./in_netns.sh ./psock_snd -d -l "${mss_exceeds}")
+
+echo "raw truncate hlen (fails: does not arrive)"
+(! ./in_netns.sh ./psock_snd -t "$((${vnet_hlen} + ${eth_hlen}))")
+
+echo "raw truncate hlen - 1 (fails: EINVAL)"
+(! ./in_netns.sh ./psock_snd -t "$((${vnet_hlen} + ${eth_hlen} - 1))")
+
+
+# gso checks: implies -l, because with gso len must exceed gso_size
+
+echo "raw gso min size"
+./in_netns.sh ./psock_snd -v -c -g -l "${mss_exceeds}"
+
+echo "raw gso min size - 1 (fails)"
+(! ./in_netns.sh ./psock_snd -v -c -g -l "${mss}")
+
+echo "raw gso max size"
+./in_netns.sh ./psock_snd -v -c -g -l "${max_mss}"
+
+echo "raw gso max size + 1 (fails)"
+(! ./in_netns.sh ./psock_snd -v -c -g -l "${max_mss_exceeds}")
+
+echo "OK. All tests passed"
diff --git a/tools/testing/selftests/net/psock_tpacket.c b/tools/testing/selftests/net/psock_tpacket.c
new file mode 100644
index 000000000..7ec4fa4d5
--- /dev/null
+++ b/tools/testing/selftests/net/psock_tpacket.c
@@ -0,0 +1,864 @@
+/*
+ * Copyright 2013 Red Hat, Inc.
+ * Author: Daniel Borkmann <dborkman@redhat.com>
+ * Chetan Loke <loke.chetan@gmail.com> (TPACKET_V3 usage example)
+ *
+ * A basic test of packet socket's TPACKET_V1/TPACKET_V2/TPACKET_V3 behavior.
+ *
+ * Control:
+ * Test the setup of the TPACKET socket with different patterns that are
+ * known to fail (TODO) resp. succeed (OK).
+ *
+ * Datapath:
+ * Open a pair of packet sockets and send resp. receive an a priori known
+ * packet pattern accross the sockets and check if it was received resp.
+ * sent correctly. Fanout in combination with RX_RING is currently not
+ * tested here.
+ *
+ * The test currently runs for
+ * - TPACKET_V1: RX_RING, TX_RING
+ * - TPACKET_V2: RX_RING, TX_RING
+ * - TPACKET_V3: RX_RING
+ *
+ * License (GPLv2):
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <sys/mman.h>
+#include <linux/if_packet.h>
+#include <linux/filter.h>
+#include <ctype.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <bits/wordsize.h>
+#include <net/ethernet.h>
+#include <netinet/ip.h>
+#include <arpa/inet.h>
+#include <stdint.h>
+#include <string.h>
+#include <assert.h>
+#include <net/if.h>
+#include <inttypes.h>
+#include <poll.h>
+
+#include "psock_lib.h"
+
+#include "../kselftest.h"
+
+#ifndef bug_on
+# define bug_on(cond) assert(!(cond))
+#endif
+
+#ifndef __aligned_tpacket
+# define __aligned_tpacket __attribute__((aligned(TPACKET_ALIGNMENT)))
+#endif
+
+#ifndef __align_tpacket
+# define __align_tpacket(x) __attribute__((aligned(TPACKET_ALIGN(x))))
+#endif
+
+#define NUM_PACKETS 100
+#define ALIGN_8(x) (((x) + 8 - 1) & ~(8 - 1))
+
+struct ring {
+ struct iovec *rd;
+ uint8_t *mm_space;
+ size_t mm_len, rd_len;
+ struct sockaddr_ll ll;
+ void (*walk)(int sock, struct ring *ring);
+ int type, rd_num, flen, version;
+ union {
+ struct tpacket_req req;
+ struct tpacket_req3 req3;
+ };
+};
+
+struct block_desc {
+ uint32_t version;
+ uint32_t offset_to_priv;
+ struct tpacket_hdr_v1 h1;
+};
+
+union frame_map {
+ struct {
+ struct tpacket_hdr tp_h __aligned_tpacket;
+ struct sockaddr_ll s_ll __align_tpacket(sizeof(struct tpacket_hdr));
+ } *v1;
+ struct {
+ struct tpacket2_hdr tp_h __aligned_tpacket;
+ struct sockaddr_ll s_ll __align_tpacket(sizeof(struct tpacket2_hdr));
+ } *v2;
+ void *raw;
+};
+
+static unsigned int total_packets, total_bytes;
+
+static int pfsocket(int ver)
+{
+ int ret, sock = socket(PF_PACKET, SOCK_RAW, 0);
+ if (sock == -1) {
+ perror("socket");
+ exit(1);
+ }
+
+ ret = setsockopt(sock, SOL_PACKET, PACKET_VERSION, &ver, sizeof(ver));
+ if (ret == -1) {
+ perror("setsockopt");
+ exit(1);
+ }
+
+ return sock;
+}
+
+static void status_bar_update(void)
+{
+ if (total_packets % 10 == 0) {
+ fprintf(stderr, ".");
+ fflush(stderr);
+ }
+}
+
+static void test_payload(void *pay, size_t len)
+{
+ struct ethhdr *eth = pay;
+
+ if (len < sizeof(struct ethhdr)) {
+ fprintf(stderr, "test_payload: packet too "
+ "small: %zu bytes!\n", len);
+ exit(1);
+ }
+
+ if (eth->h_proto != htons(ETH_P_IP)) {
+ fprintf(stderr, "test_payload: wrong ethernet "
+ "type: 0x%x!\n", ntohs(eth->h_proto));
+ exit(1);
+ }
+}
+
+static void create_payload(void *pay, size_t *len)
+{
+ int i;
+ struct ethhdr *eth = pay;
+ struct iphdr *ip = pay + sizeof(*eth);
+
+ /* Lets create some broken crap, that still passes
+ * our BPF filter.
+ */
+
+ *len = DATA_LEN + 42;
+
+ memset(pay, 0xff, ETH_ALEN * 2);
+ eth->h_proto = htons(ETH_P_IP);
+
+ for (i = 0; i < sizeof(*ip); ++i)
+ ((uint8_t *) pay)[i + sizeof(*eth)] = (uint8_t) rand();
+
+ ip->ihl = 5;
+ ip->version = 4;
+ ip->protocol = 0x11;
+ ip->frag_off = 0;
+ ip->ttl = 64;
+ ip->tot_len = htons((uint16_t) *len - sizeof(*eth));
+
+ ip->saddr = htonl(INADDR_LOOPBACK);
+ ip->daddr = htonl(INADDR_LOOPBACK);
+
+ memset(pay + sizeof(*eth) + sizeof(*ip),
+ DATA_CHAR, DATA_LEN);
+}
+
+static inline int __v1_rx_kernel_ready(struct tpacket_hdr *hdr)
+{
+ return ((hdr->tp_status & TP_STATUS_USER) == TP_STATUS_USER);
+}
+
+static inline void __v1_rx_user_ready(struct tpacket_hdr *hdr)
+{
+ hdr->tp_status = TP_STATUS_KERNEL;
+ __sync_synchronize();
+}
+
+static inline int __v2_rx_kernel_ready(struct tpacket2_hdr *hdr)
+{
+ return ((hdr->tp_status & TP_STATUS_USER) == TP_STATUS_USER);
+}
+
+static inline void __v2_rx_user_ready(struct tpacket2_hdr *hdr)
+{
+ hdr->tp_status = TP_STATUS_KERNEL;
+ __sync_synchronize();
+}
+
+static inline int __v1_v2_rx_kernel_ready(void *base, int version)
+{
+ switch (version) {
+ case TPACKET_V1:
+ return __v1_rx_kernel_ready(base);
+ case TPACKET_V2:
+ return __v2_rx_kernel_ready(base);
+ default:
+ bug_on(1);
+ return 0;
+ }
+}
+
+static inline void __v1_v2_rx_user_ready(void *base, int version)
+{
+ switch (version) {
+ case TPACKET_V1:
+ __v1_rx_user_ready(base);
+ break;
+ case TPACKET_V2:
+ __v2_rx_user_ready(base);
+ break;
+ }
+}
+
+static void walk_v1_v2_rx(int sock, struct ring *ring)
+{
+ struct pollfd pfd;
+ int udp_sock[2];
+ union frame_map ppd;
+ unsigned int frame_num = 0;
+
+ bug_on(ring->type != PACKET_RX_RING);
+
+ pair_udp_open(udp_sock, PORT_BASE);
+
+ memset(&pfd, 0, sizeof(pfd));
+ pfd.fd = sock;
+ pfd.events = POLLIN | POLLERR;
+ pfd.revents = 0;
+
+ pair_udp_send(udp_sock, NUM_PACKETS);
+
+ while (total_packets < NUM_PACKETS * 2) {
+ while (__v1_v2_rx_kernel_ready(ring->rd[frame_num].iov_base,
+ ring->version)) {
+ ppd.raw = ring->rd[frame_num].iov_base;
+
+ switch (ring->version) {
+ case TPACKET_V1:
+ test_payload((uint8_t *) ppd.raw + ppd.v1->tp_h.tp_mac,
+ ppd.v1->tp_h.tp_snaplen);
+ total_bytes += ppd.v1->tp_h.tp_snaplen;
+ break;
+
+ case TPACKET_V2:
+ test_payload((uint8_t *) ppd.raw + ppd.v2->tp_h.tp_mac,
+ ppd.v2->tp_h.tp_snaplen);
+ total_bytes += ppd.v2->tp_h.tp_snaplen;
+ break;
+ }
+
+ status_bar_update();
+ total_packets++;
+
+ __v1_v2_rx_user_ready(ppd.raw, ring->version);
+
+ frame_num = (frame_num + 1) % ring->rd_num;
+ }
+
+ poll(&pfd, 1, 1);
+ }
+
+ pair_udp_close(udp_sock);
+
+ if (total_packets != 2 * NUM_PACKETS) {
+ fprintf(stderr, "walk_v%d_rx: received %u out of %u pkts\n",
+ ring->version, total_packets, NUM_PACKETS);
+ exit(1);
+ }
+
+ fprintf(stderr, " %u pkts (%u bytes)", NUM_PACKETS, total_bytes >> 1);
+}
+
+static inline int __v1_tx_kernel_ready(struct tpacket_hdr *hdr)
+{
+ return !(hdr->tp_status & (TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING));
+}
+
+static inline void __v1_tx_user_ready(struct tpacket_hdr *hdr)
+{
+ hdr->tp_status = TP_STATUS_SEND_REQUEST;
+ __sync_synchronize();
+}
+
+static inline int __v2_tx_kernel_ready(struct tpacket2_hdr *hdr)
+{
+ return !(hdr->tp_status & (TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING));
+}
+
+static inline void __v2_tx_user_ready(struct tpacket2_hdr *hdr)
+{
+ hdr->tp_status = TP_STATUS_SEND_REQUEST;
+ __sync_synchronize();
+}
+
+static inline int __v3_tx_kernel_ready(struct tpacket3_hdr *hdr)
+{
+ return !(hdr->tp_status & (TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING));
+}
+
+static inline void __v3_tx_user_ready(struct tpacket3_hdr *hdr)
+{
+ hdr->tp_status = TP_STATUS_SEND_REQUEST;
+ __sync_synchronize();
+}
+
+static inline int __tx_kernel_ready(void *base, int version)
+{
+ switch (version) {
+ case TPACKET_V1:
+ return __v1_tx_kernel_ready(base);
+ case TPACKET_V2:
+ return __v2_tx_kernel_ready(base);
+ case TPACKET_V3:
+ return __v3_tx_kernel_ready(base);
+ default:
+ bug_on(1);
+ return 0;
+ }
+}
+
+static inline void __tx_user_ready(void *base, int version)
+{
+ switch (version) {
+ case TPACKET_V1:
+ __v1_tx_user_ready(base);
+ break;
+ case TPACKET_V2:
+ __v2_tx_user_ready(base);
+ break;
+ case TPACKET_V3:
+ __v3_tx_user_ready(base);
+ break;
+ }
+}
+
+static void __v1_v2_set_packet_loss_discard(int sock)
+{
+ int ret, discard = 1;
+
+ ret = setsockopt(sock, SOL_PACKET, PACKET_LOSS, (void *) &discard,
+ sizeof(discard));
+ if (ret == -1) {
+ perror("setsockopt");
+ exit(1);
+ }
+}
+
+static inline void *get_next_frame(struct ring *ring, int n)
+{
+ uint8_t *f0 = ring->rd[0].iov_base;
+
+ switch (ring->version) {
+ case TPACKET_V1:
+ case TPACKET_V2:
+ return ring->rd[n].iov_base;
+ case TPACKET_V3:
+ return f0 + (n * ring->req3.tp_frame_size);
+ default:
+ bug_on(1);
+ }
+}
+
+static void walk_tx(int sock, struct ring *ring)
+{
+ struct pollfd pfd;
+ int rcv_sock, ret;
+ size_t packet_len;
+ union frame_map ppd;
+ char packet[1024];
+ unsigned int frame_num = 0, got = 0;
+ struct sockaddr_ll ll = {
+ .sll_family = PF_PACKET,
+ .sll_halen = ETH_ALEN,
+ };
+ int nframes;
+
+ /* TPACKET_V{1,2} sets up the ring->rd* related variables based
+ * on frames (e.g., rd_num is tp_frame_nr) whereas V3 sets these
+ * up based on blocks (e.g, rd_num is tp_block_nr)
+ */
+ if (ring->version <= TPACKET_V2)
+ nframes = ring->rd_num;
+ else
+ nframes = ring->req3.tp_frame_nr;
+
+ bug_on(ring->type != PACKET_TX_RING);
+ bug_on(nframes < NUM_PACKETS);
+
+ rcv_sock = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
+ if (rcv_sock == -1) {
+ perror("socket");
+ exit(1);
+ }
+
+ pair_udp_setfilter(rcv_sock);
+
+ ll.sll_ifindex = if_nametoindex("lo");
+ ret = bind(rcv_sock, (struct sockaddr *) &ll, sizeof(ll));
+ if (ret == -1) {
+ perror("bind");
+ exit(1);
+ }
+
+ memset(&pfd, 0, sizeof(pfd));
+ pfd.fd = sock;
+ pfd.events = POLLOUT | POLLERR;
+ pfd.revents = 0;
+
+ total_packets = NUM_PACKETS;
+ create_payload(packet, &packet_len);
+
+ while (total_packets > 0) {
+ void *next = get_next_frame(ring, frame_num);
+
+ while (__tx_kernel_ready(next, ring->version) &&
+ total_packets > 0) {
+ ppd.raw = next;
+
+ switch (ring->version) {
+ case TPACKET_V1:
+ ppd.v1->tp_h.tp_snaplen = packet_len;
+ ppd.v1->tp_h.tp_len = packet_len;
+
+ memcpy((uint8_t *) ppd.raw + TPACKET_HDRLEN -
+ sizeof(struct sockaddr_ll), packet,
+ packet_len);
+ total_bytes += ppd.v1->tp_h.tp_snaplen;
+ break;
+
+ case TPACKET_V2:
+ ppd.v2->tp_h.tp_snaplen = packet_len;
+ ppd.v2->tp_h.tp_len = packet_len;
+
+ memcpy((uint8_t *) ppd.raw + TPACKET2_HDRLEN -
+ sizeof(struct sockaddr_ll), packet,
+ packet_len);
+ total_bytes += ppd.v2->tp_h.tp_snaplen;
+ break;
+ case TPACKET_V3: {
+ struct tpacket3_hdr *tx = next;
+
+ tx->tp_snaplen = packet_len;
+ tx->tp_len = packet_len;
+ tx->tp_next_offset = 0;
+
+ memcpy((uint8_t *)tx + TPACKET3_HDRLEN -
+ sizeof(struct sockaddr_ll), packet,
+ packet_len);
+ total_bytes += tx->tp_snaplen;
+ break;
+ }
+ }
+
+ status_bar_update();
+ total_packets--;
+
+ __tx_user_ready(next, ring->version);
+
+ frame_num = (frame_num + 1) % nframes;
+ }
+
+ poll(&pfd, 1, 1);
+ }
+
+ bug_on(total_packets != 0);
+
+ ret = sendto(sock, NULL, 0, 0, NULL, 0);
+ if (ret == -1) {
+ perror("sendto");
+ exit(1);
+ }
+
+ while ((ret = recvfrom(rcv_sock, packet, sizeof(packet),
+ 0, NULL, NULL)) > 0 &&
+ total_packets < NUM_PACKETS) {
+ got += ret;
+ test_payload(packet, ret);
+
+ status_bar_update();
+ total_packets++;
+ }
+
+ close(rcv_sock);
+
+ if (total_packets != NUM_PACKETS) {
+ fprintf(stderr, "walk_v%d_rx: received %u out of %u pkts\n",
+ ring->version, total_packets, NUM_PACKETS);
+ exit(1);
+ }
+
+ fprintf(stderr, " %u pkts (%u bytes)", NUM_PACKETS, got);
+}
+
+static void walk_v1_v2(int sock, struct ring *ring)
+{
+ if (ring->type == PACKET_RX_RING)
+ walk_v1_v2_rx(sock, ring);
+ else
+ walk_tx(sock, ring);
+}
+
+static uint64_t __v3_prev_block_seq_num = 0;
+
+void __v3_test_block_seq_num(struct block_desc *pbd)
+{
+ if (__v3_prev_block_seq_num + 1 != pbd->h1.seq_num) {
+ fprintf(stderr, "\nprev_block_seq_num:%"PRIu64", expected "
+ "seq:%"PRIu64" != actual seq:%"PRIu64"\n",
+ __v3_prev_block_seq_num, __v3_prev_block_seq_num + 1,
+ (uint64_t) pbd->h1.seq_num);
+ exit(1);
+ }
+
+ __v3_prev_block_seq_num = pbd->h1.seq_num;
+}
+
+static void __v3_test_block_len(struct block_desc *pbd, uint32_t bytes, int block_num)
+{
+ if (pbd->h1.num_pkts && bytes != pbd->h1.blk_len) {
+ fprintf(stderr, "\nblock:%u with %upackets, expected "
+ "len:%u != actual len:%u\n", block_num,
+ pbd->h1.num_pkts, bytes, pbd->h1.blk_len);
+ exit(1);
+ }
+}
+
+static void __v3_test_block_header(struct block_desc *pbd, const int block_num)
+{
+ if ((pbd->h1.block_status & TP_STATUS_USER) == 0) {
+ fprintf(stderr, "\nblock %u: not in TP_STATUS_USER\n", block_num);
+ exit(1);
+ }
+
+ __v3_test_block_seq_num(pbd);
+}
+
+static void __v3_walk_block(struct block_desc *pbd, const int block_num)
+{
+ int num_pkts = pbd->h1.num_pkts, i;
+ unsigned long bytes = 0, bytes_with_padding = ALIGN_8(sizeof(*pbd));
+ struct tpacket3_hdr *ppd;
+
+ __v3_test_block_header(pbd, block_num);
+
+ ppd = (struct tpacket3_hdr *) ((uint8_t *) pbd +
+ pbd->h1.offset_to_first_pkt);
+
+ for (i = 0; i < num_pkts; ++i) {
+ bytes += ppd->tp_snaplen;
+
+ if (ppd->tp_next_offset)
+ bytes_with_padding += ppd->tp_next_offset;
+ else
+ bytes_with_padding += ALIGN_8(ppd->tp_snaplen + ppd->tp_mac);
+
+ test_payload((uint8_t *) ppd + ppd->tp_mac, ppd->tp_snaplen);
+
+ status_bar_update();
+ total_packets++;
+
+ ppd = (struct tpacket3_hdr *) ((uint8_t *) ppd + ppd->tp_next_offset);
+ __sync_synchronize();
+ }
+
+ __v3_test_block_len(pbd, bytes_with_padding, block_num);
+ total_bytes += bytes;
+}
+
+void __v3_flush_block(struct block_desc *pbd)
+{
+ pbd->h1.block_status = TP_STATUS_KERNEL;
+ __sync_synchronize();
+}
+
+static void walk_v3_rx(int sock, struct ring *ring)
+{
+ unsigned int block_num = 0;
+ struct pollfd pfd;
+ struct block_desc *pbd;
+ int udp_sock[2];
+
+ bug_on(ring->type != PACKET_RX_RING);
+
+ pair_udp_open(udp_sock, PORT_BASE);
+
+ memset(&pfd, 0, sizeof(pfd));
+ pfd.fd = sock;
+ pfd.events = POLLIN | POLLERR;
+ pfd.revents = 0;
+
+ pair_udp_send(udp_sock, NUM_PACKETS);
+
+ while (total_packets < NUM_PACKETS * 2) {
+ pbd = (struct block_desc *) ring->rd[block_num].iov_base;
+
+ while ((pbd->h1.block_status & TP_STATUS_USER) == 0)
+ poll(&pfd, 1, 1);
+
+ __v3_walk_block(pbd, block_num);
+ __v3_flush_block(pbd);
+
+ block_num = (block_num + 1) % ring->rd_num;
+ }
+
+ pair_udp_close(udp_sock);
+
+ if (total_packets != 2 * NUM_PACKETS) {
+ fprintf(stderr, "walk_v3_rx: received %u out of %u pkts\n",
+ total_packets, NUM_PACKETS);
+ exit(1);
+ }
+
+ fprintf(stderr, " %u pkts (%u bytes)", NUM_PACKETS, total_bytes >> 1);
+}
+
+static void walk_v3(int sock, struct ring *ring)
+{
+ if (ring->type == PACKET_RX_RING)
+ walk_v3_rx(sock, ring);
+ else
+ walk_tx(sock, ring);
+}
+
+static void __v1_v2_fill(struct ring *ring, unsigned int blocks)
+{
+ ring->req.tp_block_size = getpagesize() << 2;
+ ring->req.tp_frame_size = TPACKET_ALIGNMENT << 7;
+ ring->req.tp_block_nr = blocks;
+
+ ring->req.tp_frame_nr = ring->req.tp_block_size /
+ ring->req.tp_frame_size *
+ ring->req.tp_block_nr;
+
+ ring->mm_len = ring->req.tp_block_size * ring->req.tp_block_nr;
+ ring->walk = walk_v1_v2;
+ ring->rd_num = ring->req.tp_frame_nr;
+ ring->flen = ring->req.tp_frame_size;
+}
+
+static void __v3_fill(struct ring *ring, unsigned int blocks, int type)
+{
+ if (type == PACKET_RX_RING) {
+ ring->req3.tp_retire_blk_tov = 64;
+ ring->req3.tp_sizeof_priv = 0;
+ ring->req3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH;
+ }
+ ring->req3.tp_block_size = getpagesize() << 2;
+ ring->req3.tp_frame_size = TPACKET_ALIGNMENT << 7;
+ ring->req3.tp_block_nr = blocks;
+
+ ring->req3.tp_frame_nr = ring->req3.tp_block_size /
+ ring->req3.tp_frame_size *
+ ring->req3.tp_block_nr;
+
+ ring->mm_len = ring->req3.tp_block_size * ring->req3.tp_block_nr;
+ ring->walk = walk_v3;
+ ring->rd_num = ring->req3.tp_block_nr;
+ ring->flen = ring->req3.tp_block_size;
+}
+
+static void setup_ring(int sock, struct ring *ring, int version, int type)
+{
+ int ret = 0;
+ unsigned int blocks = 256;
+
+ ring->type = type;
+ ring->version = version;
+
+ switch (version) {
+ case TPACKET_V1:
+ case TPACKET_V2:
+ if (type == PACKET_TX_RING)
+ __v1_v2_set_packet_loss_discard(sock);
+ __v1_v2_fill(ring, blocks);
+ ret = setsockopt(sock, SOL_PACKET, type, &ring->req,
+ sizeof(ring->req));
+ break;
+
+ case TPACKET_V3:
+ __v3_fill(ring, blocks, type);
+ ret = setsockopt(sock, SOL_PACKET, type, &ring->req3,
+ sizeof(ring->req3));
+ break;
+ }
+
+ if (ret == -1) {
+ perror("setsockopt");
+ exit(1);
+ }
+
+ ring->rd_len = ring->rd_num * sizeof(*ring->rd);
+ ring->rd = malloc(ring->rd_len);
+ if (ring->rd == NULL) {
+ perror("malloc");
+ exit(1);
+ }
+
+ total_packets = 0;
+ total_bytes = 0;
+}
+
+static void mmap_ring(int sock, struct ring *ring)
+{
+ int i;
+
+ ring->mm_space = mmap(0, ring->mm_len, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_LOCKED | MAP_POPULATE, sock, 0);
+ if (ring->mm_space == MAP_FAILED) {
+ perror("mmap");
+ exit(1);
+ }
+
+ memset(ring->rd, 0, ring->rd_len);
+ for (i = 0; i < ring->rd_num; ++i) {
+ ring->rd[i].iov_base = ring->mm_space + (i * ring->flen);
+ ring->rd[i].iov_len = ring->flen;
+ }
+}
+
+static void bind_ring(int sock, struct ring *ring)
+{
+ int ret;
+
+ pair_udp_setfilter(sock);
+
+ ring->ll.sll_family = PF_PACKET;
+ ring->ll.sll_protocol = htons(ETH_P_ALL);
+ ring->ll.sll_ifindex = if_nametoindex("lo");
+ ring->ll.sll_hatype = 0;
+ ring->ll.sll_pkttype = 0;
+ ring->ll.sll_halen = 0;
+
+ ret = bind(sock, (struct sockaddr *) &ring->ll, sizeof(ring->ll));
+ if (ret == -1) {
+ perror("bind");
+ exit(1);
+ }
+}
+
+static void walk_ring(int sock, struct ring *ring)
+{
+ ring->walk(sock, ring);
+}
+
+static void unmap_ring(int sock, struct ring *ring)
+{
+ munmap(ring->mm_space, ring->mm_len);
+ free(ring->rd);
+}
+
+static int test_kernel_bit_width(void)
+{
+ char in[512], *ptr;
+ int num = 0, fd;
+ ssize_t ret;
+
+ fd = open("/proc/kallsyms", O_RDONLY);
+ if (fd == -1) {
+ perror("open");
+ exit(1);
+ }
+
+ ret = read(fd, in, sizeof(in));
+ if (ret <= 0) {
+ perror("read");
+ exit(1);
+ }
+
+ close(fd);
+
+ ptr = in;
+ while(!isspace(*ptr)) {
+ num++;
+ ptr++;
+ }
+
+ return num * 4;
+}
+
+static int test_user_bit_width(void)
+{
+ return __WORDSIZE;
+}
+
+static const char *tpacket_str[] = {
+ [TPACKET_V1] = "TPACKET_V1",
+ [TPACKET_V2] = "TPACKET_V2",
+ [TPACKET_V3] = "TPACKET_V3",
+};
+
+static const char *type_str[] = {
+ [PACKET_RX_RING] = "PACKET_RX_RING",
+ [PACKET_TX_RING] = "PACKET_TX_RING",
+};
+
+static int test_tpacket(int version, int type)
+{
+ int sock;
+ struct ring ring;
+
+ fprintf(stderr, "test: %s with %s ", tpacket_str[version],
+ type_str[type]);
+ fflush(stderr);
+
+ if (version == TPACKET_V1 &&
+ test_kernel_bit_width() != test_user_bit_width()) {
+ fprintf(stderr, "test: skip %s %s since user and kernel "
+ "space have different bit width\n",
+ tpacket_str[version], type_str[type]);
+ return KSFT_SKIP;
+ }
+
+ sock = pfsocket(version);
+ memset(&ring, 0, sizeof(ring));
+ setup_ring(sock, &ring, version, type);
+ mmap_ring(sock, &ring);
+ bind_ring(sock, &ring);
+ walk_ring(sock, &ring);
+ unmap_ring(sock, &ring);
+ close(sock);
+
+ fprintf(stderr, "\n");
+ return 0;
+}
+
+int main(void)
+{
+ int ret = 0;
+
+ ret |= test_tpacket(TPACKET_V1, PACKET_RX_RING);
+ ret |= test_tpacket(TPACKET_V1, PACKET_TX_RING);
+
+ ret |= test_tpacket(TPACKET_V2, PACKET_RX_RING);
+ ret |= test_tpacket(TPACKET_V2, PACKET_TX_RING);
+
+ ret |= test_tpacket(TPACKET_V3, PACKET_RX_RING);
+ ret |= test_tpacket(TPACKET_V3, PACKET_TX_RING);
+
+ if (ret)
+ return 1;
+
+ printf("OK. All tests passed\n");
+ return 0;
+}
diff --git a/tools/testing/selftests/net/reuseaddr_conflict.c b/tools/testing/selftests/net/reuseaddr_conflict.c
new file mode 100644
index 000000000..7c5b12664
--- /dev/null
+++ b/tools/testing/selftests/net/reuseaddr_conflict.c
@@ -0,0 +1,114 @@
+/*
+ * Test for the regression introduced by
+ *
+ * b9470c27607b ("inet: kill smallest_size and smallest_port")
+ *
+ * If we open an ipv4 socket on a port with reuseaddr we shouldn't reset the tb
+ * when we open the ipv6 conterpart, which is what was happening previously.
+ */
+#include <errno.h>
+#include <error.h>
+#include <arpa/inet.h>
+#include <netinet/in.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#define PORT 9999
+
+int open_port(int ipv6, int any)
+{
+ int fd = -1;
+ int reuseaddr = 1;
+ int v6only = 1;
+ int addrlen;
+ int ret = -1;
+ struct sockaddr *addr;
+ int family = ipv6 ? AF_INET6 : AF_INET;
+
+ struct sockaddr_in6 addr6 = {
+ .sin6_family = AF_INET6,
+ .sin6_port = htons(PORT),
+ .sin6_addr = in6addr_any
+ };
+ struct sockaddr_in addr4 = {
+ .sin_family = AF_INET,
+ .sin_port = htons(PORT),
+ .sin_addr.s_addr = any ? htonl(INADDR_ANY) : inet_addr("127.0.0.1"),
+ };
+
+
+ if (ipv6) {
+ addr = (struct sockaddr*)&addr6;
+ addrlen = sizeof(addr6);
+ } else {
+ addr = (struct sockaddr*)&addr4;
+ addrlen = sizeof(addr4);
+ }
+
+ if ((fd = socket(family, SOCK_STREAM, IPPROTO_TCP)) < 0) {
+ perror("socket");
+ goto out;
+ }
+
+ if (ipv6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, (void*)&v6only,
+ sizeof(v6only)) < 0) {
+ perror("setsockopt IPV6_V6ONLY");
+ goto out;
+ }
+
+ if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &reuseaddr,
+ sizeof(reuseaddr)) < 0) {
+ perror("setsockopt SO_REUSEADDR");
+ goto out;
+ }
+
+ if (bind(fd, addr, addrlen) < 0) {
+ perror("bind");
+ goto out;
+ }
+
+ if (any)
+ return fd;
+
+ if (listen(fd, 1) < 0) {
+ perror("listen");
+ goto out;
+ }
+ return fd;
+out:
+ close(fd);
+ return ret;
+}
+
+int main(void)
+{
+ int listenfd;
+ int fd1, fd2;
+
+ fprintf(stderr, "Opening 127.0.0.1:%d\n", PORT);
+ listenfd = open_port(0, 0);
+ if (listenfd < 0)
+ error(1, errno, "Couldn't open listen socket");
+ fprintf(stderr, "Opening INADDR_ANY:%d\n", PORT);
+ fd1 = open_port(0, 1);
+ if (fd1 >= 0)
+ error(1, 0, "Was allowed to create an ipv4 reuseport on a already bound non-reuseport socket");
+ fprintf(stderr, "Opening in6addr_any:%d\n", PORT);
+ fd1 = open_port(1, 1);
+ if (fd1 < 0)
+ error(1, errno, "Couldn't open ipv6 reuseport");
+ fprintf(stderr, "Opening INADDR_ANY:%d\n", PORT);
+ fd2 = open_port(0, 1);
+ if (fd2 >= 0)
+ error(1, 0, "Was allowed to create an ipv4 reuseport on a already bound non-reuseport socket");
+ close(fd1);
+ fprintf(stderr, "Opening INADDR_ANY:%d after closing ipv6 socket\n", PORT);
+ fd1 = open_port(0, 1);
+ if (fd1 >= 0)
+ error(1, 0, "Was allowed to create an ipv4 reuseport on an already bound non-reuseport socket with no ipv6");
+ fprintf(stderr, "Success");
+ return 0;
+}
diff --git a/tools/testing/selftests/net/reuseport_bpf.c b/tools/testing/selftests/net/reuseport_bpf.c
new file mode 100644
index 000000000..b5277106d
--- /dev/null
+++ b/tools/testing/selftests/net/reuseport_bpf.c
@@ -0,0 +1,641 @@
+/*
+ * Test functionality of BPF filters for SO_REUSEPORT. The tests below will use
+ * a BPF program (both classic and extended) to read the first word from an
+ * incoming packet (expected to be in network byte-order), calculate a modulus
+ * of that number, and then dispatch the packet to the Nth socket using the
+ * result. These tests are run for each supported address family and protocol.
+ * Additionally, a few edge cases in the implementation are tested.
+ */
+
+#include <errno.h>
+#include <error.h>
+#include <fcntl.h>
+#include <linux/bpf.h>
+#include <linux/filter.h>
+#include <linux/unistd.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/epoll.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/resource.h>
+#include <unistd.h>
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+#endif
+
+struct test_params {
+ int recv_family;
+ int send_family;
+ int protocol;
+ size_t recv_socks;
+ uint16_t recv_port;
+ uint16_t send_port_min;
+};
+
+static size_t sockaddr_size(void)
+{
+ return sizeof(struct sockaddr_storage);
+}
+
+static struct sockaddr *new_any_sockaddr(int family, uint16_t port)
+{
+ struct sockaddr_storage *addr;
+ struct sockaddr_in *addr4;
+ struct sockaddr_in6 *addr6;
+
+ addr = malloc(sizeof(struct sockaddr_storage));
+ memset(addr, 0, sizeof(struct sockaddr_storage));
+
+ switch (family) {
+ case AF_INET:
+ addr4 = (struct sockaddr_in *)addr;
+ addr4->sin_family = AF_INET;
+ addr4->sin_addr.s_addr = htonl(INADDR_ANY);
+ addr4->sin_port = htons(port);
+ break;
+ case AF_INET6:
+ addr6 = (struct sockaddr_in6 *)addr;
+ addr6->sin6_family = AF_INET6;
+ addr6->sin6_addr = in6addr_any;
+ addr6->sin6_port = htons(port);
+ break;
+ default:
+ error(1, 0, "Unsupported family %d", family);
+ }
+ return (struct sockaddr *)addr;
+}
+
+static struct sockaddr *new_loopback_sockaddr(int family, uint16_t port)
+{
+ struct sockaddr *addr = new_any_sockaddr(family, port);
+ struct sockaddr_in *addr4;
+ struct sockaddr_in6 *addr6;
+
+ switch (family) {
+ case AF_INET:
+ addr4 = (struct sockaddr_in *)addr;
+ addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+ break;
+ case AF_INET6:
+ addr6 = (struct sockaddr_in6 *)addr;
+ addr6->sin6_addr = in6addr_loopback;
+ break;
+ default:
+ error(1, 0, "Unsupported family %d", family);
+ }
+ return addr;
+}
+
+static void attach_ebpf(int fd, uint16_t mod)
+{
+ static char bpf_log_buf[65536];
+ static const char bpf_license[] = "GPL";
+
+ int bpf_fd;
+ const struct bpf_insn prog[] = {
+ /* BPF_MOV64_REG(BPF_REG_6, BPF_REG_1) */
+ { BPF_ALU64 | BPF_MOV | BPF_X, BPF_REG_6, BPF_REG_1, 0, 0 },
+ /* BPF_LD_ABS(BPF_W, 0) R0 = (uint32_t)skb[0] */
+ { BPF_LD | BPF_ABS | BPF_W, 0, 0, 0, 0 },
+ /* BPF_ALU64_IMM(BPF_MOD, BPF_REG_0, mod) */
+ { BPF_ALU64 | BPF_MOD | BPF_K, BPF_REG_0, 0, 0, mod },
+ /* BPF_EXIT_INSN() */
+ { BPF_JMP | BPF_EXIT, 0, 0, 0, 0 }
+ };
+ union bpf_attr attr;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+ attr.insn_cnt = ARRAY_SIZE(prog);
+ attr.insns = (unsigned long) &prog;
+ attr.license = (unsigned long) &bpf_license;
+ attr.log_buf = (unsigned long) &bpf_log_buf;
+ attr.log_size = sizeof(bpf_log_buf);
+ attr.log_level = 1;
+ attr.kern_version = 0;
+
+ bpf_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
+ if (bpf_fd < 0)
+ error(1, errno, "ebpf error. log:\n%s\n", bpf_log_buf);
+
+ if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &bpf_fd,
+ sizeof(bpf_fd)))
+ error(1, errno, "failed to set SO_ATTACH_REUSEPORT_EBPF");
+
+ close(bpf_fd);
+}
+
+static void attach_cbpf(int fd, uint16_t mod)
+{
+ struct sock_filter code[] = {
+ /* A = (uint32_t)skb[0] */
+ { BPF_LD | BPF_W | BPF_ABS, 0, 0, 0 },
+ /* A = A % mod */
+ { BPF_ALU | BPF_MOD, 0, 0, mod },
+ /* return A */
+ { BPF_RET | BPF_A, 0, 0, 0 },
+ };
+ struct sock_fprog p = {
+ .len = ARRAY_SIZE(code),
+ .filter = code,
+ };
+
+ if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_CBPF, &p, sizeof(p)))
+ error(1, errno, "failed to set SO_ATTACH_REUSEPORT_CBPF");
+}
+
+static void build_recv_group(const struct test_params p, int fd[], uint16_t mod,
+ void (*attach_bpf)(int, uint16_t))
+{
+ struct sockaddr * const addr =
+ new_any_sockaddr(p.recv_family, p.recv_port);
+ int i, opt;
+
+ for (i = 0; i < p.recv_socks; ++i) {
+ fd[i] = socket(p.recv_family, p.protocol, 0);
+ if (fd[i] < 0)
+ error(1, errno, "failed to create recv %d", i);
+
+ opt = 1;
+ if (setsockopt(fd[i], SOL_SOCKET, SO_REUSEPORT, &opt,
+ sizeof(opt)))
+ error(1, errno, "failed to set SO_REUSEPORT on %d", i);
+
+ if (i == 0)
+ attach_bpf(fd[i], mod);
+
+ if (bind(fd[i], addr, sockaddr_size()))
+ error(1, errno, "failed to bind recv socket %d", i);
+
+ if (p.protocol == SOCK_STREAM) {
+ opt = 4;
+ if (setsockopt(fd[i], SOL_TCP, TCP_FASTOPEN, &opt,
+ sizeof(opt)))
+ error(1, errno,
+ "failed to set TCP_FASTOPEN on %d", i);
+ if (listen(fd[i], p.recv_socks * 10))
+ error(1, errno, "failed to listen on socket");
+ }
+ }
+ free(addr);
+}
+
+static void send_from(struct test_params p, uint16_t sport, char *buf,
+ size_t len)
+{
+ struct sockaddr * const saddr = new_any_sockaddr(p.send_family, sport);
+ struct sockaddr * const daddr =
+ new_loopback_sockaddr(p.send_family, p.recv_port);
+ const int fd = socket(p.send_family, p.protocol, 0), one = 1;
+
+ if (fd < 0)
+ error(1, errno, "failed to create send socket");
+
+ if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)))
+ error(1, errno, "failed to set reuseaddr");
+
+ if (bind(fd, saddr, sockaddr_size()))
+ error(1, errno, "failed to bind send socket");
+
+ if (sendto(fd, buf, len, MSG_FASTOPEN, daddr, sockaddr_size()) < 0)
+ error(1, errno, "failed to send message");
+
+ close(fd);
+ free(saddr);
+ free(daddr);
+}
+
+static void test_recv_order(const struct test_params p, int fd[], int mod)
+{
+ char recv_buf[8], send_buf[8];
+ struct msghdr msg;
+ struct iovec recv_io = { recv_buf, 8 };
+ struct epoll_event ev;
+ int epfd, conn, i, sport, expected;
+ uint32_t data, ndata;
+
+ epfd = epoll_create(1);
+ if (epfd < 0)
+ error(1, errno, "failed to create epoll");
+ for (i = 0; i < p.recv_socks; ++i) {
+ ev.events = EPOLLIN;
+ ev.data.fd = fd[i];
+ if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd[i], &ev))
+ error(1, errno, "failed to register sock %d epoll", i);
+ }
+
+ memset(&msg, 0, sizeof(msg));
+ msg.msg_iov = &recv_io;
+ msg.msg_iovlen = 1;
+
+ for (data = 0; data < p.recv_socks * 2; ++data) {
+ sport = p.send_port_min + data;
+ ndata = htonl(data);
+ memcpy(send_buf, &ndata, sizeof(ndata));
+ send_from(p, sport, send_buf, sizeof(ndata));
+
+ i = epoll_wait(epfd, &ev, 1, -1);
+ if (i < 0)
+ error(1, errno, "epoll wait failed");
+
+ if (p.protocol == SOCK_STREAM) {
+ conn = accept(ev.data.fd, NULL, NULL);
+ if (conn < 0)
+ error(1, errno, "error accepting");
+ i = recvmsg(conn, &msg, 0);
+ close(conn);
+ } else {
+ i = recvmsg(ev.data.fd, &msg, 0);
+ }
+ if (i < 0)
+ error(1, errno, "recvmsg error");
+ if (i != sizeof(ndata))
+ error(1, 0, "expected size %zd got %d",
+ sizeof(ndata), i);
+
+ for (i = 0; i < p.recv_socks; ++i)
+ if (ev.data.fd == fd[i])
+ break;
+ memcpy(&ndata, recv_buf, sizeof(ndata));
+ fprintf(stderr, "Socket %d: %d\n", i, ntohl(ndata));
+
+ expected = (sport % mod);
+ if (i != expected)
+ error(1, 0, "expected socket %d", expected);
+ }
+}
+
+static void test_reuseport_ebpf(struct test_params p)
+{
+ int i, fd[p.recv_socks];
+
+ fprintf(stderr, "Testing EBPF mod %zd...\n", p.recv_socks);
+ build_recv_group(p, fd, p.recv_socks, attach_ebpf);
+ test_recv_order(p, fd, p.recv_socks);
+
+ p.send_port_min += p.recv_socks * 2;
+ fprintf(stderr, "Reprograming, testing mod %zd...\n", p.recv_socks / 2);
+ attach_ebpf(fd[0], p.recv_socks / 2);
+ test_recv_order(p, fd, p.recv_socks / 2);
+
+ for (i = 0; i < p.recv_socks; ++i)
+ close(fd[i]);
+}
+
+static void test_reuseport_cbpf(struct test_params p)
+{
+ int i, fd[p.recv_socks];
+
+ fprintf(stderr, "Testing CBPF mod %zd...\n", p.recv_socks);
+ build_recv_group(p, fd, p.recv_socks, attach_cbpf);
+ test_recv_order(p, fd, p.recv_socks);
+
+ p.send_port_min += p.recv_socks * 2;
+ fprintf(stderr, "Reprograming, testing mod %zd...\n", p.recv_socks / 2);
+ attach_cbpf(fd[0], p.recv_socks / 2);
+ test_recv_order(p, fd, p.recv_socks / 2);
+
+ for (i = 0; i < p.recv_socks; ++i)
+ close(fd[i]);
+}
+
+static void test_extra_filter(const struct test_params p)
+{
+ struct sockaddr * const addr =
+ new_any_sockaddr(p.recv_family, p.recv_port);
+ int fd1, fd2, opt;
+
+ fprintf(stderr, "Testing too many filters...\n");
+ fd1 = socket(p.recv_family, p.protocol, 0);
+ if (fd1 < 0)
+ error(1, errno, "failed to create socket 1");
+ fd2 = socket(p.recv_family, p.protocol, 0);
+ if (fd2 < 0)
+ error(1, errno, "failed to create socket 2");
+
+ opt = 1;
+ if (setsockopt(fd1, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)))
+ error(1, errno, "failed to set SO_REUSEPORT on socket 1");
+ if (setsockopt(fd2, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)))
+ error(1, errno, "failed to set SO_REUSEPORT on socket 2");
+
+ attach_ebpf(fd1, 10);
+ attach_ebpf(fd2, 10);
+
+ if (bind(fd1, addr, sockaddr_size()))
+ error(1, errno, "failed to bind recv socket 1");
+
+ if (!bind(fd2, addr, sockaddr_size()) && errno != EADDRINUSE)
+ error(1, errno, "bind socket 2 should fail with EADDRINUSE");
+
+ free(addr);
+}
+
+static void test_filter_no_reuseport(const struct test_params p)
+{
+ struct sockaddr * const addr =
+ new_any_sockaddr(p.recv_family, p.recv_port);
+ const char bpf_license[] = "GPL";
+ struct bpf_insn ecode[] = {
+ { BPF_ALU64 | BPF_MOV | BPF_K, BPF_REG_0, 0, 0, 10 },
+ { BPF_JMP | BPF_EXIT, 0, 0, 0, 0 }
+ };
+ struct sock_filter ccode[] = {{ BPF_RET | BPF_A, 0, 0, 0 }};
+ union bpf_attr eprog;
+ struct sock_fprog cprog;
+ int fd, bpf_fd;
+
+ fprintf(stderr, "Testing filters on non-SO_REUSEPORT socket...\n");
+
+ memset(&eprog, 0, sizeof(eprog));
+ eprog.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+ eprog.insn_cnt = ARRAY_SIZE(ecode);
+ eprog.insns = (unsigned long) &ecode;
+ eprog.license = (unsigned long) &bpf_license;
+ eprog.kern_version = 0;
+
+ memset(&cprog, 0, sizeof(cprog));
+ cprog.len = ARRAY_SIZE(ccode);
+ cprog.filter = ccode;
+
+
+ bpf_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &eprog, sizeof(eprog));
+ if (bpf_fd < 0)
+ error(1, errno, "ebpf error");
+ fd = socket(p.recv_family, p.protocol, 0);
+ if (fd < 0)
+ error(1, errno, "failed to create socket 1");
+
+ if (bind(fd, addr, sockaddr_size()))
+ error(1, errno, "failed to bind recv socket 1");
+
+ errno = 0;
+ if (!setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &bpf_fd,
+ sizeof(bpf_fd)) || errno != EINVAL)
+ error(1, errno, "setsockopt should have returned EINVAL");
+
+ errno = 0;
+ if (!setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_CBPF, &cprog,
+ sizeof(cprog)) || errno != EINVAL)
+ error(1, errno, "setsockopt should have returned EINVAL");
+
+ free(addr);
+}
+
+static void test_filter_without_bind(void)
+{
+ int fd1, fd2, opt = 1;
+
+ fprintf(stderr, "Testing filter add without bind...\n");
+ fd1 = socket(AF_INET, SOCK_DGRAM, 0);
+ if (fd1 < 0)
+ error(1, errno, "failed to create socket 1");
+ fd2 = socket(AF_INET, SOCK_DGRAM, 0);
+ if (fd2 < 0)
+ error(1, errno, "failed to create socket 2");
+ if (setsockopt(fd1, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)))
+ error(1, errno, "failed to set SO_REUSEPORT on socket 1");
+ if (setsockopt(fd2, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)))
+ error(1, errno, "failed to set SO_REUSEPORT on socket 2");
+
+ attach_ebpf(fd1, 10);
+ attach_cbpf(fd2, 10);
+
+ close(fd1);
+ close(fd2);
+}
+
+void enable_fastopen(void)
+{
+ int fd = open("/proc/sys/net/ipv4/tcp_fastopen", 0);
+ int rw_mask = 3; /* bit 1: client side; bit-2 server side */
+ int val, size;
+ char buf[16];
+
+ if (fd < 0)
+ error(1, errno, "Unable to open tcp_fastopen sysctl");
+ if (read(fd, buf, sizeof(buf)) <= 0)
+ error(1, errno, "Unable to read tcp_fastopen sysctl");
+ val = atoi(buf);
+ close(fd);
+
+ if ((val & rw_mask) != rw_mask) {
+ fd = open("/proc/sys/net/ipv4/tcp_fastopen", O_RDWR);
+ if (fd < 0)
+ error(1, errno,
+ "Unable to open tcp_fastopen sysctl for writing");
+ val |= rw_mask;
+ size = snprintf(buf, 16, "%d", val);
+ if (write(fd, buf, size) <= 0)
+ error(1, errno, "Unable to write tcp_fastopen sysctl");
+ close(fd);
+ }
+}
+
+static struct rlimit rlim_old;
+
+static __attribute__((constructor)) void main_ctor(void)
+{
+ getrlimit(RLIMIT_MEMLOCK, &rlim_old);
+
+ if (rlim_old.rlim_cur != RLIM_INFINITY) {
+ struct rlimit rlim_new;
+
+ rlim_new.rlim_cur = rlim_old.rlim_cur + (1UL << 20);
+ rlim_new.rlim_max = rlim_old.rlim_max + (1UL << 20);
+ setrlimit(RLIMIT_MEMLOCK, &rlim_new);
+ }
+}
+
+static __attribute__((destructor)) void main_dtor(void)
+{
+ setrlimit(RLIMIT_MEMLOCK, &rlim_old);
+}
+
+int main(void)
+{
+ fprintf(stderr, "---- IPv4 UDP ----\n");
+ /* NOTE: UDP socket lookups traverse a different code path when there
+ * are > 10 sockets in a group. Run the bpf test through both paths.
+ */
+ test_reuseport_ebpf((struct test_params) {
+ .recv_family = AF_INET,
+ .send_family = AF_INET,
+ .protocol = SOCK_DGRAM,
+ .recv_socks = 10,
+ .recv_port = 8000,
+ .send_port_min = 9000});
+ test_reuseport_ebpf((struct test_params) {
+ .recv_family = AF_INET,
+ .send_family = AF_INET,
+ .protocol = SOCK_DGRAM,
+ .recv_socks = 20,
+ .recv_port = 8000,
+ .send_port_min = 9000});
+ test_reuseport_cbpf((struct test_params) {
+ .recv_family = AF_INET,
+ .send_family = AF_INET,
+ .protocol = SOCK_DGRAM,
+ .recv_socks = 10,
+ .recv_port = 8001,
+ .send_port_min = 9020});
+ test_reuseport_cbpf((struct test_params) {
+ .recv_family = AF_INET,
+ .send_family = AF_INET,
+ .protocol = SOCK_DGRAM,
+ .recv_socks = 20,
+ .recv_port = 8001,
+ .send_port_min = 9020});
+ test_extra_filter((struct test_params) {
+ .recv_family = AF_INET,
+ .protocol = SOCK_DGRAM,
+ .recv_port = 8002});
+ test_filter_no_reuseport((struct test_params) {
+ .recv_family = AF_INET,
+ .protocol = SOCK_DGRAM,
+ .recv_port = 8008});
+
+ fprintf(stderr, "---- IPv6 UDP ----\n");
+ test_reuseport_ebpf((struct test_params) {
+ .recv_family = AF_INET6,
+ .send_family = AF_INET6,
+ .protocol = SOCK_DGRAM,
+ .recv_socks = 10,
+ .recv_port = 8003,
+ .send_port_min = 9040});
+ test_reuseport_ebpf((struct test_params) {
+ .recv_family = AF_INET6,
+ .send_family = AF_INET6,
+ .protocol = SOCK_DGRAM,
+ .recv_socks = 20,
+ .recv_port = 8003,
+ .send_port_min = 9040});
+ test_reuseport_cbpf((struct test_params) {
+ .recv_family = AF_INET6,
+ .send_family = AF_INET6,
+ .protocol = SOCK_DGRAM,
+ .recv_socks = 10,
+ .recv_port = 8004,
+ .send_port_min = 9060});
+ test_reuseport_cbpf((struct test_params) {
+ .recv_family = AF_INET6,
+ .send_family = AF_INET6,
+ .protocol = SOCK_DGRAM,
+ .recv_socks = 20,
+ .recv_port = 8004,
+ .send_port_min = 9060});
+ test_extra_filter((struct test_params) {
+ .recv_family = AF_INET6,
+ .protocol = SOCK_DGRAM,
+ .recv_port = 8005});
+ test_filter_no_reuseport((struct test_params) {
+ .recv_family = AF_INET6,
+ .protocol = SOCK_DGRAM,
+ .recv_port = 8009});
+
+ fprintf(stderr, "---- IPv6 UDP w/ mapped IPv4 ----\n");
+ test_reuseport_ebpf((struct test_params) {
+ .recv_family = AF_INET6,
+ .send_family = AF_INET,
+ .protocol = SOCK_DGRAM,
+ .recv_socks = 20,
+ .recv_port = 8006,
+ .send_port_min = 9080});
+ test_reuseport_ebpf((struct test_params) {
+ .recv_family = AF_INET6,
+ .send_family = AF_INET,
+ .protocol = SOCK_DGRAM,
+ .recv_socks = 10,
+ .recv_port = 8006,
+ .send_port_min = 9080});
+ test_reuseport_cbpf((struct test_params) {
+ .recv_family = AF_INET6,
+ .send_family = AF_INET,
+ .protocol = SOCK_DGRAM,
+ .recv_socks = 10,
+ .recv_port = 8007,
+ .send_port_min = 9100});
+ test_reuseport_cbpf((struct test_params) {
+ .recv_family = AF_INET6,
+ .send_family = AF_INET,
+ .protocol = SOCK_DGRAM,
+ .recv_socks = 20,
+ .recv_port = 8007,
+ .send_port_min = 9100});
+
+ /* TCP fastopen is required for the TCP tests */
+ enable_fastopen();
+ fprintf(stderr, "---- IPv4 TCP ----\n");
+ test_reuseport_ebpf((struct test_params) {
+ .recv_family = AF_INET,
+ .send_family = AF_INET,
+ .protocol = SOCK_STREAM,
+ .recv_socks = 10,
+ .recv_port = 8008,
+ .send_port_min = 9120});
+ test_reuseport_cbpf((struct test_params) {
+ .recv_family = AF_INET,
+ .send_family = AF_INET,
+ .protocol = SOCK_STREAM,
+ .recv_socks = 10,
+ .recv_port = 8009,
+ .send_port_min = 9160});
+ test_extra_filter((struct test_params) {
+ .recv_family = AF_INET,
+ .protocol = SOCK_STREAM,
+ .recv_port = 8010});
+ test_filter_no_reuseport((struct test_params) {
+ .recv_family = AF_INET,
+ .protocol = SOCK_STREAM,
+ .recv_port = 8011});
+
+ fprintf(stderr, "---- IPv6 TCP ----\n");
+ test_reuseport_ebpf((struct test_params) {
+ .recv_family = AF_INET6,
+ .send_family = AF_INET6,
+ .protocol = SOCK_STREAM,
+ .recv_socks = 10,
+ .recv_port = 8012,
+ .send_port_min = 9200});
+ test_reuseport_cbpf((struct test_params) {
+ .recv_family = AF_INET6,
+ .send_family = AF_INET6,
+ .protocol = SOCK_STREAM,
+ .recv_socks = 10,
+ .recv_port = 8013,
+ .send_port_min = 9240});
+ test_extra_filter((struct test_params) {
+ .recv_family = AF_INET6,
+ .protocol = SOCK_STREAM,
+ .recv_port = 8014});
+ test_filter_no_reuseport((struct test_params) {
+ .recv_family = AF_INET6,
+ .protocol = SOCK_STREAM,
+ .recv_port = 8015});
+
+ fprintf(stderr, "---- IPv6 TCP w/ mapped IPv4 ----\n");
+ test_reuseport_ebpf((struct test_params) {
+ .recv_family = AF_INET6,
+ .send_family = AF_INET,
+ .protocol = SOCK_STREAM,
+ .recv_socks = 10,
+ .recv_port = 8016,
+ .send_port_min = 9320});
+ test_reuseport_cbpf((struct test_params) {
+ .recv_family = AF_INET6,
+ .send_family = AF_INET,
+ .protocol = SOCK_STREAM,
+ .recv_socks = 10,
+ .recv_port = 8017,
+ .send_port_min = 9360});
+
+ test_filter_without_bind();
+
+ fprintf(stderr, "SUCCESS\n");
+ return 0;
+}
diff --git a/tools/testing/selftests/net/reuseport_bpf_cpu.c b/tools/testing/selftests/net/reuseport_bpf_cpu.c
new file mode 100644
index 000000000..2d6461747
--- /dev/null
+++ b/tools/testing/selftests/net/reuseport_bpf_cpu.c
@@ -0,0 +1,259 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test functionality of BPF filters with SO_REUSEPORT. This program creates
+ * an SO_REUSEPORT receiver group containing one socket per CPU core. It then
+ * creates a BPF program that will select a socket from this group based
+ * on the core id that receives the packet. The sending code artificially
+ * moves itself to run on different core ids and sends one message from
+ * each core. Since these packets are delivered over loopback, they should
+ * arrive on the same core that sent them. The receiving code then ensures
+ * that the packet was received on the socket for the corresponding core id.
+ * This entire process is done for several different core id permutations
+ * and for each IPv4/IPv6 and TCP/UDP combination.
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <linux/filter.h>
+#include <linux/in.h>
+#include <linux/unistd.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/epoll.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <unistd.h>
+
+static const int PORT = 8888;
+
+static void build_rcv_group(int *rcv_fd, size_t len, int family, int proto)
+{
+ struct sockaddr_storage addr;
+ struct sockaddr_in *addr4;
+ struct sockaddr_in6 *addr6;
+ size_t i;
+ int opt;
+
+ switch (family) {
+ case AF_INET:
+ addr4 = (struct sockaddr_in *)&addr;
+ addr4->sin_family = AF_INET;
+ addr4->sin_addr.s_addr = htonl(INADDR_ANY);
+ addr4->sin_port = htons(PORT);
+ break;
+ case AF_INET6:
+ addr6 = (struct sockaddr_in6 *)&addr;
+ addr6->sin6_family = AF_INET6;
+ addr6->sin6_addr = in6addr_any;
+ addr6->sin6_port = htons(PORT);
+ break;
+ default:
+ error(1, 0, "Unsupported family %d", family);
+ }
+
+ for (i = 0; i < len; ++i) {
+ rcv_fd[i] = socket(family, proto, 0);
+ if (rcv_fd[i] < 0)
+ error(1, errno, "failed to create receive socket");
+
+ opt = 1;
+ if (setsockopt(rcv_fd[i], SOL_SOCKET, SO_REUSEPORT, &opt,
+ sizeof(opt)))
+ error(1, errno, "failed to set SO_REUSEPORT");
+
+ if (bind(rcv_fd[i], (struct sockaddr *)&addr, sizeof(addr)))
+ error(1, errno, "failed to bind receive socket");
+
+ if (proto == SOCK_STREAM && listen(rcv_fd[i], len * 10))
+ error(1, errno, "failed to listen on receive port");
+ }
+}
+
+static void attach_bpf(int fd)
+{
+ struct sock_filter code[] = {
+ /* A = raw_smp_processor_id() */
+ { BPF_LD | BPF_W | BPF_ABS, 0, 0, SKF_AD_OFF + SKF_AD_CPU },
+ /* return A */
+ { BPF_RET | BPF_A, 0, 0, 0 },
+ };
+ struct sock_fprog p = {
+ .len = 2,
+ .filter = code,
+ };
+
+ if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_CBPF, &p, sizeof(p)))
+ error(1, errno, "failed to set SO_ATTACH_REUSEPORT_CBPF");
+}
+
+static void send_from_cpu(int cpu_id, int family, int proto)
+{
+ struct sockaddr_storage saddr, daddr;
+ struct sockaddr_in *saddr4, *daddr4;
+ struct sockaddr_in6 *saddr6, *daddr6;
+ cpu_set_t cpu_set;
+ int fd;
+
+ switch (family) {
+ case AF_INET:
+ saddr4 = (struct sockaddr_in *)&saddr;
+ saddr4->sin_family = AF_INET;
+ saddr4->sin_addr.s_addr = htonl(INADDR_ANY);
+ saddr4->sin_port = 0;
+
+ daddr4 = (struct sockaddr_in *)&daddr;
+ daddr4->sin_family = AF_INET;
+ daddr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+ daddr4->sin_port = htons(PORT);
+ break;
+ case AF_INET6:
+ saddr6 = (struct sockaddr_in6 *)&saddr;
+ saddr6->sin6_family = AF_INET6;
+ saddr6->sin6_addr = in6addr_any;
+ saddr6->sin6_port = 0;
+
+ daddr6 = (struct sockaddr_in6 *)&daddr;
+ daddr6->sin6_family = AF_INET6;
+ daddr6->sin6_addr = in6addr_loopback;
+ daddr6->sin6_port = htons(PORT);
+ break;
+ default:
+ error(1, 0, "Unsupported family %d", family);
+ }
+
+ memset(&cpu_set, 0, sizeof(cpu_set));
+ CPU_SET(cpu_id, &cpu_set);
+ if (sched_setaffinity(0, sizeof(cpu_set), &cpu_set) < 0)
+ error(1, errno, "failed to pin to cpu");
+
+ fd = socket(family, proto, 0);
+ if (fd < 0)
+ error(1, errno, "failed to create send socket");
+
+ if (bind(fd, (struct sockaddr *)&saddr, sizeof(saddr)))
+ error(1, errno, "failed to bind send socket");
+
+ if (connect(fd, (struct sockaddr *)&daddr, sizeof(daddr)))
+ error(1, errno, "failed to connect send socket");
+
+ if (send(fd, "a", 1, 0) < 0)
+ error(1, errno, "failed to send message");
+
+ close(fd);
+}
+
+static
+void receive_on_cpu(int *rcv_fd, int len, int epfd, int cpu_id, int proto)
+{
+ struct epoll_event ev;
+ int i, fd;
+ char buf[8];
+
+ i = epoll_wait(epfd, &ev, 1, -1);
+ if (i < 0)
+ error(1, errno, "epoll_wait failed");
+
+ if (proto == SOCK_STREAM) {
+ fd = accept(ev.data.fd, NULL, NULL);
+ if (fd < 0)
+ error(1, errno, "failed to accept");
+ i = recv(fd, buf, sizeof(buf), 0);
+ close(fd);
+ } else {
+ i = recv(ev.data.fd, buf, sizeof(buf), 0);
+ }
+
+ if (i < 0)
+ error(1, errno, "failed to recv");
+
+ for (i = 0; i < len; ++i)
+ if (ev.data.fd == rcv_fd[i])
+ break;
+ if (i == len)
+ error(1, 0, "failed to find socket");
+ fprintf(stderr, "send cpu %d, receive socket %d\n", cpu_id, i);
+ if (cpu_id != i)
+ error(1, 0, "cpu id/receive socket mismatch");
+}
+
+static void test(int *rcv_fd, int len, int family, int proto)
+{
+ struct epoll_event ev;
+ int epfd, cpu;
+
+ build_rcv_group(rcv_fd, len, family, proto);
+ attach_bpf(rcv_fd[0]);
+
+ epfd = epoll_create(1);
+ if (epfd < 0)
+ error(1, errno, "failed to create epoll");
+ for (cpu = 0; cpu < len; ++cpu) {
+ ev.events = EPOLLIN;
+ ev.data.fd = rcv_fd[cpu];
+ if (epoll_ctl(epfd, EPOLL_CTL_ADD, rcv_fd[cpu], &ev))
+ error(1, errno, "failed to register sock epoll");
+ }
+
+ /* Forward iterate */
+ for (cpu = 0; cpu < len; ++cpu) {
+ send_from_cpu(cpu, family, proto);
+ receive_on_cpu(rcv_fd, len, epfd, cpu, proto);
+ }
+
+ /* Reverse iterate */
+ for (cpu = len - 1; cpu >= 0; --cpu) {
+ send_from_cpu(cpu, family, proto);
+ receive_on_cpu(rcv_fd, len, epfd, cpu, proto);
+ }
+
+ /* Even cores */
+ for (cpu = 0; cpu < len; cpu += 2) {
+ send_from_cpu(cpu, family, proto);
+ receive_on_cpu(rcv_fd, len, epfd, cpu, proto);
+ }
+
+ /* Odd cores */
+ for (cpu = 1; cpu < len; cpu += 2) {
+ send_from_cpu(cpu, family, proto);
+ receive_on_cpu(rcv_fd, len, epfd, cpu, proto);
+ }
+
+ close(epfd);
+ for (cpu = 0; cpu < len; ++cpu)
+ close(rcv_fd[cpu]);
+}
+
+int main(void)
+{
+ int *rcv_fd, cpus;
+
+ cpus = sysconf(_SC_NPROCESSORS_ONLN);
+ if (cpus <= 0)
+ error(1, errno, "failed counting cpus");
+
+ rcv_fd = calloc(cpus, sizeof(int));
+ if (!rcv_fd)
+ error(1, 0, "failed to allocate array");
+
+ fprintf(stderr, "---- IPv4 UDP ----\n");
+ test(rcv_fd, cpus, AF_INET, SOCK_DGRAM);
+
+ fprintf(stderr, "---- IPv6 UDP ----\n");
+ test(rcv_fd, cpus, AF_INET6, SOCK_DGRAM);
+
+ fprintf(stderr, "---- IPv4 TCP ----\n");
+ test(rcv_fd, cpus, AF_INET, SOCK_STREAM);
+
+ fprintf(stderr, "---- IPv6 TCP ----\n");
+ test(rcv_fd, cpus, AF_INET6, SOCK_STREAM);
+
+ free(rcv_fd);
+
+ fprintf(stderr, "SUCCESS\n");
+ return 0;
+}
diff --git a/tools/testing/selftests/net/reuseport_bpf_numa.c b/tools/testing/selftests/net/reuseport_bpf_numa.c
new file mode 100644
index 000000000..c9f478b40
--- /dev/null
+++ b/tools/testing/selftests/net/reuseport_bpf_numa.c
@@ -0,0 +1,258 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test functionality of BPF filters with SO_REUSEPORT. Same test as
+ * in reuseport_bpf_cpu, only as one socket per NUMA node.
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <linux/filter.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/unistd.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/epoll.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <unistd.h>
+#include <numa.h>
+
+#include "../kselftest.h"
+
+static const int PORT = 8888;
+
+static void build_rcv_group(int *rcv_fd, size_t len, int family, int proto)
+{
+ struct sockaddr_storage addr;
+ struct sockaddr_in *addr4;
+ struct sockaddr_in6 *addr6;
+ size_t i;
+ int opt;
+
+ switch (family) {
+ case AF_INET:
+ addr4 = (struct sockaddr_in *)&addr;
+ addr4->sin_family = AF_INET;
+ addr4->sin_addr.s_addr = htonl(INADDR_ANY);
+ addr4->sin_port = htons(PORT);
+ break;
+ case AF_INET6:
+ addr6 = (struct sockaddr_in6 *)&addr;
+ addr6->sin6_family = AF_INET6;
+ addr6->sin6_addr = in6addr_any;
+ addr6->sin6_port = htons(PORT);
+ break;
+ default:
+ error(1, 0, "Unsupported family %d", family);
+ }
+
+ for (i = 0; i < len; ++i) {
+ rcv_fd[i] = socket(family, proto, 0);
+ if (rcv_fd[i] < 0)
+ error(1, errno, "failed to create receive socket");
+
+ opt = 1;
+ if (setsockopt(rcv_fd[i], SOL_SOCKET, SO_REUSEPORT, &opt,
+ sizeof(opt)))
+ error(1, errno, "failed to set SO_REUSEPORT");
+
+ if (bind(rcv_fd[i], (struct sockaddr *)&addr, sizeof(addr)))
+ error(1, errno, "failed to bind receive socket");
+
+ if (proto == SOCK_STREAM && listen(rcv_fd[i], len * 10))
+ error(1, errno, "failed to listen on receive port");
+ }
+}
+
+static void attach_bpf(int fd)
+{
+ static char bpf_log_buf[65536];
+ static const char bpf_license[] = "";
+
+ int bpf_fd;
+ const struct bpf_insn prog[] = {
+ /* R0 = bpf_get_numa_node_id() */
+ { BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_numa_node_id },
+ /* return R0 */
+ { BPF_JMP | BPF_EXIT, 0, 0, 0, 0 }
+ };
+ union bpf_attr attr;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+ attr.insn_cnt = sizeof(prog) / sizeof(prog[0]);
+ attr.insns = (unsigned long) &prog;
+ attr.license = (unsigned long) &bpf_license;
+ attr.log_buf = (unsigned long) &bpf_log_buf;
+ attr.log_size = sizeof(bpf_log_buf);
+ attr.log_level = 1;
+
+ bpf_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
+ if (bpf_fd < 0)
+ error(1, errno, "ebpf error. log:\n%s\n", bpf_log_buf);
+
+ if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &bpf_fd,
+ sizeof(bpf_fd)))
+ error(1, errno, "failed to set SO_ATTACH_REUSEPORT_EBPF");
+
+ close(bpf_fd);
+}
+
+static void send_from_node(int node_id, int family, int proto)
+{
+ struct sockaddr_storage saddr, daddr;
+ struct sockaddr_in *saddr4, *daddr4;
+ struct sockaddr_in6 *saddr6, *daddr6;
+ int fd;
+
+ switch (family) {
+ case AF_INET:
+ saddr4 = (struct sockaddr_in *)&saddr;
+ saddr4->sin_family = AF_INET;
+ saddr4->sin_addr.s_addr = htonl(INADDR_ANY);
+ saddr4->sin_port = 0;
+
+ daddr4 = (struct sockaddr_in *)&daddr;
+ daddr4->sin_family = AF_INET;
+ daddr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+ daddr4->sin_port = htons(PORT);
+ break;
+ case AF_INET6:
+ saddr6 = (struct sockaddr_in6 *)&saddr;
+ saddr6->sin6_family = AF_INET6;
+ saddr6->sin6_addr = in6addr_any;
+ saddr6->sin6_port = 0;
+
+ daddr6 = (struct sockaddr_in6 *)&daddr;
+ daddr6->sin6_family = AF_INET6;
+ daddr6->sin6_addr = in6addr_loopback;
+ daddr6->sin6_port = htons(PORT);
+ break;
+ default:
+ error(1, 0, "Unsupported family %d", family);
+ }
+
+ if (numa_run_on_node(node_id) < 0)
+ error(1, errno, "failed to pin to node");
+
+ fd = socket(family, proto, 0);
+ if (fd < 0)
+ error(1, errno, "failed to create send socket");
+
+ if (bind(fd, (struct sockaddr *)&saddr, sizeof(saddr)))
+ error(1, errno, "failed to bind send socket");
+
+ if (connect(fd, (struct sockaddr *)&daddr, sizeof(daddr)))
+ error(1, errno, "failed to connect send socket");
+
+ if (send(fd, "a", 1, 0) < 0)
+ error(1, errno, "failed to send message");
+
+ close(fd);
+}
+
+static
+void receive_on_node(int *rcv_fd, int len, int epfd, int node_id, int proto)
+{
+ struct epoll_event ev;
+ int i, fd;
+ char buf[8];
+
+ i = epoll_wait(epfd, &ev, 1, -1);
+ if (i < 0)
+ error(1, errno, "epoll_wait failed");
+
+ if (proto == SOCK_STREAM) {
+ fd = accept(ev.data.fd, NULL, NULL);
+ if (fd < 0)
+ error(1, errno, "failed to accept");
+ i = recv(fd, buf, sizeof(buf), 0);
+ close(fd);
+ } else {
+ i = recv(ev.data.fd, buf, sizeof(buf), 0);
+ }
+
+ if (i < 0)
+ error(1, errno, "failed to recv");
+
+ for (i = 0; i < len; ++i)
+ if (ev.data.fd == rcv_fd[i])
+ break;
+ if (i == len)
+ error(1, 0, "failed to find socket");
+ fprintf(stderr, "send node %d, receive socket %d\n", node_id, i);
+ if (node_id != i)
+ error(1, 0, "node id/receive socket mismatch");
+}
+
+static void test(int *rcv_fd, int len, int family, int proto)
+{
+ struct epoll_event ev;
+ int epfd, node;
+
+ build_rcv_group(rcv_fd, len, family, proto);
+ attach_bpf(rcv_fd[0]);
+
+ epfd = epoll_create(1);
+ if (epfd < 0)
+ error(1, errno, "failed to create epoll");
+ for (node = 0; node < len; ++node) {
+ ev.events = EPOLLIN;
+ ev.data.fd = rcv_fd[node];
+ if (epoll_ctl(epfd, EPOLL_CTL_ADD, rcv_fd[node], &ev))
+ error(1, errno, "failed to register sock epoll");
+ }
+
+ /* Forward iterate */
+ for (node = 0; node < len; ++node) {
+ send_from_node(node, family, proto);
+ receive_on_node(rcv_fd, len, epfd, node, proto);
+ }
+
+ /* Reverse iterate */
+ for (node = len - 1; node >= 0; --node) {
+ send_from_node(node, family, proto);
+ receive_on_node(rcv_fd, len, epfd, node, proto);
+ }
+
+ close(epfd);
+ for (node = 0; node < len; ++node)
+ close(rcv_fd[node]);
+}
+
+int main(void)
+{
+ int *rcv_fd, nodes;
+
+ if (numa_available() < 0)
+ ksft_exit_skip("no numa api support\n");
+
+ nodes = numa_max_node() + 1;
+
+ rcv_fd = calloc(nodes, sizeof(int));
+ if (!rcv_fd)
+ error(1, 0, "failed to allocate array");
+
+ fprintf(stderr, "---- IPv4 UDP ----\n");
+ test(rcv_fd, nodes, AF_INET, SOCK_DGRAM);
+
+ fprintf(stderr, "---- IPv6 UDP ----\n");
+ test(rcv_fd, nodes, AF_INET6, SOCK_DGRAM);
+
+ fprintf(stderr, "---- IPv4 TCP ----\n");
+ test(rcv_fd, nodes, AF_INET, SOCK_STREAM);
+
+ fprintf(stderr, "---- IPv6 TCP ----\n");
+ test(rcv_fd, nodes, AF_INET6, SOCK_STREAM);
+
+ free(rcv_fd);
+
+ fprintf(stderr, "SUCCESS\n");
+ return 0;
+}
diff --git a/tools/testing/selftests/net/reuseport_dualstack.c b/tools/testing/selftests/net/reuseport_dualstack.c
new file mode 100644
index 000000000..fb7a59ed7
--- /dev/null
+++ b/tools/testing/selftests/net/reuseport_dualstack.c
@@ -0,0 +1,210 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * It is possible to use SO_REUSEPORT to open multiple sockets bound to
+ * equivalent local addresses using AF_INET and AF_INET6 at the same time. If
+ * the AF_INET6 socket has IPV6_V6ONLY set, it's clear which socket should
+ * receive a given incoming packet. However, when it is not set, incoming v4
+ * packets should prefer the AF_INET socket(s). This behavior was defined with
+ * the original SO_REUSEPORT implementation, but broke with
+ * e32ea7e74727 ("soreuseport: fast reuseport UDP socket selection")
+ * This test creates these mixed AF_INET/AF_INET6 sockets and asserts the
+ * AF_INET preference for v4 packets.
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <linux/in.h>
+#include <linux/unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/epoll.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <unistd.h>
+
+static const int PORT = 8888;
+
+static void build_rcv_fd(int family, int proto, int *rcv_fds, int count)
+{
+ struct sockaddr_storage addr;
+ struct sockaddr_in *addr4;
+ struct sockaddr_in6 *addr6;
+ int opt, i;
+
+ switch (family) {
+ case AF_INET:
+ addr4 = (struct sockaddr_in *)&addr;
+ addr4->sin_family = AF_INET;
+ addr4->sin_addr.s_addr = htonl(INADDR_ANY);
+ addr4->sin_port = htons(PORT);
+ break;
+ case AF_INET6:
+ addr6 = (struct sockaddr_in6 *)&addr;
+ addr6->sin6_family = AF_INET6;
+ addr6->sin6_addr = in6addr_any;
+ addr6->sin6_port = htons(PORT);
+ break;
+ default:
+ error(1, 0, "Unsupported family %d", family);
+ }
+
+ for (i = 0; i < count; ++i) {
+ rcv_fds[i] = socket(family, proto, 0);
+ if (rcv_fds[i] < 0)
+ error(1, errno, "failed to create receive socket");
+
+ opt = 1;
+ if (setsockopt(rcv_fds[i], SOL_SOCKET, SO_REUSEPORT, &opt,
+ sizeof(opt)))
+ error(1, errno, "failed to set SO_REUSEPORT");
+
+ if (bind(rcv_fds[i], (struct sockaddr *)&addr, sizeof(addr)))
+ error(1, errno, "failed to bind receive socket");
+
+ if (proto == SOCK_STREAM && listen(rcv_fds[i], 10))
+ error(1, errno, "failed to listen on receive port");
+ }
+}
+
+static void send_from_v4(int proto)
+{
+ struct sockaddr_in saddr, daddr;
+ int fd;
+
+ saddr.sin_family = AF_INET;
+ saddr.sin_addr.s_addr = htonl(INADDR_ANY);
+ saddr.sin_port = 0;
+
+ daddr.sin_family = AF_INET;
+ daddr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+ daddr.sin_port = htons(PORT);
+
+ fd = socket(AF_INET, proto, 0);
+ if (fd < 0)
+ error(1, errno, "failed to create send socket");
+
+ if (bind(fd, (struct sockaddr *)&saddr, sizeof(saddr)))
+ error(1, errno, "failed to bind send socket");
+
+ if (connect(fd, (struct sockaddr *)&daddr, sizeof(daddr)))
+ error(1, errno, "failed to connect send socket");
+
+ if (send(fd, "a", 1, 0) < 0)
+ error(1, errno, "failed to send message");
+
+ close(fd);
+}
+
+static int receive_once(int epfd, int proto)
+{
+ struct epoll_event ev;
+ int i, fd;
+ char buf[8];
+
+ i = epoll_wait(epfd, &ev, 1, -1);
+ if (i < 0)
+ error(1, errno, "epoll_wait failed");
+
+ if (proto == SOCK_STREAM) {
+ fd = accept(ev.data.fd, NULL, NULL);
+ if (fd < 0)
+ error(1, errno, "failed to accept");
+ i = recv(fd, buf, sizeof(buf), 0);
+ close(fd);
+ } else {
+ i = recv(ev.data.fd, buf, sizeof(buf), 0);
+ }
+
+ if (i < 0)
+ error(1, errno, "failed to recv");
+
+ return ev.data.fd;
+}
+
+static void test(int *rcv_fds, int count, int proto)
+{
+ struct epoll_event ev;
+ int epfd, i, test_fd;
+ int test_family;
+ socklen_t len;
+
+ epfd = epoll_create(1);
+ if (epfd < 0)
+ error(1, errno, "failed to create epoll");
+
+ ev.events = EPOLLIN;
+ for (i = 0; i < count; ++i) {
+ ev.data.fd = rcv_fds[i];
+ if (epoll_ctl(epfd, EPOLL_CTL_ADD, rcv_fds[i], &ev))
+ error(1, errno, "failed to register sock epoll");
+ }
+
+ send_from_v4(proto);
+
+ test_fd = receive_once(epfd, proto);
+ len = sizeof(test_family);
+ if (getsockopt(test_fd, SOL_SOCKET, SO_DOMAIN, &test_family, &len))
+ error(1, errno, "failed to read socket domain");
+ if (test_family != AF_INET)
+ error(1, 0, "expected to receive on v4 socket but got v6 (%d)",
+ test_family);
+
+ close(epfd);
+}
+
+int main(void)
+{
+ int rcv_fds[32], i;
+
+ fprintf(stderr, "---- UDP IPv4 created before IPv6 ----\n");
+ build_rcv_fd(AF_INET, SOCK_DGRAM, rcv_fds, 5);
+ build_rcv_fd(AF_INET6, SOCK_DGRAM, &(rcv_fds[5]), 5);
+ test(rcv_fds, 10, SOCK_DGRAM);
+ for (i = 0; i < 10; ++i)
+ close(rcv_fds[i]);
+
+ fprintf(stderr, "---- UDP IPv6 created before IPv4 ----\n");
+ build_rcv_fd(AF_INET6, SOCK_DGRAM, rcv_fds, 5);
+ build_rcv_fd(AF_INET, SOCK_DGRAM, &(rcv_fds[5]), 5);
+ test(rcv_fds, 10, SOCK_DGRAM);
+ for (i = 0; i < 10; ++i)
+ close(rcv_fds[i]);
+
+ /* NOTE: UDP socket lookups traverse a different code path when there
+ * are > 10 sockets in a group.
+ */
+ fprintf(stderr, "---- UDP IPv4 created before IPv6 (large) ----\n");
+ build_rcv_fd(AF_INET, SOCK_DGRAM, rcv_fds, 16);
+ build_rcv_fd(AF_INET6, SOCK_DGRAM, &(rcv_fds[16]), 16);
+ test(rcv_fds, 32, SOCK_DGRAM);
+ for (i = 0; i < 32; ++i)
+ close(rcv_fds[i]);
+
+ fprintf(stderr, "---- UDP IPv6 created before IPv4 (large) ----\n");
+ build_rcv_fd(AF_INET6, SOCK_DGRAM, rcv_fds, 16);
+ build_rcv_fd(AF_INET, SOCK_DGRAM, &(rcv_fds[16]), 16);
+ test(rcv_fds, 32, SOCK_DGRAM);
+ for (i = 0; i < 32; ++i)
+ close(rcv_fds[i]);
+
+ fprintf(stderr, "---- TCP IPv4 created before IPv6 ----\n");
+ build_rcv_fd(AF_INET, SOCK_STREAM, rcv_fds, 5);
+ build_rcv_fd(AF_INET6, SOCK_STREAM, &(rcv_fds[5]), 5);
+ test(rcv_fds, 10, SOCK_STREAM);
+ for (i = 0; i < 10; ++i)
+ close(rcv_fds[i]);
+
+ fprintf(stderr, "---- TCP IPv6 created before IPv4 ----\n");
+ build_rcv_fd(AF_INET6, SOCK_STREAM, rcv_fds, 5);
+ build_rcv_fd(AF_INET, SOCK_STREAM, &(rcv_fds[5]), 5);
+ test(rcv_fds, 10, SOCK_STREAM);
+ for (i = 0; i < 10; ++i)
+ close(rcv_fds[i]);
+
+ fprintf(stderr, "SUCCESS\n");
+ return 0;
+}
diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
new file mode 100755
index 000000000..ff665de78
--- /dev/null
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -0,0 +1,1023 @@
+#!/bin/bash
+#
+# This test is for checking rtnetlink callpaths, and get as much coverage as possible.
+#
+# set -e
+
+devdummy="test-dummy0"
+ret=0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+# set global exit status, but never reset nonzero one.
+check_err()
+{
+ if [ $ret -eq 0 ]; then
+ ret=$1
+ fi
+}
+
+# same but inverted -- used when command must fail for test to pass
+check_fail()
+{
+ if [ $1 -eq 0 ]; then
+ ret=1
+ fi
+}
+
+kci_add_dummy()
+{
+ ip link add name "$devdummy" type dummy
+ check_err $?
+ ip link set "$devdummy" up
+ check_err $?
+}
+
+kci_del_dummy()
+{
+ ip link del dev "$devdummy"
+ check_err $?
+}
+
+kci_test_netconf()
+{
+ dev="$1"
+ r=$ret
+
+ ip netconf show dev "$dev" > /dev/null
+ check_err $?
+
+ for f in 4 6; do
+ ip -$f netconf show dev "$dev" > /dev/null
+ check_err $?
+ done
+
+ if [ $ret -ne 0 ] ;then
+ echo "FAIL: ip netconf show $dev"
+ test $r -eq 0 && ret=0
+ return 1
+ fi
+}
+
+# add a bridge with vlans on top
+kci_test_bridge()
+{
+ devbr="test-br0"
+ vlandev="testbr-vlan1"
+
+ ret=0
+ ip link add name "$devbr" type bridge
+ check_err $?
+
+ ip link set dev "$devdummy" master "$devbr"
+ check_err $?
+
+ ip link set "$devbr" up
+ check_err $?
+
+ ip link add link "$devbr" name "$vlandev" type vlan id 1
+ check_err $?
+ ip addr add dev "$vlandev" 10.200.7.23/30
+ check_err $?
+ ip -6 addr add dev "$vlandev" dead:42::1234/64
+ check_err $?
+ ip -d link > /dev/null
+ check_err $?
+ ip r s t all > /dev/null
+ check_err $?
+
+ for name in "$devbr" "$vlandev" "$devdummy" ; do
+ kci_test_netconf "$name"
+ done
+
+ ip -6 addr del dev "$vlandev" dead:42::1234/64
+ check_err $?
+
+ ip link del dev "$vlandev"
+ check_err $?
+ ip link del dev "$devbr"
+ check_err $?
+
+ if [ $ret -ne 0 ];then
+ echo "FAIL: bridge setup"
+ return 1
+ fi
+ echo "PASS: bridge setup"
+
+}
+
+kci_test_gre()
+{
+ gredev=neta
+ rem=10.42.42.1
+ loc=10.0.0.1
+
+ ret=0
+ ip tunnel add $gredev mode gre remote $rem local $loc ttl 1
+ check_err $?
+ ip link set $gredev up
+ check_err $?
+ ip addr add 10.23.7.10 dev $gredev
+ check_err $?
+ ip route add 10.23.8.0/30 dev $gredev
+ check_err $?
+ ip addr add dev "$devdummy" 10.23.7.11/24
+ check_err $?
+ ip link > /dev/null
+ check_err $?
+ ip addr > /dev/null
+ check_err $?
+
+ kci_test_netconf "$gredev"
+
+ ip addr del dev "$devdummy" 10.23.7.11/24
+ check_err $?
+
+ ip link del $gredev
+ check_err $?
+
+ if [ $ret -ne 0 ];then
+ echo "FAIL: gre tunnel endpoint"
+ return 1
+ fi
+ echo "PASS: gre tunnel endpoint"
+}
+
+# tc uses rtnetlink too, for full tc testing
+# please see tools/testing/selftests/tc-testing.
+kci_test_tc()
+{
+ dev=lo
+ ret=0
+
+ tc qdisc add dev "$dev" root handle 1: htb
+ check_err $?
+ tc class add dev "$dev" parent 1: classid 1:10 htb rate 1mbit
+ check_err $?
+ tc filter add dev "$dev" parent 1:0 prio 5 handle ffe: protocol ip u32 divisor 256
+ check_err $?
+ tc filter add dev "$dev" parent 1:0 prio 5 handle ffd: protocol ip u32 divisor 256
+ check_err $?
+ tc filter add dev "$dev" parent 1:0 prio 5 handle ffc: protocol ip u32 divisor 256
+ check_err $?
+ tc filter add dev "$dev" protocol ip parent 1: prio 5 handle ffe:2:3 u32 ht ffe:2: match ip src 10.0.0.3 flowid 1:10
+ check_err $?
+ tc filter add dev "$dev" protocol ip parent 1: prio 5 handle ffe:2:2 u32 ht ffe:2: match ip src 10.0.0.2 flowid 1:10
+ check_err $?
+ tc filter show dev "$dev" parent 1:0 > /dev/null
+ check_err $?
+ tc filter del dev "$dev" protocol ip parent 1: prio 5 handle ffe:2:3 u32
+ check_err $?
+ tc filter show dev "$dev" parent 1:0 > /dev/null
+ check_err $?
+ tc qdisc del dev "$dev" root handle 1: htb
+ check_err $?
+
+ if [ $ret -ne 0 ];then
+ echo "FAIL: tc htb hierarchy"
+ return 1
+ fi
+ echo "PASS: tc htb hierarchy"
+
+}
+
+kci_test_polrouting()
+{
+ ret=0
+ ip rule add fwmark 1 lookup 100
+ check_err $?
+ ip route add local 0.0.0.0/0 dev lo table 100
+ check_err $?
+ ip r s t all > /dev/null
+ check_err $?
+ ip rule del fwmark 1 lookup 100
+ check_err $?
+ ip route del local 0.0.0.0/0 dev lo table 100
+ check_err $?
+
+ if [ $ret -ne 0 ];then
+ echo "FAIL: policy route test"
+ return 1
+ fi
+ echo "PASS: policy routing"
+}
+
+kci_test_route_get()
+{
+ ret=0
+
+ ip route get 127.0.0.1 > /dev/null
+ check_err $?
+ ip route get 127.0.0.1 dev "$devdummy" > /dev/null
+ check_err $?
+ ip route get ::1 > /dev/null
+ check_err $?
+ ip route get fe80::1 dev "$devdummy" > /dev/null
+ check_err $?
+ ip route get 127.0.0.1 from 127.0.0.1 oif lo tos 0x1 mark 0x1 > /dev/null
+ check_err $?
+ ip route get ::1 from ::1 iif lo oif lo tos 0x1 mark 0x1 > /dev/null
+ check_err $?
+ ip addr add dev "$devdummy" 10.23.7.11/24
+ check_err $?
+ ip route get 10.23.7.11 from 10.23.7.12 iif "$devdummy" > /dev/null
+ check_err $?
+ ip addr del dev "$devdummy" 10.23.7.11/24
+ check_err $?
+
+ if [ $ret -ne 0 ];then
+ echo "FAIL: route get"
+ return 1
+ fi
+
+ echo "PASS: route get"
+}
+
+kci_test_addrlft()
+{
+ for i in $(seq 10 100) ;do
+ lft=$(((RANDOM%3) + 1))
+ ip addr add 10.23.11.$i/32 dev "$devdummy" preferred_lft $lft valid_lft $((lft+1))
+ check_err $?
+ done
+
+ sleep 5
+
+ ip addr show dev "$devdummy" | grep "10.23.11."
+ if [ $? -eq 0 ]; then
+ echo "FAIL: preferred_lft addresses remaining"
+ check_err 1
+ return
+ fi
+
+ echo "PASS: preferred_lft addresses have expired"
+}
+
+kci_test_addrlabel()
+{
+ ret=0
+
+ ip addrlabel add prefix dead::/64 dev lo label 1
+ check_err $?
+
+ ip addrlabel list |grep -q "prefix dead::/64 dev lo label 1"
+ check_err $?
+
+ ip addrlabel del prefix dead::/64 dev lo label 1 2> /dev/null
+ check_err $?
+
+ ip addrlabel add prefix dead::/64 label 1 2> /dev/null
+ check_err $?
+
+ ip addrlabel del prefix dead::/64 label 1 2> /dev/null
+ check_err $?
+
+ # concurrent add/delete
+ for i in $(seq 1 1000); do
+ ip addrlabel add prefix 1c3::/64 label 12345 2>/dev/null
+ done &
+
+ for i in $(seq 1 1000); do
+ ip addrlabel del prefix 1c3::/64 label 12345 2>/dev/null
+ done
+
+ wait
+
+ ip addrlabel del prefix 1c3::/64 label 12345 2>/dev/null
+
+ if [ $ret -ne 0 ];then
+ echo "FAIL: ipv6 addrlabel"
+ return 1
+ fi
+
+ echo "PASS: ipv6 addrlabel"
+}
+
+kci_test_ifalias()
+{
+ ret=0
+ namewant=$(uuidgen)
+ syspathname="/sys/class/net/$devdummy/ifalias"
+
+ ip link set dev "$devdummy" alias "$namewant"
+ check_err $?
+
+ if [ $ret -ne 0 ]; then
+ echo "FAIL: cannot set interface alias of $devdummy to $namewant"
+ return 1
+ fi
+
+ ip link show "$devdummy" | grep -q "alias $namewant"
+ check_err $?
+
+ if [ -r "$syspathname" ] ; then
+ read namehave < "$syspathname"
+ if [ "$namewant" != "$namehave" ]; then
+ echo "FAIL: did set ifalias $namewant but got $namehave"
+ return 1
+ fi
+
+ namewant=$(uuidgen)
+ echo "$namewant" > "$syspathname"
+ ip link show "$devdummy" | grep -q "alias $namewant"
+ check_err $?
+
+ # sysfs interface allows to delete alias again
+ echo "" > "$syspathname"
+
+ ip link show "$devdummy" | grep -q "alias $namewant"
+ check_fail $?
+
+ for i in $(seq 1 100); do
+ uuidgen > "$syspathname" &
+ done
+
+ wait
+
+ # re-add the alias -- kernel should free mem when dummy dev is removed
+ ip link set dev "$devdummy" alias "$namewant"
+ check_err $?
+ fi
+
+ if [ $ret -ne 0 ]; then
+ echo "FAIL: set interface alias $devdummy to $namewant"
+ return 1
+ fi
+
+ echo "PASS: set ifalias $namewant for $devdummy"
+}
+
+kci_test_vrf()
+{
+ vrfname="test-vrf"
+ ret=0
+
+ ip link show type vrf 2>/dev/null
+ if [ $? -ne 0 ]; then
+ echo "SKIP: vrf: iproute2 too old"
+ return $ksft_skip
+ fi
+
+ ip link add "$vrfname" type vrf table 10
+ check_err $?
+ if [ $ret -ne 0 ];then
+ echo "FAIL: can't add vrf interface, skipping test"
+ return 0
+ fi
+
+ ip -br link show type vrf | grep -q "$vrfname"
+ check_err $?
+ if [ $ret -ne 0 ];then
+ echo "FAIL: created vrf device not found"
+ return 1
+ fi
+
+ ip link set dev "$vrfname" up
+ check_err $?
+
+ ip link set dev "$devdummy" master "$vrfname"
+ check_err $?
+ ip link del dev "$vrfname"
+ check_err $?
+
+ if [ $ret -ne 0 ];then
+ echo "FAIL: vrf"
+ return 1
+ fi
+
+ echo "PASS: vrf"
+}
+
+kci_test_encap_vxlan()
+{
+ ret=0
+ vxlan="test-vxlan0"
+ vlan="test-vlan0"
+ testns="$1"
+
+ ip netns exec "$testns" ip link add "$vxlan" type vxlan id 42 group 239.1.1.1 \
+ dev "$devdummy" dstport 4789 2>/dev/null
+ if [ $? -ne 0 ]; then
+ echo "FAIL: can't add vxlan interface, skipping test"
+ return 0
+ fi
+ check_err $?
+
+ ip netns exec "$testns" ip addr add 10.2.11.49/24 dev "$vxlan"
+ check_err $?
+
+ ip netns exec "$testns" ip link set up dev "$vxlan"
+ check_err $?
+
+ ip netns exec "$testns" ip link add link "$vxlan" name "$vlan" type vlan id 1
+ check_err $?
+
+ ip netns exec "$testns" ip link del "$vxlan"
+ check_err $?
+
+ if [ $ret -ne 0 ]; then
+ echo "FAIL: vxlan"
+ return 1
+ fi
+ echo "PASS: vxlan"
+}
+
+kci_test_encap_fou()
+{
+ ret=0
+ name="test-fou"
+ testns="$1"
+
+ ip fou help 2>&1 |grep -q 'Usage: ip fou'
+ if [ $? -ne 0 ];then
+ echo "SKIP: fou: iproute2 too old"
+ return $ksft_skip
+ fi
+
+ ip netns exec "$testns" ip fou add port 7777 ipproto 47 2>/dev/null
+ if [ $? -ne 0 ];then
+ echo "FAIL: can't add fou port 7777, skipping test"
+ return 1
+ fi
+
+ ip netns exec "$testns" ip fou add port 8888 ipproto 4
+ check_err $?
+
+ ip netns exec "$testns" ip fou del port 9999 2>/dev/null
+ check_fail $?
+
+ ip netns exec "$testns" ip fou del port 7777
+ check_err $?
+
+ if [ $ret -ne 0 ]; then
+ echo "FAIL: fou"
+ return 1
+ fi
+
+ echo "PASS: fou"
+}
+
+# test various encap methods, use netns to avoid unwanted interference
+kci_test_encap()
+{
+ testns="testns"
+ ret=0
+
+ ip netns add "$testns"
+ if [ $? -ne 0 ]; then
+ echo "SKIP encap tests: cannot add net namespace $testns"
+ return $ksft_skip
+ fi
+
+ ip netns exec "$testns" ip link set lo up
+ check_err $?
+
+ ip netns exec "$testns" ip link add name "$devdummy" type dummy
+ check_err $?
+ ip netns exec "$testns" ip link set "$devdummy" up
+ check_err $?
+
+ kci_test_encap_vxlan "$testns"
+ kci_test_encap_fou "$testns"
+
+ ip netns del "$testns"
+}
+
+kci_test_macsec()
+{
+ msname="test_macsec0"
+ ret=0
+
+ ip macsec help 2>&1 | grep -q "^Usage: ip macsec"
+ if [ $? -ne 0 ]; then
+ echo "SKIP: macsec: iproute2 too old"
+ return $ksft_skip
+ fi
+
+ ip link add link "$devdummy" "$msname" type macsec port 42 encrypt on
+ check_err $?
+ if [ $ret -ne 0 ];then
+ echo "FAIL: can't add macsec interface, skipping test"
+ return 1
+ fi
+
+ ip macsec add "$msname" tx sa 0 pn 1024 on key 01 12345678901234567890123456789012
+ check_err $?
+
+ ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef"
+ check_err $?
+
+ ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef" sa 0 pn 1 on key 00 0123456789abcdef0123456789abcdef
+ check_err $?
+
+ ip macsec show > /dev/null
+ check_err $?
+
+ ip link del dev "$msname"
+ check_err $?
+
+ if [ $ret -ne 0 ];then
+ echo "FAIL: macsec"
+ return 1
+ fi
+
+ echo "PASS: macsec"
+}
+
+#-------------------------------------------------------------------
+# Example commands
+# ip x s add proto esp src 14.0.0.52 dst 14.0.0.70 \
+# spi 0x07 mode transport reqid 0x07 replay-window 32 \
+# aead 'rfc4106(gcm(aes))' 1234567890123456dcba 128 \
+# sel src 14.0.0.52/24 dst 14.0.0.70/24
+# ip x p add dir out src 14.0.0.52/24 dst 14.0.0.70/24 \
+# tmpl proto esp src 14.0.0.52 dst 14.0.0.70 \
+# spi 0x07 mode transport reqid 0x07
+#
+# Subcommands not tested
+# ip x s update
+# ip x s allocspi
+# ip x s deleteall
+# ip x p update
+# ip x p deleteall
+# ip x p set
+#-------------------------------------------------------------------
+kci_test_ipsec()
+{
+ ret=0
+ algo="aead rfc4106(gcm(aes)) 0x3132333435363738393031323334353664636261 128"
+ srcip=192.168.123.1
+ dstip=192.168.123.2
+ spi=7
+
+ ip addr add $srcip dev $devdummy
+
+ # flush to be sure there's nothing configured
+ ip x s flush ; ip x p flush
+ check_err $?
+
+ # start the monitor in the background
+ tmpfile=`mktemp /var/run/ipsectestXXX`
+ mpid=`(ip x m > $tmpfile & echo $!) 2>/dev/null`
+ sleep 0.2
+
+ ipsecid="proto esp src $srcip dst $dstip spi 0x07"
+ ip x s add $ipsecid \
+ mode transport reqid 0x07 replay-window 32 \
+ $algo sel src $srcip/24 dst $dstip/24
+ check_err $?
+
+ lines=`ip x s list | grep $srcip | grep $dstip | wc -l`
+ test $lines -eq 2
+ check_err $?
+
+ ip x s count | grep -q "SAD count 1"
+ check_err $?
+
+ lines=`ip x s get $ipsecid | grep $srcip | grep $dstip | wc -l`
+ test $lines -eq 2
+ check_err $?
+
+ ip x s delete $ipsecid
+ check_err $?
+
+ lines=`ip x s list | wc -l`
+ test $lines -eq 0
+ check_err $?
+
+ ipsecsel="dir out src $srcip/24 dst $dstip/24"
+ ip x p add $ipsecsel \
+ tmpl proto esp src $srcip dst $dstip \
+ spi 0x07 mode transport reqid 0x07
+ check_err $?
+
+ lines=`ip x p list | grep $srcip | grep $dstip | wc -l`
+ test $lines -eq 2
+ check_err $?
+
+ ip x p count | grep -q "SPD IN 0 OUT 1 FWD 0"
+ check_err $?
+
+ lines=`ip x p get $ipsecsel | grep $srcip | grep $dstip | wc -l`
+ test $lines -eq 2
+ check_err $?
+
+ ip x p delete $ipsecsel
+ check_err $?
+
+ lines=`ip x p list | wc -l`
+ test $lines -eq 0
+ check_err $?
+
+ # check the monitor results
+ kill $mpid
+ lines=`wc -l $tmpfile | cut "-d " -f1`
+ test $lines -eq 20
+ check_err $?
+ rm -rf $tmpfile
+
+ # clean up any leftovers
+ ip x s flush
+ check_err $?
+ ip x p flush
+ check_err $?
+ ip addr del $srcip/32 dev $devdummy
+
+ if [ $ret -ne 0 ]; then
+ echo "FAIL: ipsec"
+ return 1
+ fi
+ echo "PASS: ipsec"
+}
+
+#-------------------------------------------------------------------
+# Example commands
+# ip x s add proto esp src 14.0.0.52 dst 14.0.0.70 \
+# spi 0x07 mode transport reqid 0x07 replay-window 32 \
+# aead 'rfc4106(gcm(aes))' 1234567890123456dcba 128 \
+# sel src 14.0.0.52/24 dst 14.0.0.70/24
+# offload dev sim1 dir out
+# ip x p add dir out src 14.0.0.52/24 dst 14.0.0.70/24 \
+# tmpl proto esp src 14.0.0.52 dst 14.0.0.70 \
+# spi 0x07 mode transport reqid 0x07
+#
+#-------------------------------------------------------------------
+kci_test_ipsec_offload()
+{
+ ret=0
+ algo="aead rfc4106(gcm(aes)) 0x3132333435363738393031323334353664636261 128"
+ srcip=192.168.123.3
+ dstip=192.168.123.4
+ dev=simx1
+ sysfsd=/sys/kernel/debug/netdevsim/$dev
+ sysfsf=$sysfsd/ipsec
+
+ # setup netdevsim since dummydev doesn't have offload support
+ modprobe netdevsim
+ check_err $?
+ if [ $ret -ne 0 ]; then
+ echo "FAIL: ipsec_offload can't load netdevsim"
+ return 1
+ fi
+
+ ip link add $dev type netdevsim
+ ip addr add $srcip dev $dev
+ ip link set $dev up
+ if [ ! -d $sysfsd ] ; then
+ echo "FAIL: ipsec_offload can't create device $dev"
+ return 1
+ fi
+ if [ ! -f $sysfsf ] ; then
+ echo "FAIL: ipsec_offload netdevsim doesn't support IPsec offload"
+ return 1
+ fi
+
+ # flush to be sure there's nothing configured
+ ip x s flush ; ip x p flush
+
+ # create offloaded SAs, both in and out
+ ip x p add dir out src $srcip/24 dst $dstip/24 \
+ tmpl proto esp src $srcip dst $dstip spi 9 \
+ mode transport reqid 42
+ check_err $?
+ ip x p add dir out src $dstip/24 dst $srcip/24 \
+ tmpl proto esp src $dstip dst $srcip spi 9 \
+ mode transport reqid 42
+ check_err $?
+
+ ip x s add proto esp src $srcip dst $dstip spi 9 \
+ mode transport reqid 42 $algo sel src $srcip/24 dst $dstip/24 \
+ offload dev $dev dir out
+ check_err $?
+ ip x s add proto esp src $dstip dst $srcip spi 9 \
+ mode transport reqid 42 $algo sel src $dstip/24 dst $srcip/24 \
+ offload dev $dev dir in
+ check_err $?
+ if [ $ret -ne 0 ]; then
+ echo "FAIL: ipsec_offload can't create SA"
+ return 1
+ fi
+
+ # does offload show up in ip output
+ lines=`ip x s list | grep -c "crypto offload parameters: dev $dev dir"`
+ if [ $lines -ne 2 ] ; then
+ echo "FAIL: ipsec_offload SA offload missing from list output"
+ check_err 1
+ fi
+
+ # use ping to exercise the Tx path
+ ping -I $dev -c 3 -W 1 -i 0 $dstip >/dev/null
+
+ # does driver have correct offload info
+ diff $sysfsf - << EOF
+SA count=2 tx=3
+sa[0] tx ipaddr=0x00000000 00000000 00000000 00000000
+sa[0] spi=0x00000009 proto=0x32 salt=0x61626364 crypt=1
+sa[0] key=0x34333231 38373635 32313039 36353433
+sa[1] rx ipaddr=0x00000000 00000000 00000000 037ba8c0
+sa[1] spi=0x00000009 proto=0x32 salt=0x61626364 crypt=1
+sa[1] key=0x34333231 38373635 32313039 36353433
+EOF
+ if [ $? -ne 0 ] ; then
+ echo "FAIL: ipsec_offload incorrect driver data"
+ check_err 1
+ fi
+
+ # does offload get removed from driver
+ ip x s flush
+ ip x p flush
+ lines=`grep -c "SA count=0" $sysfsf`
+ if [ $lines -ne 1 ] ; then
+ echo "FAIL: ipsec_offload SA not removed from driver"
+ check_err 1
+ fi
+
+ # clean up any leftovers
+ ip link del $dev
+ rmmod netdevsim
+
+ if [ $ret -ne 0 ]; then
+ echo "FAIL: ipsec_offload"
+ return 1
+ fi
+ echo "PASS: ipsec_offload"
+}
+
+kci_test_gretap()
+{
+ testns="testns"
+ DEV_NS=gretap00
+ ret=0
+
+ ip netns add "$testns"
+ if [ $? -ne 0 ]; then
+ echo "SKIP gretap tests: cannot add net namespace $testns"
+ return $ksft_skip
+ fi
+
+ ip link help gretap 2>&1 | grep -q "^Usage:"
+ if [ $? -ne 0 ];then
+ echo "SKIP: gretap: iproute2 too old"
+ ip netns del "$testns"
+ return $ksft_skip
+ fi
+
+ # test native tunnel
+ ip netns exec "$testns" ip link add dev "$DEV_NS" type gretap seq \
+ key 102 local 172.16.1.100 remote 172.16.1.200
+ check_err $?
+
+ ip netns exec "$testns" ip addr add dev "$DEV_NS" 10.1.1.100/24
+ check_err $?
+
+ ip netns exec "$testns" ip link set dev $DEV_NS up
+ check_err $?
+
+ ip netns exec "$testns" ip link del "$DEV_NS"
+ check_err $?
+
+ # test external mode
+ ip netns exec "$testns" ip link add dev "$DEV_NS" type gretap external
+ check_err $?
+
+ ip netns exec "$testns" ip link del "$DEV_NS"
+ check_err $?
+
+ if [ $ret -ne 0 ]; then
+ echo "FAIL: gretap"
+ ip netns del "$testns"
+ return 1
+ fi
+ echo "PASS: gretap"
+
+ ip netns del "$testns"
+}
+
+kci_test_ip6gretap()
+{
+ testns="testns"
+ DEV_NS=ip6gretap00
+ ret=0
+
+ ip netns add "$testns"
+ if [ $? -ne 0 ]; then
+ echo "SKIP ip6gretap tests: cannot add net namespace $testns"
+ return $ksft_skip
+ fi
+
+ ip link help ip6gretap 2>&1 | grep -q "^Usage:"
+ if [ $? -ne 0 ];then
+ echo "SKIP: ip6gretap: iproute2 too old"
+ ip netns del "$testns"
+ return $ksft_skip
+ fi
+
+ # test native tunnel
+ ip netns exec "$testns" ip link add dev "$DEV_NS" type ip6gretap seq \
+ key 102 local fc00:100::1 remote fc00:100::2
+ check_err $?
+
+ ip netns exec "$testns" ip addr add dev "$DEV_NS" fc00:200::1/96
+ check_err $?
+
+ ip netns exec "$testns" ip link set dev $DEV_NS up
+ check_err $?
+
+ ip netns exec "$testns" ip link del "$DEV_NS"
+ check_err $?
+
+ # test external mode
+ ip netns exec "$testns" ip link add dev "$DEV_NS" type ip6gretap external
+ check_err $?
+
+ ip netns exec "$testns" ip link del "$DEV_NS"
+ check_err $?
+
+ if [ $ret -ne 0 ]; then
+ echo "FAIL: ip6gretap"
+ ip netns del "$testns"
+ return 1
+ fi
+ echo "PASS: ip6gretap"
+
+ ip netns del "$testns"
+}
+
+kci_test_erspan()
+{
+ testns="testns"
+ DEV_NS=erspan00
+ ret=0
+
+ ip link help erspan 2>&1 | grep -q "^Usage:"
+ if [ $? -ne 0 ];then
+ echo "SKIP: erspan: iproute2 too old"
+ return $ksft_skip
+ fi
+
+ ip netns add "$testns"
+ if [ $? -ne 0 ]; then
+ echo "SKIP erspan tests: cannot add net namespace $testns"
+ return $ksft_skip
+ fi
+
+ # test native tunnel erspan v1
+ ip netns exec "$testns" ip link add dev "$DEV_NS" type erspan seq \
+ key 102 local 172.16.1.100 remote 172.16.1.200 \
+ erspan_ver 1 erspan 488
+ check_err $?
+
+ ip netns exec "$testns" ip addr add dev "$DEV_NS" 10.1.1.100/24
+ check_err $?
+
+ ip netns exec "$testns" ip link set dev $DEV_NS up
+ check_err $?
+
+ ip netns exec "$testns" ip link del "$DEV_NS"
+ check_err $?
+
+ # test native tunnel erspan v2
+ ip netns exec "$testns" ip link add dev "$DEV_NS" type erspan seq \
+ key 102 local 172.16.1.100 remote 172.16.1.200 \
+ erspan_ver 2 erspan_dir ingress erspan_hwid 7
+ check_err $?
+
+ ip netns exec "$testns" ip addr add dev "$DEV_NS" 10.1.1.100/24
+ check_err $?
+
+ ip netns exec "$testns" ip link set dev $DEV_NS up
+ check_err $?
+
+ ip netns exec "$testns" ip link del "$DEV_NS"
+ check_err $?
+
+ # test external mode
+ ip netns exec "$testns" ip link add dev "$DEV_NS" type erspan external
+ check_err $?
+
+ ip netns exec "$testns" ip link del "$DEV_NS"
+ check_err $?
+
+ if [ $ret -ne 0 ]; then
+ echo "FAIL: erspan"
+ ip netns del "$testns"
+ return 1
+ fi
+ echo "PASS: erspan"
+
+ ip netns del "$testns"
+}
+
+kci_test_ip6erspan()
+{
+ testns="testns"
+ DEV_NS=ip6erspan00
+ ret=0
+
+ ip link help ip6erspan 2>&1 | grep -q "^Usage:"
+ if [ $? -ne 0 ];then
+ echo "SKIP: ip6erspan: iproute2 too old"
+ return $ksft_skip
+ fi
+
+ ip netns add "$testns"
+ if [ $? -ne 0 ]; then
+ echo "SKIP ip6erspan tests: cannot add net namespace $testns"
+ return $ksft_skip
+ fi
+
+ # test native tunnel ip6erspan v1
+ ip netns exec "$testns" ip link add dev "$DEV_NS" type ip6erspan seq \
+ key 102 local fc00:100::1 remote fc00:100::2 \
+ erspan_ver 1 erspan 488
+ check_err $?
+
+ ip netns exec "$testns" ip addr add dev "$DEV_NS" 10.1.1.100/24
+ check_err $?
+
+ ip netns exec "$testns" ip link set dev $DEV_NS up
+ check_err $?
+
+ ip netns exec "$testns" ip link del "$DEV_NS"
+ check_err $?
+
+ # test native tunnel ip6erspan v2
+ ip netns exec "$testns" ip link add dev "$DEV_NS" type ip6erspan seq \
+ key 102 local fc00:100::1 remote fc00:100::2 \
+ erspan_ver 2 erspan_dir ingress erspan_hwid 7
+ check_err $?
+
+ ip netns exec "$testns" ip addr add dev "$DEV_NS" 10.1.1.100/24
+ check_err $?
+
+ ip netns exec "$testns" ip link set dev $DEV_NS up
+ check_err $?
+
+ ip netns exec "$testns" ip link del "$DEV_NS"
+ check_err $?
+
+ # test external mode
+ ip netns exec "$testns" ip link add dev "$DEV_NS" \
+ type ip6erspan external
+ check_err $?
+
+ ip netns exec "$testns" ip link del "$DEV_NS"
+ check_err $?
+
+ if [ $ret -ne 0 ]; then
+ echo "FAIL: ip6erspan"
+ ip netns del "$testns"
+ return 1
+ fi
+ echo "PASS: ip6erspan"
+
+ ip netns del "$testns"
+}
+
+kci_test_rtnl()
+{
+ kci_add_dummy
+ if [ $ret -ne 0 ];then
+ echo "FAIL: cannot add dummy interface"
+ return 1
+ fi
+
+ kci_test_polrouting
+ kci_test_route_get
+ kci_test_addrlft
+ kci_test_tc
+ kci_test_gre
+ kci_test_gretap
+ kci_test_ip6gretap
+ kci_test_erspan
+ kci_test_ip6erspan
+ kci_test_bridge
+ kci_test_addrlabel
+ kci_test_ifalias
+ kci_test_vrf
+ kci_test_encap
+ kci_test_macsec
+ kci_test_ipsec
+ kci_test_ipsec_offload
+
+ kci_del_dummy
+}
+
+#check for needed privileges
+if [ "$(id -u)" -ne 0 ];then
+ echo "SKIP: Need root privileges"
+ exit $ksft_skip
+fi
+
+for x in ip tc;do
+ $x -Version 2>/dev/null >/dev/null
+ if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without the $x tool"
+ exit $ksft_skip
+ fi
+done
+
+kci_test_rtnl
+
+exit $ret
diff --git a/tools/testing/selftests/net/run_afpackettests b/tools/testing/selftests/net/run_afpackettests
new file mode 100755
index 000000000..bea079edc
--- /dev/null
+++ b/tools/testing/selftests/net/run_afpackettests
@@ -0,0 +1,27 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+if [ $(id -u) != 0 ]; then
+ echo $msg must be run as root >&2
+ exit 0
+fi
+
+echo "--------------------"
+echo "running psock_fanout test"
+echo "--------------------"
+./in_netns.sh ./psock_fanout
+if [ $? -ne 0 ]; then
+ echo "[FAIL]"
+else
+ echo "[PASS]"
+fi
+
+echo "--------------------"
+echo "running psock_tpacket test"
+echo "--------------------"
+./in_netns.sh ./psock_tpacket
+if [ $? -ne 0 ]; then
+ echo "[FAIL]"
+else
+ echo "[PASS]"
+fi
diff --git a/tools/testing/selftests/net/run_netsocktests b/tools/testing/selftests/net/run_netsocktests
new file mode 100755
index 000000000..14e41faf2
--- /dev/null
+++ b/tools/testing/selftests/net/run_netsocktests
@@ -0,0 +1,13 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+echo "--------------------"
+echo "running socket test"
+echo "--------------------"
+./socket
+if [ $? -ne 0 ]; then
+ echo "[FAIL]"
+ exit 1
+else
+ echo "[PASS]"
+fi
diff --git a/tools/testing/selftests/net/socket.c b/tools/testing/selftests/net/socket.c
new file mode 100644
index 000000000..afca1ead6
--- /dev/null
+++ b/tools/testing/selftests/net/socket.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <errno.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+
+struct socket_testcase {
+ int domain;
+ int type;
+ int protocol;
+
+ /* 0 = valid file descriptor
+ * -foo = error foo
+ */
+ int expect;
+
+ /* If non-zero, accept EAFNOSUPPORT to handle the case
+ * of the protocol not being configured into the kernel.
+ */
+ int nosupport_ok;
+};
+
+static struct socket_testcase tests[] = {
+ { AF_MAX, 0, 0, -EAFNOSUPPORT, 0 },
+ { AF_INET, SOCK_STREAM, IPPROTO_TCP, 0, 1 },
+ { AF_INET, SOCK_DGRAM, IPPROTO_TCP, -EPROTONOSUPPORT, 1 },
+ { AF_INET, SOCK_DGRAM, IPPROTO_UDP, 0, 1 },
+ { AF_INET, SOCK_STREAM, IPPROTO_UDP, -EPROTONOSUPPORT, 1 },
+};
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+#define ERR_STRING_SZ 64
+
+static int run_tests(void)
+{
+ char err_string1[ERR_STRING_SZ];
+ char err_string2[ERR_STRING_SZ];
+ int i, err;
+
+ err = 0;
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ struct socket_testcase *s = &tests[i];
+ int fd;
+
+ fd = socket(s->domain, s->type, s->protocol);
+ if (fd < 0) {
+ if (s->nosupport_ok &&
+ errno == EAFNOSUPPORT)
+ continue;
+
+ if (s->expect < 0 &&
+ errno == -s->expect)
+ continue;
+
+ strerror_r(-s->expect, err_string1, ERR_STRING_SZ);
+ strerror_r(errno, err_string2, ERR_STRING_SZ);
+
+ fprintf(stderr, "socket(%d, %d, %d) expected "
+ "err (%s) got (%s)\n",
+ s->domain, s->type, s->protocol,
+ err_string1, err_string2);
+
+ err = -1;
+ break;
+ } else {
+ close(fd);
+
+ if (s->expect < 0) {
+ strerror_r(errno, err_string1, ERR_STRING_SZ);
+
+ fprintf(stderr, "socket(%d, %d, %d) expected "
+ "success got err (%s)\n",
+ s->domain, s->type, s->protocol,
+ err_string1);
+
+ err = -1;
+ break;
+ }
+ }
+ }
+
+ return err;
+}
+
+int main(void)
+{
+ int err = run_tests();
+
+ return err;
+}
diff --git a/tools/testing/selftests/net/tcp_inq.c b/tools/testing/selftests/net/tcp_inq.c
new file mode 100644
index 000000000..d044b29dd
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_inq.c
@@ -0,0 +1,189 @@
+/*
+ * Copyright 2018 Google Inc.
+ * Author: Soheil Hassas Yeganeh (soheil@google.com)
+ *
+ * Simple example on how to use TCP_INQ and TCP_CM_INQ.
+ *
+ * License (GPLv2):
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for
+ * more details.
+ */
+#define _GNU_SOURCE
+
+#include <error.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <unistd.h>
+
+#ifndef TCP_INQ
+#define TCP_INQ 36
+#endif
+
+#ifndef TCP_CM_INQ
+#define TCP_CM_INQ TCP_INQ
+#endif
+
+#define BUF_SIZE 8192
+#define CMSG_SIZE 32
+
+static int family = AF_INET6;
+static socklen_t addr_len = sizeof(struct sockaddr_in6);
+static int port = 4974;
+
+static void setup_loopback_addr(int family, struct sockaddr_storage *sockaddr)
+{
+ struct sockaddr_in6 *addr6 = (void *) sockaddr;
+ struct sockaddr_in *addr4 = (void *) sockaddr;
+
+ switch (family) {
+ case PF_INET:
+ memset(addr4, 0, sizeof(*addr4));
+ addr4->sin_family = AF_INET;
+ addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+ addr4->sin_port = htons(port);
+ break;
+ case PF_INET6:
+ memset(addr6, 0, sizeof(*addr6));
+ addr6->sin6_family = AF_INET6;
+ addr6->sin6_addr = in6addr_loopback;
+ addr6->sin6_port = htons(port);
+ break;
+ default:
+ error(1, 0, "illegal family");
+ }
+}
+
+void *start_server(void *arg)
+{
+ int server_fd = (int)(unsigned long)arg;
+ struct sockaddr_in addr;
+ socklen_t addrlen = sizeof(addr);
+ char *buf;
+ int fd;
+ int r;
+
+ buf = malloc(BUF_SIZE);
+
+ for (;;) {
+ fd = accept(server_fd, (struct sockaddr *)&addr, &addrlen);
+ if (fd == -1) {
+ perror("accept");
+ break;
+ }
+ do {
+ r = send(fd, buf, BUF_SIZE, 0);
+ } while (r < 0 && errno == EINTR);
+ if (r < 0)
+ perror("send");
+ if (r != BUF_SIZE)
+ fprintf(stderr, "can only send %d bytes\n", r);
+ /* TCP_INQ can overestimate in-queue by one byte if we send
+ * the FIN packet. Sleep for 1 second, so that the client
+ * likely invoked recvmsg().
+ */
+ sleep(1);
+ close(fd);
+ }
+
+ free(buf);
+ close(server_fd);
+ pthread_exit(0);
+}
+
+int main(int argc, char *argv[])
+{
+ struct sockaddr_storage listen_addr, addr;
+ int c, one = 1, inq = -1;
+ pthread_t server_thread;
+ char cmsgbuf[CMSG_SIZE];
+ struct iovec iov[1];
+ struct cmsghdr *cm;
+ struct msghdr msg;
+ int server_fd, fd;
+ char *buf;
+
+ while ((c = getopt(argc, argv, "46p:")) != -1) {
+ switch (c) {
+ case '4':
+ family = PF_INET;
+ addr_len = sizeof(struct sockaddr_in);
+ break;
+ case '6':
+ family = PF_INET6;
+ addr_len = sizeof(struct sockaddr_in6);
+ break;
+ case 'p':
+ port = atoi(optarg);
+ break;
+ }
+ }
+
+ server_fd = socket(family, SOCK_STREAM, 0);
+ if (server_fd < 0)
+ error(1, errno, "server socket");
+ setup_loopback_addr(family, &listen_addr);
+ if (setsockopt(server_fd, SOL_SOCKET, SO_REUSEADDR,
+ &one, sizeof(one)) != 0)
+ error(1, errno, "setsockopt(SO_REUSEADDR)");
+ if (bind(server_fd, (const struct sockaddr *)&listen_addr,
+ addr_len) == -1)
+ error(1, errno, "bind");
+ if (listen(server_fd, 128) == -1)
+ error(1, errno, "listen");
+ if (pthread_create(&server_thread, NULL, start_server,
+ (void *)(unsigned long)server_fd) != 0)
+ error(1, errno, "pthread_create");
+
+ fd = socket(family, SOCK_STREAM, 0);
+ if (fd < 0)
+ error(1, errno, "client socket");
+ setup_loopback_addr(family, &addr);
+ if (connect(fd, (const struct sockaddr *)&addr, addr_len) == -1)
+ error(1, errno, "connect");
+ if (setsockopt(fd, SOL_TCP, TCP_INQ, &one, sizeof(one)) != 0)
+ error(1, errno, "setsockopt(TCP_INQ)");
+
+ msg.msg_name = NULL;
+ msg.msg_namelen = 0;
+ msg.msg_iov = iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = cmsgbuf;
+ msg.msg_controllen = sizeof(cmsgbuf);
+ msg.msg_flags = 0;
+
+ buf = malloc(BUF_SIZE);
+ iov[0].iov_base = buf;
+ iov[0].iov_len = BUF_SIZE / 2;
+
+ if (recvmsg(fd, &msg, 0) != iov[0].iov_len)
+ error(1, errno, "recvmsg");
+ if (msg.msg_flags & MSG_CTRUNC)
+ error(1, 0, "control message is truncated");
+
+ for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm))
+ if (cm->cmsg_level == SOL_TCP && cm->cmsg_type == TCP_CM_INQ)
+ inq = *((int *) CMSG_DATA(cm));
+
+ if (inq != BUF_SIZE - iov[0].iov_len) {
+ fprintf(stderr, "unexpected inq: %d\n", inq);
+ exit(1);
+ }
+
+ printf("PASSED\n");
+ free(buf);
+ close(fd);
+ return 0;
+}
diff --git a/tools/testing/selftests/net/tcp_mmap.c b/tools/testing/selftests/net/tcp_mmap.c
new file mode 100644
index 000000000..e8c5dff44
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_mmap.c
@@ -0,0 +1,447 @@
+/*
+ * Copyright 2018 Google Inc.
+ * Author: Eric Dumazet (edumazet@google.com)
+ *
+ * Reference program demonstrating tcp mmap() usage,
+ * and SO_RCVLOWAT hints for receiver.
+ *
+ * Note : NIC with header split is needed to use mmap() on TCP :
+ * Each incoming frame must be a multiple of PAGE_SIZE bytes of TCP payload.
+ *
+ * How to use on loopback interface :
+ *
+ * ifconfig lo mtu 61512 # 15*4096 + 40 (ipv6 header) + 32 (TCP with TS option header)
+ * tcp_mmap -s -z &
+ * tcp_mmap -H ::1 -z
+ *
+ * Or leave default lo mtu, but use -M option to set TCP_MAXSEG option to (4096 + 12)
+ * (4096 : page size on x86, 12: TCP TS option length)
+ * tcp_mmap -s -z -M $((4096+12)) &
+ * tcp_mmap -H ::1 -z -M $((4096+12))
+ *
+ * Note: -z option on sender uses MSG_ZEROCOPY, which forces a copy when packets go through loopback interface.
+ * We might use sendfile() instead, but really this test program is about mmap(), for receivers ;)
+ *
+ * $ ./tcp_mmap -s & # Without mmap()
+ * $ for i in {1..4}; do ./tcp_mmap -H ::1 -z ; done
+ * received 32768 MB (0 % mmap'ed) in 14.1157 s, 19.4732 Gbit
+ * cpu usage user:0.057 sys:7.815, 240.234 usec per MB, 65531 c-switches
+ * received 32768 MB (0 % mmap'ed) in 14.6833 s, 18.7204 Gbit
+ * cpu usage user:0.043 sys:8.103, 248.596 usec per MB, 65524 c-switches
+ * received 32768 MB (0 % mmap'ed) in 11.143 s, 24.6682 Gbit
+ * cpu usage user:0.044 sys:6.576, 202.026 usec per MB, 65519 c-switches
+ * received 32768 MB (0 % mmap'ed) in 14.9056 s, 18.4413 Gbit
+ * cpu usage user:0.036 sys:8.193, 251.129 usec per MB, 65530 c-switches
+ * $ kill %1 # kill tcp_mmap server
+ *
+ * $ ./tcp_mmap -s -z & # With mmap()
+ * $ for i in {1..4}; do ./tcp_mmap -H ::1 -z ; done
+ * received 32768 MB (99.9939 % mmap'ed) in 6.73792 s, 40.7956 Gbit
+ * cpu usage user:0.045 sys:2.827, 87.6465 usec per MB, 65532 c-switches
+ * received 32768 MB (99.9939 % mmap'ed) in 7.26732 s, 37.8238 Gbit
+ * cpu usage user:0.037 sys:3.087, 95.3369 usec per MB, 65532 c-switches
+ * received 32768 MB (99.9939 % mmap'ed) in 7.61661 s, 36.0893 Gbit
+ * cpu usage user:0.046 sys:3.559, 110.016 usec per MB, 65529 c-switches
+ * received 32768 MB (99.9939 % mmap'ed) in 7.43764 s, 36.9577 Gbit
+ * cpu usage user:0.035 sys:3.467, 106.873 usec per MB, 65530 c-switches
+ *
+ * License (GPLv2):
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#define _GNU_SOURCE
+#include <pthread.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <error.h>
+#include <sys/socket.h>
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+#include <time.h>
+#include <sys/time.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <poll.h>
+#include <linux/tcp.h>
+#include <assert.h>
+
+#ifndef MSG_ZEROCOPY
+#define MSG_ZEROCOPY 0x4000000
+#endif
+
+#define FILE_SZ (1UL << 35)
+static int cfg_family = AF_INET6;
+static socklen_t cfg_alen = sizeof(struct sockaddr_in6);
+static int cfg_port = 8787;
+
+static int rcvbuf; /* Default: autotuning. Can be set with -r <integer> option */
+static int sndbuf; /* Default: autotuning. Can be set with -w <integer> option */
+static int zflg; /* zero copy option. (MSG_ZEROCOPY for sender, mmap() for receiver */
+static int xflg; /* hash received data (simple xor) (-h option) */
+static int keepflag; /* -k option: receiver shall keep all received file in memory (no munmap() calls) */
+
+static int chunk_size = 512*1024;
+
+unsigned long htotal;
+
+static inline void prefetch(const void *x)
+{
+#if defined(__x86_64__)
+ asm volatile("prefetcht0 %P0" : : "m" (*(const char *)x));
+#endif
+}
+
+void hash_zone(void *zone, unsigned int length)
+{
+ unsigned long temp = htotal;
+
+ while (length >= 8*sizeof(long)) {
+ prefetch(zone + 384);
+ temp ^= *(unsigned long *)zone;
+ temp ^= *(unsigned long *)(zone + sizeof(long));
+ temp ^= *(unsigned long *)(zone + 2*sizeof(long));
+ temp ^= *(unsigned long *)(zone + 3*sizeof(long));
+ temp ^= *(unsigned long *)(zone + 4*sizeof(long));
+ temp ^= *(unsigned long *)(zone + 5*sizeof(long));
+ temp ^= *(unsigned long *)(zone + 6*sizeof(long));
+ temp ^= *(unsigned long *)(zone + 7*sizeof(long));
+ zone += 8*sizeof(long);
+ length -= 8*sizeof(long);
+ }
+ while (length >= 1) {
+ temp ^= *(unsigned char *)zone;
+ zone += 1;
+ length--;
+ }
+ htotal = temp;
+}
+
+void *child_thread(void *arg)
+{
+ unsigned long total_mmap = 0, total = 0;
+ struct tcp_zerocopy_receive zc;
+ unsigned long delta_usec;
+ int flags = MAP_SHARED;
+ struct timeval t0, t1;
+ char *buffer = NULL;
+ void *addr = NULL;
+ double throughput;
+ struct rusage ru;
+ int lu, fd;
+
+ fd = (int)(unsigned long)arg;
+
+ gettimeofday(&t0, NULL);
+
+ fcntl(fd, F_SETFL, O_NDELAY);
+ buffer = malloc(chunk_size);
+ if (!buffer) {
+ perror("malloc");
+ goto error;
+ }
+ if (zflg) {
+ addr = mmap(NULL, chunk_size, PROT_READ, flags, fd, 0);
+ if (addr == (void *)-1)
+ zflg = 0;
+ }
+ while (1) {
+ struct pollfd pfd = { .fd = fd, .events = POLLIN, };
+ int sub;
+
+ poll(&pfd, 1, 10000);
+ if (zflg) {
+ socklen_t zc_len = sizeof(zc);
+ int res;
+
+ zc.address = (__u64)addr;
+ zc.length = chunk_size;
+ zc.recv_skip_hint = 0;
+ res = getsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE,
+ &zc, &zc_len);
+ if (res == -1)
+ break;
+
+ if (zc.length) {
+ assert(zc.length <= chunk_size);
+ total_mmap += zc.length;
+ if (xflg)
+ hash_zone(addr, zc.length);
+ total += zc.length;
+ }
+ if (zc.recv_skip_hint) {
+ assert(zc.recv_skip_hint <= chunk_size);
+ lu = read(fd, buffer, zc.recv_skip_hint);
+ if (lu > 0) {
+ if (xflg)
+ hash_zone(buffer, lu);
+ total += lu;
+ }
+ }
+ continue;
+ }
+ sub = 0;
+ while (sub < chunk_size) {
+ lu = read(fd, buffer + sub, chunk_size - sub);
+ if (lu == 0)
+ goto end;
+ if (lu < 0)
+ break;
+ if (xflg)
+ hash_zone(buffer + sub, lu);
+ total += lu;
+ sub += lu;
+ }
+ }
+end:
+ gettimeofday(&t1, NULL);
+ delta_usec = (t1.tv_sec - t0.tv_sec) * 1000000 + t1.tv_usec - t0.tv_usec;
+
+ throughput = 0;
+ if (delta_usec)
+ throughput = total * 8.0 / (double)delta_usec / 1000.0;
+ getrusage(RUSAGE_THREAD, &ru);
+ if (total > 1024*1024) {
+ unsigned long total_usec;
+ unsigned long mb = total >> 20;
+ total_usec = 1000000*ru.ru_utime.tv_sec + ru.ru_utime.tv_usec +
+ 1000000*ru.ru_stime.tv_sec + ru.ru_stime.tv_usec;
+ printf("received %lg MB (%lg %% mmap'ed) in %lg s, %lg Gbit\n"
+ " cpu usage user:%lg sys:%lg, %lg usec per MB, %lu c-switches\n",
+ total / (1024.0 * 1024.0),
+ 100.0*total_mmap/total,
+ (double)delta_usec / 1000000.0,
+ throughput,
+ (double)ru.ru_utime.tv_sec + (double)ru.ru_utime.tv_usec / 1000000.0,
+ (double)ru.ru_stime.tv_sec + (double)ru.ru_stime.tv_usec / 1000000.0,
+ (double)total_usec/mb,
+ ru.ru_nvcsw);
+ }
+error:
+ free(buffer);
+ close(fd);
+ if (zflg)
+ munmap(addr, chunk_size);
+ pthread_exit(0);
+}
+
+static void apply_rcvsnd_buf(int fd)
+{
+ if (rcvbuf && setsockopt(fd, SOL_SOCKET,
+ SO_RCVBUF, &rcvbuf, sizeof(rcvbuf)) == -1) {
+ perror("setsockopt SO_RCVBUF");
+ }
+
+ if (sndbuf && setsockopt(fd, SOL_SOCKET,
+ SO_SNDBUF, &sndbuf, sizeof(sndbuf)) == -1) {
+ perror("setsockopt SO_SNDBUF");
+ }
+}
+
+
+static void setup_sockaddr(int domain, const char *str_addr,
+ struct sockaddr_storage *sockaddr)
+{
+ struct sockaddr_in6 *addr6 = (void *) sockaddr;
+ struct sockaddr_in *addr4 = (void *) sockaddr;
+
+ switch (domain) {
+ case PF_INET:
+ memset(addr4, 0, sizeof(*addr4));
+ addr4->sin_family = AF_INET;
+ addr4->sin_port = htons(cfg_port);
+ if (str_addr &&
+ inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1)
+ error(1, 0, "ipv4 parse error: %s", str_addr);
+ break;
+ case PF_INET6:
+ memset(addr6, 0, sizeof(*addr6));
+ addr6->sin6_family = AF_INET6;
+ addr6->sin6_port = htons(cfg_port);
+ if (str_addr &&
+ inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1)
+ error(1, 0, "ipv6 parse error: %s", str_addr);
+ break;
+ default:
+ error(1, 0, "illegal domain");
+ }
+}
+
+static void do_accept(int fdlisten)
+{
+ if (setsockopt(fdlisten, SOL_SOCKET, SO_RCVLOWAT,
+ &chunk_size, sizeof(chunk_size)) == -1) {
+ perror("setsockopt SO_RCVLOWAT");
+ }
+
+ apply_rcvsnd_buf(fdlisten);
+
+ while (1) {
+ struct sockaddr_in addr;
+ socklen_t addrlen = sizeof(addr);
+ pthread_t th;
+ int fd, res;
+
+ fd = accept(fdlisten, (struct sockaddr *)&addr, &addrlen);
+ if (fd == -1) {
+ perror("accept");
+ continue;
+ }
+ res = pthread_create(&th, NULL, child_thread,
+ (void *)(unsigned long)fd);
+ if (res) {
+ errno = res;
+ perror("pthread_create");
+ close(fd);
+ }
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ struct sockaddr_storage listenaddr, addr;
+ unsigned int max_pacing_rate = 0;
+ unsigned long total = 0;
+ char *host = NULL;
+ int fd, c, on = 1;
+ char *buffer;
+ int sflg = 0;
+ int mss = 0;
+
+ while ((c = getopt(argc, argv, "46p:svr:w:H:zxkP:M:")) != -1) {
+ switch (c) {
+ case '4':
+ cfg_family = PF_INET;
+ cfg_alen = sizeof(struct sockaddr_in);
+ break;
+ case '6':
+ cfg_family = PF_INET6;
+ cfg_alen = sizeof(struct sockaddr_in6);
+ break;
+ case 'p':
+ cfg_port = atoi(optarg);
+ break;
+ case 'H':
+ host = optarg;
+ break;
+ case 's': /* server : listen for incoming connections */
+ sflg++;
+ break;
+ case 'r':
+ rcvbuf = atoi(optarg);
+ break;
+ case 'w':
+ sndbuf = atoi(optarg);
+ break;
+ case 'z':
+ zflg = 1;
+ break;
+ case 'M':
+ mss = atoi(optarg);
+ break;
+ case 'x':
+ xflg = 1;
+ break;
+ case 'k':
+ keepflag = 1;
+ break;
+ case 'P':
+ max_pacing_rate = atoi(optarg) ;
+ break;
+ default:
+ exit(1);
+ }
+ }
+ if (sflg) {
+ int fdlisten = socket(cfg_family, SOCK_STREAM, 0);
+
+ if (fdlisten == -1) {
+ perror("socket");
+ exit(1);
+ }
+ apply_rcvsnd_buf(fdlisten);
+ setsockopt(fdlisten, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on));
+
+ setup_sockaddr(cfg_family, host, &listenaddr);
+
+ if (mss &&
+ setsockopt(fdlisten, IPPROTO_TCP, TCP_MAXSEG,
+ &mss, sizeof(mss)) == -1) {
+ perror("setsockopt TCP_MAXSEG");
+ exit(1);
+ }
+ if (bind(fdlisten, (const struct sockaddr *)&listenaddr, cfg_alen) == -1) {
+ perror("bind");
+ exit(1);
+ }
+ if (listen(fdlisten, 128) == -1) {
+ perror("listen");
+ exit(1);
+ }
+ do_accept(fdlisten);
+ }
+ buffer = mmap(NULL, chunk_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (buffer == (char *)-1) {
+ perror("mmap");
+ exit(1);
+ }
+
+ fd = socket(cfg_family, SOCK_STREAM, 0);
+ if (fd == -1) {
+ perror("socket");
+ exit(1);
+ }
+ apply_rcvsnd_buf(fd);
+
+ setup_sockaddr(cfg_family, host, &addr);
+
+ if (mss &&
+ setsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &mss, sizeof(mss)) == -1) {
+ perror("setsockopt TCP_MAXSEG");
+ exit(1);
+ }
+ if (connect(fd, (const struct sockaddr *)&addr, cfg_alen) == -1) {
+ perror("connect");
+ exit(1);
+ }
+ if (max_pacing_rate &&
+ setsockopt(fd, SOL_SOCKET, SO_MAX_PACING_RATE,
+ &max_pacing_rate, sizeof(max_pacing_rate)) == -1)
+ perror("setsockopt SO_MAX_PACING_RATE");
+
+ if (zflg && setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY,
+ &on, sizeof(on)) == -1) {
+ perror("setsockopt SO_ZEROCOPY, (-z option disabled)");
+ zflg = 0;
+ }
+ while (total < FILE_SZ) {
+ long wr = FILE_SZ - total;
+
+ if (wr > chunk_size)
+ wr = chunk_size;
+ /* Note : we just want to fill the pipe with 0 bytes */
+ wr = send(fd, buffer, wr, zflg ? MSG_ZEROCOPY : 0);
+ if (wr <= 0)
+ break;
+ total += wr;
+ }
+ close(fd);
+ munmap(buffer, chunk_size);
+ return 0;
+}
diff --git a/tools/testing/selftests/net/test_bpf.sh b/tools/testing/selftests/net/test_bpf.sh
new file mode 100755
index 000000000..65677909c
--- /dev/null
+++ b/tools/testing/selftests/net/test_bpf.sh
@@ -0,0 +1,11 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Runs bpf test using test_bpf kernel module
+
+if /sbin/modprobe -q test_bpf ; then
+ /sbin/modprobe -q -r test_bpf;
+ echo "test_bpf: ok";
+else
+ echo "test_bpf: [FAIL]";
+ exit 1;
+fi
diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
new file mode 100644
index 000000000..7549d39cc
--- /dev/null
+++ b/tools/testing/selftests/net/tls.c
@@ -0,0 +1,741 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <linux/tls.h>
+#include <linux/tcp.h>
+#include <linux/socket.h>
+
+#include <sys/types.h>
+#include <sys/sendfile.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+
+#include "../kselftest_harness.h"
+
+#define TLS_PAYLOAD_MAX_LEN 16384
+#define SOL_TLS 282
+
+FIXTURE(tls)
+{
+ int fd, cfd;
+ bool notls;
+};
+
+FIXTURE_SETUP(tls)
+{
+ struct tls12_crypto_info_aes_gcm_128 tls12;
+ struct sockaddr_in addr;
+ socklen_t len;
+ int sfd, ret;
+
+ self->notls = false;
+ len = sizeof(addr);
+
+ memset(&tls12, 0, sizeof(tls12));
+ tls12.info.version = TLS_1_2_VERSION;
+ tls12.info.cipher_type = TLS_CIPHER_AES_GCM_128;
+
+ addr.sin_family = AF_INET;
+ addr.sin_addr.s_addr = htonl(INADDR_ANY);
+ addr.sin_port = 0;
+
+ self->fd = socket(AF_INET, SOCK_STREAM, 0);
+ sfd = socket(AF_INET, SOCK_STREAM, 0);
+
+ ret = bind(sfd, &addr, sizeof(addr));
+ ASSERT_EQ(ret, 0);
+ ret = listen(sfd, 10);
+ ASSERT_EQ(ret, 0);
+
+ ret = getsockname(sfd, &addr, &len);
+ ASSERT_EQ(ret, 0);
+
+ ret = connect(self->fd, &addr, sizeof(addr));
+ ASSERT_EQ(ret, 0);
+
+ ret = setsockopt(self->fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
+ if (ret != 0) {
+ self->notls = true;
+ printf("Failure setting TCP_ULP, testing without tls\n");
+ }
+
+ if (!self->notls) {
+ ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12,
+ sizeof(tls12));
+ ASSERT_EQ(ret, 0);
+ }
+
+ self->cfd = accept(sfd, &addr, &len);
+ ASSERT_GE(self->cfd, 0);
+
+ if (!self->notls) {
+ ret = setsockopt(self->cfd, IPPROTO_TCP, TCP_ULP, "tls",
+ sizeof("tls"));
+ ASSERT_EQ(ret, 0);
+
+ ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12,
+ sizeof(tls12));
+ ASSERT_EQ(ret, 0);
+ }
+
+ close(sfd);
+}
+
+FIXTURE_TEARDOWN(tls)
+{
+ close(self->fd);
+ close(self->cfd);
+}
+
+TEST_F(tls, sendfile)
+{
+ int filefd = open("/proc/self/exe", O_RDONLY);
+ struct stat st;
+
+ EXPECT_GE(filefd, 0);
+ fstat(filefd, &st);
+ EXPECT_GE(sendfile(self->fd, filefd, 0, st.st_size), 0);
+}
+
+TEST_F(tls, send_then_sendfile)
+{
+ int filefd = open("/proc/self/exe", O_RDONLY);
+ char const *test_str = "test_send";
+ int to_send = strlen(test_str) + 1;
+ char recv_buf[10];
+ struct stat st;
+ char *buf;
+
+ EXPECT_GE(filefd, 0);
+ fstat(filefd, &st);
+ buf = (char *)malloc(st.st_size);
+
+ EXPECT_EQ(send(self->fd, test_str, to_send, 0), to_send);
+ EXPECT_EQ(recv(self->cfd, recv_buf, to_send, 0), to_send);
+ EXPECT_EQ(memcmp(test_str, recv_buf, to_send), 0);
+
+ EXPECT_GE(sendfile(self->fd, filefd, 0, st.st_size), 0);
+ EXPECT_EQ(recv(self->cfd, buf, st.st_size, 0), st.st_size);
+}
+
+TEST_F(tls, recv_max)
+{
+ unsigned int send_len = TLS_PAYLOAD_MAX_LEN;
+ char recv_mem[TLS_PAYLOAD_MAX_LEN];
+ char buf[TLS_PAYLOAD_MAX_LEN];
+
+ EXPECT_GE(send(self->fd, buf, send_len, 0), 0);
+ EXPECT_NE(recv(self->cfd, recv_mem, send_len, 0), -1);
+ EXPECT_EQ(memcmp(buf, recv_mem, send_len), 0);
+}
+
+TEST_F(tls, recv_small)
+{
+ char const *test_str = "test_read";
+ int send_len = 10;
+ char buf[10];
+
+ send_len = strlen(test_str) + 1;
+ EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+ EXPECT_NE(recv(self->cfd, buf, send_len, 0), -1);
+ EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
+}
+
+TEST_F(tls, msg_more)
+{
+ char const *test_str = "test_read";
+ int send_len = 10;
+ char buf[10 * 2];
+
+ EXPECT_EQ(send(self->fd, test_str, send_len, MSG_MORE), send_len);
+ EXPECT_EQ(recv(self->cfd, buf, send_len, MSG_DONTWAIT), -1);
+ EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+ EXPECT_EQ(recv(self->cfd, buf, send_len * 2, MSG_DONTWAIT),
+ send_len * 2);
+ EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
+}
+
+TEST_F(tls, sendmsg_single)
+{
+ struct msghdr msg;
+
+ char const *test_str = "test_sendmsg";
+ size_t send_len = 13;
+ struct iovec vec;
+ char buf[13];
+
+ vec.iov_base = (char *)test_str;
+ vec.iov_len = send_len;
+ memset(&msg, 0, sizeof(struct msghdr));
+ msg.msg_iov = &vec;
+ msg.msg_iovlen = 1;
+ EXPECT_EQ(sendmsg(self->fd, &msg, 0), send_len);
+ EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len);
+ EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
+}
+
+TEST_F(tls, sendmsg_large)
+{
+ void *mem = malloc(16384);
+ size_t send_len = 16384;
+ size_t sends = 128;
+ struct msghdr msg;
+ size_t recvs = 0;
+ size_t sent = 0;
+
+ memset(&msg, 0, sizeof(struct msghdr));
+ while (sent++ < sends) {
+ struct iovec vec = { (void *)mem, send_len };
+
+ msg.msg_iov = &vec;
+ msg.msg_iovlen = 1;
+ EXPECT_EQ(sendmsg(self->cfd, &msg, 0), send_len);
+ }
+
+ while (recvs++ < sends)
+ EXPECT_NE(recv(self->fd, mem, send_len, 0), -1);
+
+ free(mem);
+}
+
+TEST_F(tls, sendmsg_multiple)
+{
+ char const *test_str = "test_sendmsg_multiple";
+ struct iovec vec[5];
+ char *test_strs[5];
+ struct msghdr msg;
+ int total_len = 0;
+ int len_cmp = 0;
+ int iov_len = 5;
+ char *buf;
+ int i;
+
+ memset(&msg, 0, sizeof(struct msghdr));
+ for (i = 0; i < iov_len; i++) {
+ test_strs[i] = (char *)malloc(strlen(test_str) + 1);
+ snprintf(test_strs[i], strlen(test_str) + 1, "%s", test_str);
+ vec[i].iov_base = (void *)test_strs[i];
+ vec[i].iov_len = strlen(test_strs[i]) + 1;
+ total_len += vec[i].iov_len;
+ }
+ msg.msg_iov = vec;
+ msg.msg_iovlen = iov_len;
+
+ EXPECT_EQ(sendmsg(self->cfd, &msg, 0), total_len);
+ buf = malloc(total_len);
+ EXPECT_NE(recv(self->fd, buf, total_len, 0), -1);
+ for (i = 0; i < iov_len; i++) {
+ EXPECT_EQ(memcmp(test_strs[i], buf + len_cmp,
+ strlen(test_strs[i])),
+ 0);
+ len_cmp += strlen(buf + len_cmp) + 1;
+ }
+ for (i = 0; i < iov_len; i++)
+ free(test_strs[i]);
+ free(buf);
+}
+
+TEST_F(tls, sendmsg_multiple_stress)
+{
+ char const *test_str = "abcdefghijklmno";
+ struct iovec vec[1024];
+ char *test_strs[1024];
+ int iov_len = 1024;
+ int total_len = 0;
+ char buf[1 << 14];
+ struct msghdr msg;
+ int len_cmp = 0;
+ int i;
+
+ memset(&msg, 0, sizeof(struct msghdr));
+ for (i = 0; i < iov_len; i++) {
+ test_strs[i] = (char *)malloc(strlen(test_str) + 1);
+ snprintf(test_strs[i], strlen(test_str) + 1, "%s", test_str);
+ vec[i].iov_base = (void *)test_strs[i];
+ vec[i].iov_len = strlen(test_strs[i]) + 1;
+ total_len += vec[i].iov_len;
+ }
+ msg.msg_iov = vec;
+ msg.msg_iovlen = iov_len;
+
+ EXPECT_EQ(sendmsg(self->fd, &msg, 0), total_len);
+ EXPECT_NE(recv(self->cfd, buf, total_len, 0), -1);
+
+ for (i = 0; i < iov_len; i++)
+ len_cmp += strlen(buf + len_cmp) + 1;
+
+ for (i = 0; i < iov_len; i++)
+ free(test_strs[i]);
+}
+
+TEST_F(tls, splice_from_pipe)
+{
+ int send_len = TLS_PAYLOAD_MAX_LEN;
+ char mem_send[TLS_PAYLOAD_MAX_LEN];
+ char mem_recv[TLS_PAYLOAD_MAX_LEN];
+ int p[2];
+
+ ASSERT_GE(pipe(p), 0);
+ EXPECT_GE(write(p[1], mem_send, send_len), 0);
+ EXPECT_GE(splice(p[0], NULL, self->fd, NULL, send_len, 0), 0);
+ EXPECT_EQ(recv(self->cfd, mem_recv, send_len, MSG_WAITALL), send_len);
+ EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
+}
+
+TEST_F(tls, splice_from_pipe2)
+{
+ int send_len = 16000;
+ char mem_send[16000];
+ char mem_recv[16000];
+ int p2[2];
+ int p[2];
+
+ ASSERT_GE(pipe(p), 0);
+ ASSERT_GE(pipe(p2), 0);
+ EXPECT_GE(write(p[1], mem_send, 8000), 0);
+ EXPECT_GE(splice(p[0], NULL, self->fd, NULL, 8000, 0), 0);
+ EXPECT_GE(write(p2[1], mem_send + 8000, 8000), 0);
+ EXPECT_GE(splice(p2[0], NULL, self->fd, NULL, 8000, 0), 0);
+ EXPECT_GE(recv(self->cfd, mem_recv, send_len, 0), 0);
+ EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
+}
+
+TEST_F(tls, send_and_splice)
+{
+ int send_len = TLS_PAYLOAD_MAX_LEN;
+ char mem_send[TLS_PAYLOAD_MAX_LEN];
+ char mem_recv[TLS_PAYLOAD_MAX_LEN];
+ char const *test_str = "test_read";
+ int send_len2 = 10;
+ char buf[10];
+ int p[2];
+
+ ASSERT_GE(pipe(p), 0);
+ EXPECT_EQ(send(self->fd, test_str, send_len2, 0), send_len2);
+ EXPECT_EQ(recv(self->cfd, buf, send_len2, MSG_WAITALL), send_len2);
+ EXPECT_EQ(memcmp(test_str, buf, send_len2), 0);
+
+ EXPECT_GE(write(p[1], mem_send, send_len), send_len);
+ EXPECT_GE(splice(p[0], NULL, self->fd, NULL, send_len, 0), send_len);
+
+ EXPECT_EQ(recv(self->cfd, mem_recv, send_len, MSG_WAITALL), send_len);
+ EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
+}
+
+TEST_F(tls, splice_to_pipe)
+{
+ int send_len = TLS_PAYLOAD_MAX_LEN;
+ char mem_send[TLS_PAYLOAD_MAX_LEN];
+ char mem_recv[TLS_PAYLOAD_MAX_LEN];
+ int p[2];
+
+ ASSERT_GE(pipe(p), 0);
+ EXPECT_GE(send(self->fd, mem_send, send_len, 0), 0);
+ EXPECT_GE(splice(self->cfd, NULL, p[1], NULL, send_len, 0), 0);
+ EXPECT_GE(read(p[0], mem_recv, send_len), 0);
+ EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
+}
+
+TEST_F(tls, recvmsg_single)
+{
+ char const *test_str = "test_recvmsg_single";
+ int send_len = strlen(test_str) + 1;
+ char buf[20];
+ struct msghdr hdr;
+ struct iovec vec;
+
+ memset(&hdr, 0, sizeof(hdr));
+ EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+ vec.iov_base = (char *)buf;
+ vec.iov_len = send_len;
+ hdr.msg_iovlen = 1;
+ hdr.msg_iov = &vec;
+ EXPECT_NE(recvmsg(self->cfd, &hdr, 0), -1);
+ EXPECT_EQ(memcmp(test_str, buf, send_len), 0);
+}
+
+TEST_F(tls, recvmsg_single_max)
+{
+ int send_len = TLS_PAYLOAD_MAX_LEN;
+ char send_mem[TLS_PAYLOAD_MAX_LEN];
+ char recv_mem[TLS_PAYLOAD_MAX_LEN];
+ struct iovec vec;
+ struct msghdr hdr;
+
+ EXPECT_EQ(send(self->fd, send_mem, send_len, 0), send_len);
+ vec.iov_base = (char *)recv_mem;
+ vec.iov_len = TLS_PAYLOAD_MAX_LEN;
+
+ hdr.msg_iovlen = 1;
+ hdr.msg_iov = &vec;
+ EXPECT_NE(recvmsg(self->cfd, &hdr, 0), -1);
+ EXPECT_EQ(memcmp(send_mem, recv_mem, send_len), 0);
+}
+
+TEST_F(tls, recvmsg_multiple)
+{
+ unsigned int msg_iovlen = 1024;
+ unsigned int len_compared = 0;
+ struct iovec vec[1024];
+ char *iov_base[1024];
+ unsigned int iov_len = 16;
+ int send_len = 1 << 14;
+ char buf[1 << 14];
+ struct msghdr hdr;
+ int i;
+
+ EXPECT_EQ(send(self->fd, buf, send_len, 0), send_len);
+ for (i = 0; i < msg_iovlen; i++) {
+ iov_base[i] = (char *)malloc(iov_len);
+ vec[i].iov_base = iov_base[i];
+ vec[i].iov_len = iov_len;
+ }
+
+ hdr.msg_iovlen = msg_iovlen;
+ hdr.msg_iov = vec;
+ EXPECT_NE(recvmsg(self->cfd, &hdr, 0), -1);
+ for (i = 0; i < msg_iovlen; i++)
+ len_compared += iov_len;
+
+ for (i = 0; i < msg_iovlen; i++)
+ free(iov_base[i]);
+}
+
+TEST_F(tls, single_send_multiple_recv)
+{
+ unsigned int total_len = TLS_PAYLOAD_MAX_LEN * 2;
+ unsigned int send_len = TLS_PAYLOAD_MAX_LEN;
+ char send_mem[TLS_PAYLOAD_MAX_LEN * 2];
+ char recv_mem[TLS_PAYLOAD_MAX_LEN * 2];
+
+ EXPECT_GE(send(self->fd, send_mem, total_len, 0), 0);
+ memset(recv_mem, 0, total_len);
+
+ EXPECT_NE(recv(self->cfd, recv_mem, send_len, 0), -1);
+ EXPECT_NE(recv(self->cfd, recv_mem + send_len, send_len, 0), -1);
+ EXPECT_EQ(memcmp(send_mem, recv_mem, total_len), 0);
+}
+
+TEST_F(tls, multiple_send_single_recv)
+{
+ unsigned int total_len = 2 * 10;
+ unsigned int send_len = 10;
+ char recv_mem[2 * 10];
+ char send_mem[10];
+
+ EXPECT_GE(send(self->fd, send_mem, send_len, 0), 0);
+ EXPECT_GE(send(self->fd, send_mem, send_len, 0), 0);
+ memset(recv_mem, 0, total_len);
+ EXPECT_EQ(recv(self->cfd, recv_mem, total_len, 0), total_len);
+
+ EXPECT_EQ(memcmp(send_mem, recv_mem, send_len), 0);
+ EXPECT_EQ(memcmp(send_mem, recv_mem + send_len, send_len), 0);
+}
+
+TEST_F(tls, recv_partial)
+{
+ char const *test_str = "test_read_partial";
+ char const *test_str_first = "test_read";
+ char const *test_str_second = "_partial";
+ int send_len = strlen(test_str) + 1;
+ char recv_mem[18];
+
+ memset(recv_mem, 0, sizeof(recv_mem));
+ EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+ EXPECT_NE(recv(self->cfd, recv_mem, strlen(test_str_first), 0), -1);
+ EXPECT_EQ(memcmp(test_str_first, recv_mem, strlen(test_str_first)), 0);
+ memset(recv_mem, 0, sizeof(recv_mem));
+ EXPECT_NE(recv(self->cfd, recv_mem, strlen(test_str_second), 0), -1);
+ EXPECT_EQ(memcmp(test_str_second, recv_mem, strlen(test_str_second)),
+ 0);
+}
+
+TEST_F(tls, recv_nonblock)
+{
+ char buf[4096];
+ bool err;
+
+ EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_DONTWAIT), -1);
+ err = (errno == EAGAIN || errno == EWOULDBLOCK);
+ EXPECT_EQ(err, true);
+}
+
+TEST_F(tls, recv_peek)
+{
+ char const *test_str = "test_read_peek";
+ int send_len = strlen(test_str) + 1;
+ char buf[15];
+
+ EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+ EXPECT_NE(recv(self->cfd, buf, send_len, MSG_PEEK), -1);
+ EXPECT_EQ(memcmp(test_str, buf, send_len), 0);
+ memset(buf, 0, sizeof(buf));
+ EXPECT_NE(recv(self->cfd, buf, send_len, 0), -1);
+ EXPECT_EQ(memcmp(test_str, buf, send_len), 0);
+}
+
+TEST_F(tls, recv_peek_multiple)
+{
+ char const *test_str = "test_read_peek";
+ int send_len = strlen(test_str) + 1;
+ unsigned int num_peeks = 100;
+ char buf[15];
+ int i;
+
+ EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+ for (i = 0; i < num_peeks; i++) {
+ EXPECT_NE(recv(self->cfd, buf, send_len, MSG_PEEK), -1);
+ EXPECT_EQ(memcmp(test_str, buf, send_len), 0);
+ memset(buf, 0, sizeof(buf));
+ }
+ EXPECT_NE(recv(self->cfd, buf, send_len, 0), -1);
+ EXPECT_EQ(memcmp(test_str, buf, send_len), 0);
+}
+
+TEST_F(tls, recv_peek_multiple_records)
+{
+ char const *test_str = "test_read_peek_mult_recs";
+ char const *test_str_first = "test_read_peek";
+ char const *test_str_second = "_mult_recs";
+ int len;
+ char buf[64];
+
+ len = strlen(test_str_first);
+ EXPECT_EQ(send(self->fd, test_str_first, len, 0), len);
+
+ len = strlen(test_str_second) + 1;
+ EXPECT_EQ(send(self->fd, test_str_second, len, 0), len);
+
+ len = strlen(test_str_first);
+ memset(buf, 0, len);
+ EXPECT_EQ(recv(self->cfd, buf, len, MSG_PEEK | MSG_WAITALL), len);
+
+ /* MSG_PEEK can only peek into the current record. */
+ len = strlen(test_str_first);
+ EXPECT_EQ(memcmp(test_str_first, buf, len), 0);
+
+ len = strlen(test_str) + 1;
+ memset(buf, 0, len);
+ EXPECT_EQ(recv(self->cfd, buf, len, MSG_WAITALL), len);
+
+ /* Non-MSG_PEEK will advance strparser (and therefore record)
+ * however.
+ */
+ len = strlen(test_str) + 1;
+ EXPECT_EQ(memcmp(test_str, buf, len), 0);
+
+ /* MSG_MORE will hold current record open, so later MSG_PEEK
+ * will see everything.
+ */
+ len = strlen(test_str_first);
+ EXPECT_EQ(send(self->fd, test_str_first, len, MSG_MORE), len);
+
+ len = strlen(test_str_second) + 1;
+ EXPECT_EQ(send(self->fd, test_str_second, len, 0), len);
+
+ len = strlen(test_str) + 1;
+ memset(buf, 0, len);
+ EXPECT_EQ(recv(self->cfd, buf, len, MSG_PEEK | MSG_WAITALL), len);
+
+ len = strlen(test_str) + 1;
+ EXPECT_EQ(memcmp(test_str, buf, len), 0);
+}
+
+TEST_F(tls, pollin)
+{
+ char const *test_str = "test_poll";
+ struct pollfd fd = { 0, 0, 0 };
+ char buf[10];
+ int send_len = 10;
+
+ EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+ fd.fd = self->cfd;
+ fd.events = POLLIN;
+
+ EXPECT_EQ(poll(&fd, 1, 20), 1);
+ EXPECT_EQ(fd.revents & POLLIN, 1);
+ EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len);
+ /* Test timing out */
+ EXPECT_EQ(poll(&fd, 1, 20), 0);
+}
+
+TEST_F(tls, poll_wait)
+{
+ char const *test_str = "test_poll_wait";
+ int send_len = strlen(test_str) + 1;
+ struct pollfd fd = { 0, 0, 0 };
+ char recv_mem[15];
+
+ fd.fd = self->cfd;
+ fd.events = POLLIN;
+ EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+ /* Set timeout to inf. secs */
+ EXPECT_EQ(poll(&fd, 1, -1), 1);
+ EXPECT_EQ(fd.revents & POLLIN, 1);
+ EXPECT_EQ(recv(self->cfd, recv_mem, send_len, 0), send_len);
+}
+
+TEST_F(tls, blocking)
+{
+ size_t data = 100000;
+ int res = fork();
+
+ EXPECT_NE(res, -1);
+
+ if (res) {
+ /* parent */
+ size_t left = data;
+ char buf[16384];
+ int status;
+ int pid2;
+
+ while (left) {
+ int res = send(self->fd, buf,
+ left > 16384 ? 16384 : left, 0);
+
+ EXPECT_GE(res, 0);
+ left -= res;
+ }
+
+ pid2 = wait(&status);
+ EXPECT_EQ(status, 0);
+ EXPECT_EQ(res, pid2);
+ } else {
+ /* child */
+ size_t left = data;
+ char buf[16384];
+
+ while (left) {
+ int res = recv(self->cfd, buf,
+ left > 16384 ? 16384 : left, 0);
+
+ EXPECT_GE(res, 0);
+ left -= res;
+ }
+ }
+}
+
+TEST_F(tls, nonblocking)
+{
+ size_t data = 100000;
+ int sendbuf = 100;
+ int flags;
+ int res;
+
+ flags = fcntl(self->fd, F_GETFL, 0);
+ fcntl(self->fd, F_SETFL, flags | O_NONBLOCK);
+ fcntl(self->cfd, F_SETFL, flags | O_NONBLOCK);
+
+ /* Ensure nonblocking behavior by imposing a small send
+ * buffer.
+ */
+ EXPECT_EQ(setsockopt(self->fd, SOL_SOCKET, SO_SNDBUF,
+ &sendbuf, sizeof(sendbuf)), 0);
+
+ res = fork();
+ EXPECT_NE(res, -1);
+
+ if (res) {
+ /* parent */
+ bool eagain = false;
+ size_t left = data;
+ char buf[16384];
+ int status;
+ int pid2;
+
+ while (left) {
+ int res = send(self->fd, buf,
+ left > 16384 ? 16384 : left, 0);
+
+ if (res == -1 && errno == EAGAIN) {
+ eagain = true;
+ usleep(10000);
+ continue;
+ }
+ EXPECT_GE(res, 0);
+ left -= res;
+ }
+
+ EXPECT_TRUE(eagain);
+ pid2 = wait(&status);
+
+ EXPECT_EQ(status, 0);
+ EXPECT_EQ(res, pid2);
+ } else {
+ /* child */
+ bool eagain = false;
+ size_t left = data;
+ char buf[16384];
+
+ while (left) {
+ int res = recv(self->cfd, buf,
+ left > 16384 ? 16384 : left, 0);
+
+ if (res == -1 && errno == EAGAIN) {
+ eagain = true;
+ usleep(10000);
+ continue;
+ }
+ EXPECT_GE(res, 0);
+ left -= res;
+ }
+ EXPECT_TRUE(eagain);
+ }
+}
+
+TEST_F(tls, control_msg)
+{
+ if (self->notls)
+ return;
+
+ char cbuf[CMSG_SPACE(sizeof(char))];
+ char const *test_str = "test_read";
+ int cmsg_len = sizeof(char);
+ char record_type = 100;
+ struct cmsghdr *cmsg;
+ struct msghdr msg;
+ int send_len = 10;
+ struct iovec vec;
+ char buf[10];
+
+ vec.iov_base = (char *)test_str;
+ vec.iov_len = 10;
+ memset(&msg, 0, sizeof(struct msghdr));
+ msg.msg_iov = &vec;
+ msg.msg_iovlen = 1;
+ msg.msg_control = cbuf;
+ msg.msg_controllen = sizeof(cbuf);
+ cmsg = CMSG_FIRSTHDR(&msg);
+ cmsg->cmsg_level = SOL_TLS;
+ /* test sending non-record types. */
+ cmsg->cmsg_type = TLS_SET_RECORD_TYPE;
+ cmsg->cmsg_len = CMSG_LEN(cmsg_len);
+ *CMSG_DATA(cmsg) = record_type;
+ msg.msg_controllen = cmsg->cmsg_len;
+
+ EXPECT_EQ(sendmsg(self->fd, &msg, 0), send_len);
+ /* Should fail because we didn't provide a control message */
+ EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1);
+
+ vec.iov_base = buf;
+ EXPECT_EQ(recvmsg(self->cfd, &msg, 0), send_len);
+ cmsg = CMSG_FIRSTHDR(&msg);
+ EXPECT_NE(cmsg, NULL);
+ EXPECT_EQ(cmsg->cmsg_level, SOL_TLS);
+ EXPECT_EQ(cmsg->cmsg_type, TLS_GET_RECORD_TYPE);
+ record_type = *((unsigned char *)CMSG_DATA(cmsg));
+ EXPECT_EQ(record_type, 100);
+ EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c
new file mode 100644
index 000000000..23177b643
--- /dev/null
+++ b/tools/testing/selftests/net/udpgso.c
@@ -0,0 +1,685 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <stddef.h>
+#include <arpa/inet.h>
+#include <error.h>
+#include <errno.h>
+#include <net/if.h>
+#include <linux/in.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <netinet/if_ether.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/udp.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#ifndef ETH_MAX_MTU
+#define ETH_MAX_MTU 0xFFFFU
+#endif
+
+#ifndef UDP_SEGMENT
+#define UDP_SEGMENT 103
+#endif
+
+#ifndef UDP_MAX_SEGMENTS
+#define UDP_MAX_SEGMENTS (1 << 6UL)
+#endif
+
+#define CONST_MTU_TEST 1500
+
+#define CONST_HDRLEN_V4 (sizeof(struct iphdr) + sizeof(struct udphdr))
+#define CONST_HDRLEN_V6 (sizeof(struct ip6_hdr) + sizeof(struct udphdr))
+
+#define CONST_MSS_V4 (CONST_MTU_TEST - CONST_HDRLEN_V4)
+#define CONST_MSS_V6 (CONST_MTU_TEST - CONST_HDRLEN_V6)
+
+#define CONST_MAX_SEGS_V4 (ETH_MAX_MTU / CONST_MSS_V4)
+#define CONST_MAX_SEGS_V6 (ETH_MAX_MTU / CONST_MSS_V6)
+
+static bool cfg_do_ipv4;
+static bool cfg_do_ipv6;
+static bool cfg_do_connected;
+static bool cfg_do_connectionless;
+static bool cfg_do_msgmore;
+static bool cfg_do_setsockopt;
+static int cfg_specific_test_id = -1;
+
+static const char cfg_ifname[] = "lo";
+static unsigned short cfg_port = 9000;
+
+static char buf[ETH_MAX_MTU];
+
+struct testcase {
+ int tlen; /* send() buffer size, may exceed mss */
+ bool tfail; /* send() call is expected to fail */
+ int gso_len; /* mss after applying gso */
+ int r_num_mss; /* recv(): number of calls of full mss */
+ int r_len_last; /* recv(): size of last non-mss dgram, if any */
+};
+
+const struct in6_addr addr6 = IN6ADDR_LOOPBACK_INIT;
+const struct in_addr addr4 = { .s_addr = __constant_htonl(INADDR_LOOPBACK + 2) };
+
+struct testcase testcases_v4[] = {
+ {
+ /* no GSO: send a single byte */
+ .tlen = 1,
+ .r_len_last = 1,
+ },
+ {
+ /* no GSO: send a single MSS */
+ .tlen = CONST_MSS_V4,
+ .r_num_mss = 1,
+ },
+ {
+ /* no GSO: send a single MSS + 1B: fail */
+ .tlen = CONST_MSS_V4 + 1,
+ .tfail = true,
+ },
+ {
+ /* send a single MSS: will fall back to no GSO */
+ .tlen = CONST_MSS_V4,
+ .gso_len = CONST_MSS_V4,
+ .r_num_mss = 1,
+ },
+ {
+ /* send a single MSS + 1B */
+ .tlen = CONST_MSS_V4 + 1,
+ .gso_len = CONST_MSS_V4,
+ .r_num_mss = 1,
+ .r_len_last = 1,
+ },
+ {
+ /* send exactly 2 MSS */
+ .tlen = CONST_MSS_V4 * 2,
+ .gso_len = CONST_MSS_V4,
+ .r_num_mss = 2,
+ },
+ {
+ /* send 2 MSS + 1B */
+ .tlen = (CONST_MSS_V4 * 2) + 1,
+ .gso_len = CONST_MSS_V4,
+ .r_num_mss = 2,
+ .r_len_last = 1,
+ },
+ {
+ /* send MAX segs */
+ .tlen = (ETH_MAX_MTU / CONST_MSS_V4) * CONST_MSS_V4,
+ .gso_len = CONST_MSS_V4,
+ .r_num_mss = (ETH_MAX_MTU / CONST_MSS_V4),
+ },
+
+ {
+ /* send MAX bytes */
+ .tlen = ETH_MAX_MTU - CONST_HDRLEN_V4,
+ .gso_len = CONST_MSS_V4,
+ .r_num_mss = CONST_MAX_SEGS_V4,
+ .r_len_last = ETH_MAX_MTU - CONST_HDRLEN_V4 -
+ (CONST_MAX_SEGS_V4 * CONST_MSS_V4),
+ },
+ {
+ /* send MAX + 1: fail */
+ .tlen = ETH_MAX_MTU - CONST_HDRLEN_V4 + 1,
+ .gso_len = CONST_MSS_V4,
+ .tfail = true,
+ },
+ {
+ /* send a single 1B MSS: will fall back to no GSO */
+ .tlen = 1,
+ .gso_len = 1,
+ .r_num_mss = 1,
+ },
+ {
+ /* send 2 1B segments */
+ .tlen = 2,
+ .gso_len = 1,
+ .r_num_mss = 2,
+ },
+ {
+ /* send 2B + 2B + 1B segments */
+ .tlen = 5,
+ .gso_len = 2,
+ .r_num_mss = 2,
+ .r_len_last = 1,
+ },
+ {
+ /* send max number of min sized segments */
+ .tlen = UDP_MAX_SEGMENTS,
+ .gso_len = 1,
+ .r_num_mss = UDP_MAX_SEGMENTS,
+ },
+ {
+ /* send max number + 1 of min sized segments: fail */
+ .tlen = UDP_MAX_SEGMENTS + 1,
+ .gso_len = 1,
+ .tfail = true,
+ },
+ {
+ /* EOL */
+ }
+};
+
+#ifndef IP6_MAX_MTU
+#define IP6_MAX_MTU (ETH_MAX_MTU + sizeof(struct ip6_hdr))
+#endif
+
+struct testcase testcases_v6[] = {
+ {
+ /* no GSO: send a single byte */
+ .tlen = 1,
+ .r_len_last = 1,
+ },
+ {
+ /* no GSO: send a single MSS */
+ .tlen = CONST_MSS_V6,
+ .r_num_mss = 1,
+ },
+ {
+ /* no GSO: send a single MSS + 1B: fail */
+ .tlen = CONST_MSS_V6 + 1,
+ .tfail = true,
+ },
+ {
+ /* send a single MSS: will fall back to no GSO */
+ .tlen = CONST_MSS_V6,
+ .gso_len = CONST_MSS_V6,
+ .r_num_mss = 1,
+ },
+ {
+ /* send a single MSS + 1B */
+ .tlen = CONST_MSS_V6 + 1,
+ .gso_len = CONST_MSS_V6,
+ .r_num_mss = 1,
+ .r_len_last = 1,
+ },
+ {
+ /* send exactly 2 MSS */
+ .tlen = CONST_MSS_V6 * 2,
+ .gso_len = CONST_MSS_V6,
+ .r_num_mss = 2,
+ },
+ {
+ /* send 2 MSS + 1B */
+ .tlen = (CONST_MSS_V6 * 2) + 1,
+ .gso_len = CONST_MSS_V6,
+ .r_num_mss = 2,
+ .r_len_last = 1,
+ },
+ {
+ /* send MAX segs */
+ .tlen = (IP6_MAX_MTU / CONST_MSS_V6) * CONST_MSS_V6,
+ .gso_len = CONST_MSS_V6,
+ .r_num_mss = (IP6_MAX_MTU / CONST_MSS_V6),
+ },
+
+ {
+ /* send MAX bytes */
+ .tlen = IP6_MAX_MTU - CONST_HDRLEN_V6,
+ .gso_len = CONST_MSS_V6,
+ .r_num_mss = CONST_MAX_SEGS_V6,
+ .r_len_last = IP6_MAX_MTU - CONST_HDRLEN_V6 -
+ (CONST_MAX_SEGS_V6 * CONST_MSS_V6),
+ },
+ {
+ /* send MAX + 1: fail */
+ .tlen = IP6_MAX_MTU - CONST_HDRLEN_V6 + 1,
+ .gso_len = CONST_MSS_V6,
+ .tfail = true,
+ },
+ {
+ /* send a single 1B MSS: will fall back to no GSO */
+ .tlen = 1,
+ .gso_len = 1,
+ .r_num_mss = 1,
+ },
+ {
+ /* send 2 1B segments */
+ .tlen = 2,
+ .gso_len = 1,
+ .r_num_mss = 2,
+ },
+ {
+ /* send 2B + 2B + 1B segments */
+ .tlen = 5,
+ .gso_len = 2,
+ .r_num_mss = 2,
+ .r_len_last = 1,
+ },
+ {
+ /* send max number of min sized segments */
+ .tlen = UDP_MAX_SEGMENTS,
+ .gso_len = 1,
+ .r_num_mss = UDP_MAX_SEGMENTS,
+ },
+ {
+ /* send max number + 1 of min sized segments: fail */
+ .tlen = UDP_MAX_SEGMENTS + 1,
+ .gso_len = 1,
+ .tfail = true,
+ },
+ {
+ /* EOL */
+ }
+};
+
+static unsigned int get_device_mtu(int fd, const char *ifname)
+{
+ struct ifreq ifr;
+
+ memset(&ifr, 0, sizeof(ifr));
+
+ strcpy(ifr.ifr_name, ifname);
+
+ if (ioctl(fd, SIOCGIFMTU, &ifr))
+ error(1, errno, "ioctl get mtu");
+
+ return ifr.ifr_mtu;
+}
+
+static void __set_device_mtu(int fd, const char *ifname, unsigned int mtu)
+{
+ struct ifreq ifr;
+
+ memset(&ifr, 0, sizeof(ifr));
+
+ ifr.ifr_mtu = mtu;
+ strcpy(ifr.ifr_name, ifname);
+
+ if (ioctl(fd, SIOCSIFMTU, &ifr))
+ error(1, errno, "ioctl set mtu");
+}
+
+static void set_device_mtu(int fd, int mtu)
+{
+ int val;
+
+ val = get_device_mtu(fd, cfg_ifname);
+ fprintf(stderr, "device mtu (orig): %u\n", val);
+
+ __set_device_mtu(fd, cfg_ifname, mtu);
+ val = get_device_mtu(fd, cfg_ifname);
+ if (val != mtu)
+ error(1, 0, "unable to set device mtu to %u\n", val);
+
+ fprintf(stderr, "device mtu (test): %u\n", val);
+}
+
+static void set_pmtu_discover(int fd, bool is_ipv4)
+{
+ int level, name, val;
+
+ if (is_ipv4) {
+ level = SOL_IP;
+ name = IP_MTU_DISCOVER;
+ val = IP_PMTUDISC_DO;
+ } else {
+ level = SOL_IPV6;
+ name = IPV6_MTU_DISCOVER;
+ val = IPV6_PMTUDISC_DO;
+ }
+
+ if (setsockopt(fd, level, name, &val, sizeof(val)))
+ error(1, errno, "setsockopt path mtu");
+}
+
+static unsigned int get_path_mtu(int fd, bool is_ipv4)
+{
+ socklen_t vallen;
+ unsigned int mtu;
+ int ret;
+
+ vallen = sizeof(mtu);
+ if (is_ipv4)
+ ret = getsockopt(fd, SOL_IP, IP_MTU, &mtu, &vallen);
+ else
+ ret = getsockopt(fd, SOL_IPV6, IPV6_MTU, &mtu, &vallen);
+
+ if (ret)
+ error(1, errno, "getsockopt mtu");
+
+
+ fprintf(stderr, "path mtu (read): %u\n", mtu);
+ return mtu;
+}
+
+/* very wordy version of system("ip route add dev lo mtu 1500 127.0.0.3/32") */
+static void set_route_mtu(int mtu, bool is_ipv4)
+{
+ struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
+ struct nlmsghdr *nh;
+ struct rtattr *rta;
+ struct rtmsg *rt;
+ char data[NLMSG_ALIGN(sizeof(*nh)) +
+ NLMSG_ALIGN(sizeof(*rt)) +
+ NLMSG_ALIGN(RTA_LENGTH(sizeof(addr6))) +
+ NLMSG_ALIGN(RTA_LENGTH(sizeof(int))) +
+ NLMSG_ALIGN(RTA_LENGTH(0) + RTA_LENGTH(sizeof(int)))];
+ int fd, ret, alen, off = 0;
+
+ alen = is_ipv4 ? sizeof(addr4) : sizeof(addr6);
+
+ fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+ if (fd == -1)
+ error(1, errno, "socket netlink");
+
+ memset(data, 0, sizeof(data));
+
+ nh = (void *)data;
+ nh->nlmsg_type = RTM_NEWROUTE;
+ nh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE;
+ off += NLMSG_ALIGN(sizeof(*nh));
+
+ rt = (void *)(data + off);
+ rt->rtm_family = is_ipv4 ? AF_INET : AF_INET6;
+ rt->rtm_table = RT_TABLE_MAIN;
+ rt->rtm_dst_len = alen << 3;
+ rt->rtm_protocol = RTPROT_BOOT;
+ rt->rtm_scope = RT_SCOPE_UNIVERSE;
+ rt->rtm_type = RTN_UNICAST;
+ off += NLMSG_ALIGN(sizeof(*rt));
+
+ rta = (void *)(data + off);
+ rta->rta_type = RTA_DST;
+ rta->rta_len = RTA_LENGTH(alen);
+ if (is_ipv4)
+ memcpy(RTA_DATA(rta), &addr4, alen);
+ else
+ memcpy(RTA_DATA(rta), &addr6, alen);
+ off += NLMSG_ALIGN(rta->rta_len);
+
+ rta = (void *)(data + off);
+ rta->rta_type = RTA_OIF;
+ rta->rta_len = RTA_LENGTH(sizeof(int));
+ *((int *)(RTA_DATA(rta))) = 1; //if_nametoindex("lo");
+ off += NLMSG_ALIGN(rta->rta_len);
+
+ /* MTU is a subtype in a metrics type */
+ rta = (void *)(data + off);
+ rta->rta_type = RTA_METRICS;
+ rta->rta_len = RTA_LENGTH(0) + RTA_LENGTH(sizeof(int));
+ off += NLMSG_ALIGN(rta->rta_len);
+
+ /* now fill MTU subtype. Note that it fits within above rta_len */
+ rta = (void *)(((char *) rta) + RTA_LENGTH(0));
+ rta->rta_type = RTAX_MTU;
+ rta->rta_len = RTA_LENGTH(sizeof(int));
+ *((int *)(RTA_DATA(rta))) = mtu;
+
+ nh->nlmsg_len = off;
+
+ ret = sendto(fd, data, off, 0, (void *)&nladdr, sizeof(nladdr));
+ if (ret != off)
+ error(1, errno, "send netlink: %uB != %uB\n", ret, off);
+
+ if (close(fd))
+ error(1, errno, "close netlink");
+
+ fprintf(stderr, "route mtu (test): %u\n", mtu);
+}
+
+static bool __send_one(int fd, struct msghdr *msg, int flags)
+{
+ int ret;
+
+ ret = sendmsg(fd, msg, flags);
+ if (ret == -1 &&
+ (errno == EMSGSIZE || errno == ENOMEM || errno == EINVAL))
+ return false;
+ if (ret == -1)
+ error(1, errno, "sendmsg");
+ if (ret != msg->msg_iov->iov_len)
+ error(1, 0, "sendto: %d != %lu", ret, msg->msg_iov->iov_len);
+ if (msg->msg_flags)
+ error(1, 0, "sendmsg: return flags 0x%x\n", msg->msg_flags);
+
+ return true;
+}
+
+static bool send_one(int fd, int len, int gso_len,
+ struct sockaddr *addr, socklen_t alen)
+{
+ char control[CMSG_SPACE(sizeof(uint16_t))] = {0};
+ struct msghdr msg = {0};
+ struct iovec iov = {0};
+ struct cmsghdr *cm;
+
+ iov.iov_base = buf;
+ iov.iov_len = len;
+
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+
+ msg.msg_name = addr;
+ msg.msg_namelen = alen;
+
+ if (gso_len && !cfg_do_setsockopt) {
+ msg.msg_control = control;
+ msg.msg_controllen = sizeof(control);
+
+ cm = CMSG_FIRSTHDR(&msg);
+ cm->cmsg_level = SOL_UDP;
+ cm->cmsg_type = UDP_SEGMENT;
+ cm->cmsg_len = CMSG_LEN(sizeof(uint16_t));
+ *((uint16_t *) CMSG_DATA(cm)) = gso_len;
+ }
+
+ /* If MSG_MORE, send 1 byte followed by remainder */
+ if (cfg_do_msgmore && len > 1) {
+ iov.iov_len = 1;
+ if (!__send_one(fd, &msg, MSG_MORE))
+ error(1, 0, "send 1B failed");
+
+ iov.iov_base++;
+ iov.iov_len = len - 1;
+ }
+
+ return __send_one(fd, &msg, 0);
+}
+
+static int recv_one(int fd, int flags)
+{
+ int ret;
+
+ ret = recv(fd, buf, sizeof(buf), flags);
+ if (ret == -1 && errno == EAGAIN && (flags & MSG_DONTWAIT))
+ return 0;
+ if (ret == -1)
+ error(1, errno, "recv");
+
+ return ret;
+}
+
+static void run_one(struct testcase *test, int fdt, int fdr,
+ struct sockaddr *addr, socklen_t alen)
+{
+ int i, ret, val, mss;
+ bool sent;
+
+ fprintf(stderr, "ipv%d tx:%d gso:%d %s\n",
+ addr->sa_family == AF_INET ? 4 : 6,
+ test->tlen, test->gso_len,
+ test->tfail ? "(fail)" : "");
+
+ val = test->gso_len;
+ if (cfg_do_setsockopt) {
+ if (setsockopt(fdt, SOL_UDP, UDP_SEGMENT, &val, sizeof(val)))
+ error(1, errno, "setsockopt udp segment");
+ }
+
+ sent = send_one(fdt, test->tlen, test->gso_len, addr, alen);
+ if (sent && test->tfail)
+ error(1, 0, "send succeeded while expecting failure");
+ if (!sent && !test->tfail)
+ error(1, 0, "send failed while expecting success");
+ if (!sent)
+ return;
+
+ if (test->gso_len)
+ mss = test->gso_len;
+ else
+ mss = addr->sa_family == AF_INET ? CONST_MSS_V4 : CONST_MSS_V6;
+
+
+ /* Recv all full MSS datagrams */
+ for (i = 0; i < test->r_num_mss; i++) {
+ ret = recv_one(fdr, 0);
+ if (ret != mss)
+ error(1, 0, "recv.%d: %d != %d", i, ret, mss);
+ }
+
+ /* Recv the non-full last datagram, if tlen was not a multiple of mss */
+ if (test->r_len_last) {
+ ret = recv_one(fdr, 0);
+ if (ret != test->r_len_last)
+ error(1, 0, "recv.%d: %d != %d (last)",
+ i, ret, test->r_len_last);
+ }
+
+ /* Verify received all data */
+ ret = recv_one(fdr, MSG_DONTWAIT);
+ if (ret)
+ error(1, 0, "recv: unexpected datagram");
+}
+
+static void run_all(int fdt, int fdr, struct sockaddr *addr, socklen_t alen)
+{
+ struct testcase *tests, *test;
+
+ tests = addr->sa_family == AF_INET ? testcases_v4 : testcases_v6;
+
+ for (test = tests; test->tlen; test++) {
+ /* if a specific test is given, then skip all others */
+ if (cfg_specific_test_id == -1 ||
+ cfg_specific_test_id == test - tests)
+ run_one(test, fdt, fdr, addr, alen);
+ }
+}
+
+static void run_test(struct sockaddr *addr, socklen_t alen)
+{
+ struct timeval tv = { .tv_usec = 100 * 1000 };
+ int fdr, fdt, val;
+
+ fdr = socket(addr->sa_family, SOCK_DGRAM, 0);
+ if (fdr == -1)
+ error(1, errno, "socket r");
+
+ if (bind(fdr, addr, alen))
+ error(1, errno, "bind");
+
+ /* Have tests fail quickly instead of hang */
+ if (setsockopt(fdr, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))
+ error(1, errno, "setsockopt rcv timeout");
+
+ fdt = socket(addr->sa_family, SOCK_DGRAM, 0);
+ if (fdt == -1)
+ error(1, errno, "socket t");
+
+ /* Do not fragment these datagrams: only succeed if GSO works */
+ set_pmtu_discover(fdt, addr->sa_family == AF_INET);
+
+ if (cfg_do_connectionless) {
+ set_device_mtu(fdt, CONST_MTU_TEST);
+ run_all(fdt, fdr, addr, alen);
+ }
+
+ if (cfg_do_connected) {
+ set_device_mtu(fdt, CONST_MTU_TEST + 100);
+ set_route_mtu(CONST_MTU_TEST, addr->sa_family == AF_INET);
+
+ if (connect(fdt, addr, alen))
+ error(1, errno, "connect");
+
+ val = get_path_mtu(fdt, addr->sa_family == AF_INET);
+ if (val != CONST_MTU_TEST)
+ error(1, 0, "bad path mtu %u\n", val);
+
+ run_all(fdt, fdr, addr, 0 /* use connected addr */);
+ }
+
+ if (close(fdt))
+ error(1, errno, "close t");
+ if (close(fdr))
+ error(1, errno, "close r");
+}
+
+static void run_test_v4(void)
+{
+ struct sockaddr_in addr = {0};
+
+ addr.sin_family = AF_INET;
+ addr.sin_port = htons(cfg_port);
+ addr.sin_addr = addr4;
+
+ run_test((void *)&addr, sizeof(addr));
+}
+
+static void run_test_v6(void)
+{
+ struct sockaddr_in6 addr = {0};
+
+ addr.sin6_family = AF_INET6;
+ addr.sin6_port = htons(cfg_port);
+ addr.sin6_addr = addr6;
+
+ run_test((void *)&addr, sizeof(addr));
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ int c;
+
+ while ((c = getopt(argc, argv, "46cCmst:")) != -1) {
+ switch (c) {
+ case '4':
+ cfg_do_ipv4 = true;
+ break;
+ case '6':
+ cfg_do_ipv6 = true;
+ break;
+ case 'c':
+ cfg_do_connected = true;
+ break;
+ case 'C':
+ cfg_do_connectionless = true;
+ break;
+ case 'm':
+ cfg_do_msgmore = true;
+ break;
+ case 's':
+ cfg_do_setsockopt = true;
+ break;
+ case 't':
+ cfg_specific_test_id = strtoul(optarg, NULL, 0);
+ break;
+ default:
+ error(1, 0, "%s: parse error", argv[0]);
+ }
+ }
+}
+
+int main(int argc, char **argv)
+{
+ parse_opts(argc, argv);
+
+ if (cfg_do_ipv4)
+ run_test_v4();
+ if (cfg_do_ipv6)
+ run_test_v6();
+
+ fprintf(stderr, "OK\n");
+ return 0;
+}
diff --git a/tools/testing/selftests/net/udpgso.sh b/tools/testing/selftests/net/udpgso.sh
new file mode 100755
index 000000000..fec24f584
--- /dev/null
+++ b/tools/testing/selftests/net/udpgso.sh
@@ -0,0 +1,29 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Run a series of udpgso regression tests
+
+echo "ipv4 cmsg"
+./in_netns.sh ./udpgso -4 -C
+
+echo "ipv4 setsockopt"
+./in_netns.sh ./udpgso -4 -C -s
+
+echo "ipv6 cmsg"
+./in_netns.sh ./udpgso -6 -C
+
+echo "ipv6 setsockopt"
+./in_netns.sh ./udpgso -6 -C -s
+
+echo "ipv4 connected"
+./in_netns.sh ./udpgso -4 -c
+
+# blocked on 2nd loopback address
+# echo "ipv6 connected"
+# ./in_netns.sh ./udpgso -6 -c
+
+echo "ipv4 msg_more"
+./in_netns.sh ./udpgso -4 -C -m
+
+echo "ipv6 msg_more"
+./in_netns.sh ./udpgso -6 -C -m
diff --git a/tools/testing/selftests/net/udpgso_bench.sh b/tools/testing/selftests/net/udpgso_bench.sh
new file mode 100755
index 000000000..99e537ab5
--- /dev/null
+++ b/tools/testing/selftests/net/udpgso_bench.sh
@@ -0,0 +1,71 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Run a series of udpgso benchmarks
+
+wake_children() {
+ local -r jobs="$(jobs -p)"
+
+ if [[ "${jobs}" != "" ]]; then
+ kill -1 ${jobs} 2>/dev/null
+ fi
+}
+trap wake_children EXIT
+
+run_one() {
+ local -r args=$@
+
+ ./udpgso_bench_rx &
+ ./udpgso_bench_rx -t &
+
+ ./udpgso_bench_tx ${args}
+}
+
+run_in_netns() {
+ local -r args=$@
+
+ ./in_netns.sh $0 __subprocess ${args}
+}
+
+run_udp() {
+ local -r args=$@
+
+ echo "udp"
+ run_in_netns ${args}
+
+ echo "udp gso"
+ run_in_netns ${args} -S
+}
+
+run_tcp() {
+ local -r args=$@
+
+ echo "tcp"
+ run_in_netns ${args} -t
+
+ echo "tcp zerocopy"
+ run_in_netns ${args} -t -z
+}
+
+run_all() {
+ local -r core_args="-l 4"
+ local -r ipv4_args="${core_args} -4 -D 127.0.0.1"
+ local -r ipv6_args="${core_args} -6 -D ::1"
+
+ echo "ipv4"
+ run_tcp "${ipv4_args}"
+ run_udp "${ipv4_args}"
+
+ echo "ipv6"
+ run_tcp "${ipv4_args}"
+ run_udp "${ipv6_args}"
+}
+
+if [[ $# -eq 0 ]]; then
+ run_all
+elif [[ $1 == "__subprocess" ]]; then
+ shift
+ run_one $@
+else
+ run_in_netns $@
+fi
diff --git a/tools/testing/selftests/net/udpgso_bench_rx.c b/tools/testing/selftests/net/udpgso_bench_rx.c
new file mode 100644
index 000000000..727cf67a3
--- /dev/null
+++ b/tools/testing/selftests/net/udpgso_bench_rx.c
@@ -0,0 +1,265 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <error.h>
+#include <errno.h>
+#include <limits.h>
+#include <linux/errqueue.h>
+#include <linux/if_packet.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+static int cfg_port = 8000;
+static bool cfg_tcp;
+static bool cfg_verify;
+
+static bool interrupted;
+static unsigned long packets, bytes;
+
+static void sigint_handler(int signum)
+{
+ if (signum == SIGINT)
+ interrupted = true;
+}
+
+static unsigned long gettimeofday_ms(void)
+{
+ struct timeval tv;
+
+ gettimeofday(&tv, NULL);
+ return (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
+}
+
+static void do_poll(int fd)
+{
+ struct pollfd pfd;
+ int ret;
+
+ pfd.events = POLLIN;
+ pfd.revents = 0;
+ pfd.fd = fd;
+
+ do {
+ ret = poll(&pfd, 1, 10);
+ if (ret == -1)
+ error(1, errno, "poll");
+ if (ret == 0)
+ continue;
+ if (pfd.revents != POLLIN)
+ error(1, errno, "poll: 0x%x expected 0x%x\n",
+ pfd.revents, POLLIN);
+ } while (!ret && !interrupted);
+}
+
+static int do_socket(bool do_tcp)
+{
+ struct sockaddr_in6 addr = {0};
+ int fd, val;
+
+ fd = socket(PF_INET6, cfg_tcp ? SOCK_STREAM : SOCK_DGRAM, 0);
+ if (fd == -1)
+ error(1, errno, "socket");
+
+ val = 1 << 21;
+ if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val)))
+ error(1, errno, "setsockopt rcvbuf");
+ val = 1;
+ if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &val, sizeof(val)))
+ error(1, errno, "setsockopt reuseport");
+
+ addr.sin6_family = PF_INET6;
+ addr.sin6_port = htons(cfg_port);
+ addr.sin6_addr = in6addr_any;
+ if (bind(fd, (void *) &addr, sizeof(addr)))
+ error(1, errno, "bind");
+
+ if (do_tcp) {
+ int accept_fd = fd;
+
+ if (listen(accept_fd, 1))
+ error(1, errno, "listen");
+
+ do_poll(accept_fd);
+
+ fd = accept(accept_fd, NULL, NULL);
+ if (fd == -1)
+ error(1, errno, "accept");
+ if (close(accept_fd))
+ error(1, errno, "close accept fd");
+ }
+
+ return fd;
+}
+
+/* Flush all outstanding bytes for the tcp receive queue */
+static void do_flush_tcp(int fd)
+{
+ int ret;
+
+ while (true) {
+ /* MSG_TRUNC flushes up to len bytes */
+ ret = recv(fd, NULL, 1 << 21, MSG_TRUNC | MSG_DONTWAIT);
+ if (ret == -1 && errno == EAGAIN)
+ return;
+ if (ret == -1)
+ error(1, errno, "flush");
+ if (ret == 0) {
+ /* client detached */
+ exit(0);
+ }
+
+ packets++;
+ bytes += ret;
+ }
+
+}
+
+static char sanitized_char(char val)
+{
+ return (val >= 'a' && val <= 'z') ? val : '.';
+}
+
+static void do_verify_udp(const char *data, int len)
+{
+ char cur = data[0];
+ int i;
+
+ /* verify contents */
+ if (cur < 'a' || cur > 'z')
+ error(1, 0, "data initial byte out of range");
+
+ for (i = 1; i < len; i++) {
+ if (cur == 'z')
+ cur = 'a';
+ else
+ cur++;
+
+ if (data[i] != cur)
+ error(1, 0, "data[%d]: len %d, %c(%hhu) != %c(%hhu)\n",
+ i, len,
+ sanitized_char(data[i]), data[i],
+ sanitized_char(cur), cur);
+ }
+}
+
+/* Flush all outstanding datagrams. Verify first few bytes of each. */
+static void do_flush_udp(int fd)
+{
+ static char rbuf[ETH_DATA_LEN];
+ int ret, len, budget = 256;
+
+ len = cfg_verify ? sizeof(rbuf) : 0;
+ while (budget--) {
+ /* MSG_TRUNC will make return value full datagram length */
+ ret = recv(fd, rbuf, len, MSG_TRUNC | MSG_DONTWAIT);
+ if (ret == -1 && errno == EAGAIN)
+ return;
+ if (ret == -1)
+ error(1, errno, "recv");
+ if (len) {
+ if (ret == 0)
+ error(1, errno, "recv: 0 byte datagram\n");
+
+ do_verify_udp(rbuf, ret);
+ }
+
+ packets++;
+ bytes += ret;
+ }
+}
+
+static void usage(const char *filepath)
+{
+ error(1, 0, "Usage: %s [-tv] [-p port]", filepath);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ int c;
+
+ while ((c = getopt(argc, argv, "ptv")) != -1) {
+ switch (c) {
+ case 'p':
+ cfg_port = htons(strtoul(optarg, NULL, 0));
+ break;
+ case 't':
+ cfg_tcp = true;
+ break;
+ case 'v':
+ cfg_verify = true;
+ break;
+ }
+ }
+
+ if (optind != argc)
+ usage(argv[0]);
+
+ if (cfg_tcp && cfg_verify)
+ error(1, 0, "TODO: implement verify mode for tcp");
+}
+
+static void do_recv(void)
+{
+ unsigned long tnow, treport;
+ int fd;
+
+ fd = do_socket(cfg_tcp);
+
+ treport = gettimeofday_ms() + 1000;
+ do {
+ do_poll(fd);
+
+ if (cfg_tcp)
+ do_flush_tcp(fd);
+ else
+ do_flush_udp(fd);
+
+ tnow = gettimeofday_ms();
+ if (tnow > treport) {
+ if (packets)
+ fprintf(stderr,
+ "%s rx: %6lu MB/s %8lu calls/s\n",
+ cfg_tcp ? "tcp" : "udp",
+ bytes >> 20, packets);
+ bytes = packets = 0;
+ treport = tnow + 1000;
+ }
+
+ } while (!interrupted);
+
+ if (close(fd))
+ error(1, errno, "close");
+}
+
+int main(int argc, char **argv)
+{
+ parse_opts(argc, argv);
+
+ signal(SIGINT, sigint_handler);
+
+ do_recv();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/net/udpgso_bench_tx.c b/tools/testing/selftests/net/udpgso_bench_tx.c
new file mode 100644
index 000000000..463a2cbd0
--- /dev/null
+++ b/tools/testing/selftests/net/udpgso_bench_tx.c
@@ -0,0 +1,426 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <netinet/if_ether.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#ifndef ETH_MAX_MTU
+#define ETH_MAX_MTU 0xFFFFU
+#endif
+
+#ifndef UDP_SEGMENT
+#define UDP_SEGMENT 103
+#endif
+
+#ifndef SO_ZEROCOPY
+#define SO_ZEROCOPY 60
+#endif
+
+#ifndef MSG_ZEROCOPY
+#define MSG_ZEROCOPY 0x4000000
+#endif
+
+#define NUM_PKT 100
+
+static bool cfg_cache_trash;
+static int cfg_cpu = -1;
+static int cfg_connected = true;
+static int cfg_family = PF_UNSPEC;
+static uint16_t cfg_mss;
+static int cfg_payload_len = (1472 * 42);
+static int cfg_port = 8000;
+static int cfg_runtime_ms = -1;
+static bool cfg_segment;
+static bool cfg_sendmmsg;
+static bool cfg_tcp;
+static bool cfg_zerocopy;
+
+static socklen_t cfg_alen;
+static struct sockaddr_storage cfg_dst_addr;
+
+static bool interrupted;
+static char buf[NUM_PKT][ETH_MAX_MTU];
+
+static void sigint_handler(int signum)
+{
+ if (signum == SIGINT)
+ interrupted = true;
+}
+
+static unsigned long gettimeofday_ms(void)
+{
+ struct timeval tv;
+
+ gettimeofday(&tv, NULL);
+ return (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
+}
+
+static int set_cpu(int cpu)
+{
+ cpu_set_t mask;
+
+ CPU_ZERO(&mask);
+ CPU_SET(cpu, &mask);
+ if (sched_setaffinity(0, sizeof(mask), &mask))
+ error(1, 0, "setaffinity %d", cpu);
+
+ return 0;
+}
+
+static void setup_sockaddr(int domain, const char *str_addr, void *sockaddr)
+{
+ struct sockaddr_in6 *addr6 = (void *) sockaddr;
+ struct sockaddr_in *addr4 = (void *) sockaddr;
+
+ switch (domain) {
+ case PF_INET:
+ addr4->sin_family = AF_INET;
+ addr4->sin_port = htons(cfg_port);
+ if (inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1)
+ error(1, 0, "ipv4 parse error: %s", str_addr);
+ break;
+ case PF_INET6:
+ addr6->sin6_family = AF_INET6;
+ addr6->sin6_port = htons(cfg_port);
+ if (inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1)
+ error(1, 0, "ipv6 parse error: %s", str_addr);
+ break;
+ default:
+ error(1, 0, "illegal domain");
+ }
+}
+
+static void flush_zerocopy(int fd)
+{
+ struct msghdr msg = {0}; /* flush */
+ int ret;
+
+ while (1) {
+ ret = recvmsg(fd, &msg, MSG_ERRQUEUE);
+ if (ret == -1 && errno == EAGAIN)
+ break;
+ if (ret == -1)
+ error(1, errno, "errqueue");
+ if (msg.msg_flags != (MSG_ERRQUEUE | MSG_CTRUNC))
+ error(1, 0, "errqueue: flags 0x%x\n", msg.msg_flags);
+ msg.msg_flags = 0;
+ }
+}
+
+static int send_tcp(int fd, char *data)
+{
+ int ret, done = 0, count = 0;
+
+ while (done < cfg_payload_len) {
+ ret = send(fd, data + done, cfg_payload_len - done,
+ cfg_zerocopy ? MSG_ZEROCOPY : 0);
+ if (ret == -1)
+ error(1, errno, "write");
+
+ done += ret;
+ count++;
+ }
+
+ return count;
+}
+
+static int send_udp(int fd, char *data)
+{
+ int ret, total_len, len, count = 0;
+
+ total_len = cfg_payload_len;
+
+ while (total_len) {
+ len = total_len < cfg_mss ? total_len : cfg_mss;
+
+ ret = sendto(fd, data, len, cfg_zerocopy ? MSG_ZEROCOPY : 0,
+ cfg_connected ? NULL : (void *)&cfg_dst_addr,
+ cfg_connected ? 0 : cfg_alen);
+ if (ret == -1)
+ error(1, errno, "write");
+ if (ret != len)
+ error(1, errno, "write: %uB != %uB\n", ret, len);
+
+ total_len -= len;
+ count++;
+ }
+
+ return count;
+}
+
+static int send_udp_sendmmsg(int fd, char *data)
+{
+ const int max_nr_msg = ETH_MAX_MTU / ETH_DATA_LEN;
+ struct mmsghdr mmsgs[max_nr_msg];
+ struct iovec iov[max_nr_msg];
+ unsigned int off = 0, left;
+ int i = 0, ret;
+
+ memset(mmsgs, 0, sizeof(mmsgs));
+
+ left = cfg_payload_len;
+ while (left) {
+ if (i == max_nr_msg)
+ error(1, 0, "sendmmsg: exceeds max_nr_msg");
+
+ iov[i].iov_base = data + off;
+ iov[i].iov_len = cfg_mss < left ? cfg_mss : left;
+
+ mmsgs[i].msg_hdr.msg_iov = iov + i;
+ mmsgs[i].msg_hdr.msg_iovlen = 1;
+
+ off += iov[i].iov_len;
+ left -= iov[i].iov_len;
+ i++;
+ }
+
+ ret = sendmmsg(fd, mmsgs, i, cfg_zerocopy ? MSG_ZEROCOPY : 0);
+ if (ret == -1)
+ error(1, errno, "sendmmsg");
+
+ return ret;
+}
+
+static void send_udp_segment_cmsg(struct cmsghdr *cm)
+{
+ uint16_t *valp;
+
+ cm->cmsg_level = SOL_UDP;
+ cm->cmsg_type = UDP_SEGMENT;
+ cm->cmsg_len = CMSG_LEN(sizeof(cfg_mss));
+ valp = (void *)CMSG_DATA(cm);
+ *valp = cfg_mss;
+}
+
+static int send_udp_segment(int fd, char *data)
+{
+ char control[CMSG_SPACE(sizeof(cfg_mss))] = {0};
+ struct msghdr msg = {0};
+ struct iovec iov = {0};
+ int ret;
+
+ iov.iov_base = data;
+ iov.iov_len = cfg_payload_len;
+
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+
+ msg.msg_control = control;
+ msg.msg_controllen = sizeof(control);
+ send_udp_segment_cmsg(CMSG_FIRSTHDR(&msg));
+
+ msg.msg_name = (void *)&cfg_dst_addr;
+ msg.msg_namelen = cfg_alen;
+
+ ret = sendmsg(fd, &msg, cfg_zerocopy ? MSG_ZEROCOPY : 0);
+ if (ret == -1)
+ error(1, errno, "sendmsg");
+ if (ret != iov.iov_len)
+ error(1, 0, "sendmsg: %u != %lu\n", ret, iov.iov_len);
+
+ return 1;
+}
+
+static void usage(const char *filepath)
+{
+ error(1, 0, "Usage: %s [-46cmStuz] [-C cpu] [-D dst ip] [-l secs] [-p port] [-s sendsize]",
+ filepath);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ const char *bind_addr = NULL;
+ int max_len, hdrlen;
+ int c;
+
+ while ((c = getopt(argc, argv, "46cC:D:l:mp:s:Stuz")) != -1) {
+ switch (c) {
+ case '4':
+ if (cfg_family != PF_UNSPEC)
+ error(1, 0, "Pass one of -4 or -6");
+ cfg_family = PF_INET;
+ cfg_alen = sizeof(struct sockaddr_in);
+ break;
+ case '6':
+ if (cfg_family != PF_UNSPEC)
+ error(1, 0, "Pass one of -4 or -6");
+ cfg_family = PF_INET6;
+ cfg_alen = sizeof(struct sockaddr_in6);
+ break;
+ case 'c':
+ cfg_cache_trash = true;
+ break;
+ case 'C':
+ cfg_cpu = strtol(optarg, NULL, 0);
+ break;
+ case 'D':
+ bind_addr = optarg;
+ break;
+ case 'l':
+ cfg_runtime_ms = strtoul(optarg, NULL, 10) * 1000;
+ break;
+ case 'm':
+ cfg_sendmmsg = true;
+ break;
+ case 'p':
+ cfg_port = strtoul(optarg, NULL, 0);
+ break;
+ case 's':
+ cfg_payload_len = strtoul(optarg, NULL, 0);
+ break;
+ case 'S':
+ cfg_segment = true;
+ break;
+ case 't':
+ cfg_tcp = true;
+ break;
+ case 'u':
+ cfg_connected = false;
+ break;
+ case 'z':
+ cfg_zerocopy = true;
+ break;
+ }
+ }
+
+ if (!bind_addr)
+ bind_addr = cfg_family == PF_INET6 ? "::" : "0.0.0.0";
+
+ setup_sockaddr(cfg_family, bind_addr, &cfg_dst_addr);
+
+ if (optind != argc)
+ usage(argv[0]);
+
+ if (cfg_family == PF_UNSPEC)
+ error(1, 0, "must pass one of -4 or -6");
+ if (cfg_tcp && !cfg_connected)
+ error(1, 0, "connectionless tcp makes no sense");
+ if (cfg_segment && cfg_sendmmsg)
+ error(1, 0, "cannot combine segment offload and sendmmsg");
+
+ if (cfg_family == PF_INET)
+ hdrlen = sizeof(struct iphdr) + sizeof(struct udphdr);
+ else
+ hdrlen = sizeof(struct ip6_hdr) + sizeof(struct udphdr);
+
+ cfg_mss = ETH_DATA_LEN - hdrlen;
+ max_len = ETH_MAX_MTU - hdrlen;
+
+ if (cfg_payload_len > max_len)
+ error(1, 0, "payload length %u exceeds max %u",
+ cfg_payload_len, max_len);
+}
+
+static void set_pmtu_discover(int fd, bool is_ipv4)
+{
+ int level, name, val;
+
+ if (is_ipv4) {
+ level = SOL_IP;
+ name = IP_MTU_DISCOVER;
+ val = IP_PMTUDISC_DO;
+ } else {
+ level = SOL_IPV6;
+ name = IPV6_MTU_DISCOVER;
+ val = IPV6_PMTUDISC_DO;
+ }
+
+ if (setsockopt(fd, level, name, &val, sizeof(val)))
+ error(1, errno, "setsockopt path mtu");
+}
+
+int main(int argc, char **argv)
+{
+ unsigned long num_msgs, num_sends;
+ unsigned long tnow, treport, tstop;
+ int fd, i, val;
+
+ parse_opts(argc, argv);
+
+ if (cfg_cpu > 0)
+ set_cpu(cfg_cpu);
+
+ for (i = 0; i < sizeof(buf[0]); i++)
+ buf[0][i] = 'a' + (i % 26);
+ for (i = 1; i < NUM_PKT; i++)
+ memcpy(buf[i], buf[0], sizeof(buf[0]));
+
+ signal(SIGINT, sigint_handler);
+
+ fd = socket(cfg_family, cfg_tcp ? SOCK_STREAM : SOCK_DGRAM, 0);
+ if (fd == -1)
+ error(1, errno, "socket");
+
+ if (cfg_zerocopy) {
+ val = 1;
+ if (setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &val, sizeof(val)))
+ error(1, errno, "setsockopt zerocopy");
+ }
+
+ if (cfg_connected &&
+ connect(fd, (void *)&cfg_dst_addr, cfg_alen))
+ error(1, errno, "connect");
+
+ if (cfg_segment)
+ set_pmtu_discover(fd, cfg_family == PF_INET);
+
+ num_msgs = num_sends = 0;
+ tnow = gettimeofday_ms();
+ tstop = tnow + cfg_runtime_ms;
+ treport = tnow + 1000;
+
+ i = 0;
+ do {
+ if (cfg_tcp)
+ num_sends += send_tcp(fd, buf[i]);
+ else if (cfg_segment)
+ num_sends += send_udp_segment(fd, buf[i]);
+ else if (cfg_sendmmsg)
+ num_sends += send_udp_sendmmsg(fd, buf[i]);
+ else
+ num_sends += send_udp(fd, buf[i]);
+ num_msgs++;
+
+ if (cfg_zerocopy && ((num_msgs & 0xF) == 0))
+ flush_zerocopy(fd);
+
+ tnow = gettimeofday_ms();
+ if (tnow > treport) {
+ fprintf(stderr,
+ "%s tx: %6lu MB/s %8lu calls/s %6lu msg/s\n",
+ cfg_tcp ? "tcp" : "udp",
+ (num_msgs * cfg_payload_len) >> 20,
+ num_sends, num_msgs);
+ num_msgs = num_sends = 0;
+ treport = tnow + 1000;
+ }
+
+ /* cold cache when writing buffer */
+ if (cfg_cache_trash)
+ i = ++i < NUM_PKT ? i : 0;
+
+ } while (!interrupted && (cfg_runtime_ms == -1 || tnow < tstop));
+
+ if (close(fd))
+ error(1, errno, "close");
+
+ return 0;
+}
diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile
new file mode 100644
index 000000000..a37cb1192
--- /dev/null
+++ b/tools/testing/selftests/netfilter/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for netfilter selftests
+
+TEST_PROGS := nft_trans_stress.sh nft_nat.sh conntrack_icmp_related.sh
+
+include ../lib.mk
diff --git a/tools/testing/selftests/netfilter/config b/tools/testing/selftests/netfilter/config
new file mode 100644
index 000000000..59caa8f71
--- /dev/null
+++ b/tools/testing/selftests/netfilter/config
@@ -0,0 +1,2 @@
+CONFIG_NET_NS=y
+CONFIG_NF_TABLES_INET=y
diff --git a/tools/testing/selftests/netfilter/conntrack_icmp_related.sh b/tools/testing/selftests/netfilter/conntrack_icmp_related.sh
new file mode 100755
index 000000000..b48e1833b
--- /dev/null
+++ b/tools/testing/selftests/netfilter/conntrack_icmp_related.sh
@@ -0,0 +1,283 @@
+#!/bin/bash
+#
+# check that ICMP df-needed/pkttoobig icmp are set are set as related
+# state
+#
+# Setup is:
+#
+# nsclient1 -> nsrouter1 -> nsrouter2 -> nsclient2
+# MTU 1500, except for nsrouter2 <-> nsclient2 link (1280).
+# ping nsclient2 from nsclient1, checking that conntrack did set RELATED
+# 'fragmentation needed' icmp packet.
+#
+# In addition, nsrouter1 will perform IP masquerading, i.e. also
+# check the icmp errors are propagated to the correct host as per
+# nat of "established" icmp-echo "connection".
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=0
+
+nft --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without nft tool"
+ exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+cleanup() {
+ for i in 1 2;do ip netns del nsclient$i;done
+ for i in 1 2;do ip netns del nsrouter$i;done
+}
+
+ipv4() {
+ echo -n 192.168.$1.2
+}
+
+ipv6 () {
+ echo -n dead:$1::2
+}
+
+check_counter()
+{
+ ns=$1
+ name=$2
+ expect=$3
+ local lret=0
+
+ cnt=$(ip netns exec $ns nft list counter inet filter "$name" | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ echo "ERROR: counter $name in $ns has unexpected value (expected $expect)" 1>&2
+ ip netns exec $ns nft list counter inet filter "$name" 1>&2
+ lret=1
+ fi
+
+ return $lret
+}
+
+check_unknown()
+{
+ expect="packets 0 bytes 0"
+ for n in nsclient1 nsclient2 nsrouter1 nsrouter2; do
+ check_counter $n "unknown" "$expect"
+ if [ $? -ne 0 ] ;then
+ return 1
+ fi
+ done
+
+ return 0
+}
+
+for n in nsclient1 nsclient2 nsrouter1 nsrouter2; do
+ ip netns add $n
+ ip -net $n link set lo up
+done
+
+DEV=veth0
+ip link add $DEV netns nsclient1 type veth peer name eth1 netns nsrouter1
+DEV=veth0
+ip link add $DEV netns nsclient2 type veth peer name eth1 netns nsrouter2
+
+DEV=veth0
+ip link add $DEV netns nsrouter1 type veth peer name eth2 netns nsrouter2
+
+DEV=veth0
+for i in 1 2; do
+ ip -net nsclient$i link set $DEV up
+ ip -net nsclient$i addr add $(ipv4 $i)/24 dev $DEV
+ ip -net nsclient$i addr add $(ipv6 $i)/64 dev $DEV
+done
+
+ip -net nsrouter1 link set eth1 up
+ip -net nsrouter1 link set veth0 up
+
+ip -net nsrouter2 link set eth1 up
+ip -net nsrouter2 link set eth2 up
+
+ip -net nsclient1 route add default via 192.168.1.1
+ip -net nsclient1 -6 route add default via dead:1::1
+
+ip -net nsclient2 route add default via 192.168.2.1
+ip -net nsclient2 route add default via dead:2::1
+
+i=3
+ip -net nsrouter1 addr add 192.168.1.1/24 dev eth1
+ip -net nsrouter1 addr add 192.168.3.1/24 dev veth0
+ip -net nsrouter1 addr add dead:1::1/64 dev eth1
+ip -net nsrouter1 addr add dead:3::1/64 dev veth0
+ip -net nsrouter1 route add default via 192.168.3.10
+ip -net nsrouter1 -6 route add default via dead:3::10
+
+ip -net nsrouter2 addr add 192.168.2.1/24 dev eth1
+ip -net nsrouter2 addr add 192.168.3.10/24 dev eth2
+ip -net nsrouter2 addr add dead:2::1/64 dev eth1
+ip -net nsrouter2 addr add dead:3::10/64 dev eth2
+ip -net nsrouter2 route add default via 192.168.3.1
+ip -net nsrouter2 route add default via dead:3::1
+
+sleep 2
+for i in 4 6; do
+ ip netns exec nsrouter1 sysctl -q net.ipv$i.conf.all.forwarding=1
+ ip netns exec nsrouter2 sysctl -q net.ipv$i.conf.all.forwarding=1
+done
+
+for netns in nsrouter1 nsrouter2; do
+ip netns exec $netns nft -f - <<EOF
+table inet filter {
+ counter unknown { }
+ counter related { }
+ chain forward {
+ type filter hook forward priority 0; policy accept;
+ meta l4proto icmpv6 icmpv6 type "packet-too-big" ct state "related" counter name "related" accept
+ meta l4proto icmp icmp type "destination-unreachable" ct state "related" counter name "related" accept
+ meta l4proto { icmp, icmpv6 } ct state new,established accept
+ counter name "unknown" drop
+ }
+}
+EOF
+done
+
+ip netns exec nsclient1 nft -f - <<EOF
+table inet filter {
+ counter unknown { }
+ counter related { }
+ chain input {
+ type filter hook input priority 0; policy accept;
+ meta l4proto { icmp, icmpv6 } ct state established,untracked accept
+
+ meta l4proto { icmp, icmpv6 } ct state "related" counter name "related" accept
+ counter name "unknown" drop
+ }
+}
+EOF
+
+ip netns exec nsclient2 nft -f - <<EOF
+table inet filter {
+ counter unknown { }
+ counter new { }
+ counter established { }
+
+ chain input {
+ type filter hook input priority 0; policy accept;
+ meta l4proto { icmp, icmpv6 } ct state established,untracked accept
+
+ meta l4proto { icmp, icmpv6 } ct state "new" counter name "new" accept
+ meta l4proto { icmp, icmpv6 } ct state "established" counter name "established" accept
+ counter name "unknown" drop
+ }
+ chain output {
+ type filter hook output priority 0; policy accept;
+ meta l4proto { icmp, icmpv6 } ct state established,untracked accept
+
+ meta l4proto { icmp, icmpv6 } ct state "new" counter name "new"
+ meta l4proto { icmp, icmpv6 } ct state "established" counter name "established"
+ counter name "unknown" drop
+ }
+}
+EOF
+
+
+# make sure NAT core rewrites adress of icmp error if nat is used according to
+# conntrack nat information (icmp error will be directed at nsrouter1 address,
+# but it needs to be routed to nsclient1 address).
+ip netns exec nsrouter1 nft -f - <<EOF
+table ip nat {
+ chain postrouting {
+ type nat hook postrouting priority 0; policy accept;
+ ip protocol icmp oifname "veth0" counter masquerade
+ }
+}
+table ip6 nat {
+ chain postrouting {
+ type nat hook postrouting priority 0; policy accept;
+ ip6 nexthdr icmpv6 oifname "veth0" counter masquerade
+ }
+}
+EOF
+
+ip netns exec nsrouter2 ip link set eth1 mtu 1280
+ip netns exec nsclient2 ip link set veth0 mtu 1280
+sleep 1
+
+ip netns exec nsclient1 ping -c 1 -s 1000 -q -M do 192.168.2.2 >/dev/null
+if [ $? -ne 0 ]; then
+ echo "ERROR: netns ip routing/connectivity broken" 1>&2
+ cleanup
+ exit 1
+fi
+ip netns exec nsclient1 ping6 -q -c 1 -s 1000 dead:2::2 >/dev/null
+if [ $? -ne 0 ]; then
+ echo "ERROR: netns ipv6 routing/connectivity broken" 1>&2
+ cleanup
+ exit 1
+fi
+
+check_unknown
+if [ $? -ne 0 ]; then
+ ret=1
+fi
+
+expect="packets 0 bytes 0"
+for netns in nsrouter1 nsrouter2 nsclient1;do
+ check_counter "$netns" "related" "$expect"
+ if [ $? -ne 0 ]; then
+ ret=1
+ fi
+done
+
+expect="packets 2 bytes 2076"
+check_counter nsclient2 "new" "$expect"
+if [ $? -ne 0 ]; then
+ ret=1
+fi
+
+ip netns exec nsclient1 ping -q -c 1 -s 1300 -M do 192.168.2.2 > /dev/null
+if [ $? -eq 0 ]; then
+ echo "ERROR: ping should have failed with PMTU too big error" 1>&2
+ ret=1
+fi
+
+# nsrouter2 should have generated the icmp error, so
+# related counter should be 0 (its in forward).
+expect="packets 0 bytes 0"
+check_counter "nsrouter2" "related" "$expect"
+if [ $? -ne 0 ]; then
+ ret=1
+fi
+
+# but nsrouter1 should have seen it, same for nsclient1.
+expect="packets 1 bytes 576"
+for netns in nsrouter1 nsclient1;do
+ check_counter "$netns" "related" "$expect"
+ if [ $? -ne 0 ]; then
+ ret=1
+ fi
+done
+
+ip netns exec nsclient1 ping6 -c 1 -s 1300 dead:2::2 > /dev/null
+if [ $? -eq 0 ]; then
+ echo "ERROR: ping6 should have failed with PMTU too big error" 1>&2
+ ret=1
+fi
+
+expect="packets 2 bytes 1856"
+for netns in nsrouter1 nsclient1;do
+ check_counter "$netns" "related" "$expect"
+ if [ $? -ne 0 ]; then
+ ret=1
+ fi
+done
+
+if [ $ret -eq 0 ];then
+ echo "PASS: icmp mtu error had RELATED state"
+else
+ echo "ERROR: icmp error RELATED state test has failed"
+fi
+
+cleanup
+exit $ret
diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh
new file mode 100755
index 000000000..f25f72a75
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nft_nat.sh
@@ -0,0 +1,766 @@
+#!/bin/bash
+#
+# This test is for basic NAT functionality: snat, dnat, redirect, masquerade.
+#
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=0
+
+nft --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without nft tool"
+ exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+ip netns add ns0
+ip netns add ns1
+ip netns add ns2
+
+ip link add veth0 netns ns0 type veth peer name eth0 netns ns1 > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: No virtual ethernet pair device support in kernel"
+ exit $ksft_skip
+fi
+ip link add veth1 netns ns0 type veth peer name eth0 netns ns2
+
+ip -net ns0 link set lo up
+ip -net ns0 link set veth0 up
+ip -net ns0 addr add 10.0.1.1/24 dev veth0
+ip -net ns0 addr add dead:1::1/64 dev veth0
+
+ip -net ns0 link set veth1 up
+ip -net ns0 addr add 10.0.2.1/24 dev veth1
+ip -net ns0 addr add dead:2::1/64 dev veth1
+
+for i in 1 2; do
+ ip -net ns$i link set lo up
+ ip -net ns$i link set eth0 up
+ ip -net ns$i addr add 10.0.$i.99/24 dev eth0
+ ip -net ns$i route add default via 10.0.$i.1
+ ip -net ns$i addr add dead:$i::99/64 dev eth0
+ ip -net ns$i route add default via dead:$i::1
+done
+
+bad_counter()
+{
+ local ns=$1
+ local counter=$2
+ local expect=$3
+
+ echo "ERROR: $counter counter in $ns has unexpected value (expected $expect)" 1>&2
+ ip netns exec $ns nft list counter inet filter $counter 1>&2
+}
+
+check_counters()
+{
+ ns=$1
+ local lret=0
+
+ cnt=$(ip netns exec $ns nft list counter inet filter ns0in | grep -q "packets 1 bytes 84")
+ if [ $? -ne 0 ]; then
+ bad_counter $ns ns0in "packets 1 bytes 84"
+ lret=1
+ fi
+ cnt=$(ip netns exec $ns nft list counter inet filter ns0out | grep -q "packets 1 bytes 84")
+ if [ $? -ne 0 ]; then
+ bad_counter $ns ns0out "packets 1 bytes 84"
+ lret=1
+ fi
+
+ expect="packets 1 bytes 104"
+ cnt=$(ip netns exec $ns nft list counter inet filter ns0in6 | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter $ns ns0in6 "$expect"
+ lret=1
+ fi
+ cnt=$(ip netns exec $ns nft list counter inet filter ns0out6 | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter $ns ns0out6 "$expect"
+ lret=1
+ fi
+
+ return $lret
+}
+
+check_ns0_counters()
+{
+ local ns=$1
+ local lret=0
+
+ cnt=$(ip netns exec ns0 nft list counter inet filter ns0in | grep -q "packets 0 bytes 0")
+ if [ $? -ne 0 ]; then
+ bad_counter ns0 ns0in "packets 0 bytes 0"
+ lret=1
+ fi
+
+ cnt=$(ip netns exec ns0 nft list counter inet filter ns0in6 | grep -q "packets 0 bytes 0")
+ if [ $? -ne 0 ]; then
+ bad_counter ns0 ns0in6 "packets 0 bytes 0"
+ lret=1
+ fi
+
+ cnt=$(ip netns exec ns0 nft list counter inet filter ns0out | grep -q "packets 0 bytes 0")
+ if [ $? -ne 0 ]; then
+ bad_counter ns0 ns0out "packets 0 bytes 0"
+ lret=1
+ fi
+ cnt=$(ip netns exec ns0 nft list counter inet filter ns0out6 | grep -q "packets 0 bytes 0")
+ if [ $? -ne 0 ]; then
+ bad_counter ns0 ns0out6 "packets 0 bytes 0"
+ lret=1
+ fi
+
+ for dir in "in" "out" ; do
+ expect="packets 1 bytes 84"
+ cnt=$(ip netns exec ns0 nft list counter inet filter ${ns}${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter ns0 $ns$dir "$expect"
+ lret=1
+ fi
+
+ expect="packets 1 bytes 104"
+ cnt=$(ip netns exec ns0 nft list counter inet filter ${ns}${dir}6 | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter ns0 $ns$dir6 "$expect"
+ lret=1
+ fi
+ done
+
+ return $lret
+}
+
+reset_counters()
+{
+ for i in 0 1 2;do
+ ip netns exec ns$i nft reset counters inet > /dev/null
+ done
+}
+
+test_local_dnat6()
+{
+ local lret=0
+ip netns exec ns0 nft -f - <<EOF
+table ip6 nat {
+ chain output {
+ type nat hook output priority 0; policy accept;
+ ip6 daddr dead:1::99 dnat to dead:2::99
+ }
+}
+EOF
+ if [ $? -ne 0 ]; then
+ echo "SKIP: Could not add add ip6 dnat hook"
+ return $ksft_skip
+ fi
+
+ # ping netns1, expect rewrite to netns2
+ ip netns exec ns0 ping -q -c 1 dead:1::99 > /dev/null
+ if [ $? -ne 0 ]; then
+ lret=1
+ echo "ERROR: ping6 failed"
+ return $lret
+ fi
+
+ expect="packets 0 bytes 0"
+ for dir in "in6" "out6" ; do
+ cnt=$(ip netns exec ns0 nft list counter inet filter ns1${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter ns0 ns1$dir "$expect"
+ lret=1
+ fi
+ done
+
+ expect="packets 1 bytes 104"
+ for dir in "in6" "out6" ; do
+ cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter ns0 ns2$dir "$expect"
+ lret=1
+ fi
+ done
+
+ # expect 0 count in ns1
+ expect="packets 0 bytes 0"
+ for dir in "in6" "out6" ; do
+ cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter ns1 ns0$dir "$expect"
+ lret=1
+ fi
+ done
+
+ # expect 1 packet in ns2
+ expect="packets 1 bytes 104"
+ for dir in "in6" "out6" ; do
+ cnt=$(ip netns exec ns2 nft list counter inet filter ns0${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter ns2 ns0$dir "$expect"
+ lret=1
+ fi
+ done
+
+ test $lret -eq 0 && echo "PASS: ipv6 ping to ns1 was NATted to ns2"
+ ip netns exec ns0 nft flush chain ip6 nat output
+
+ return $lret
+}
+
+test_local_dnat()
+{
+ local lret=0
+ip netns exec ns0 nft -f - <<EOF
+table ip nat {
+ chain output {
+ type nat hook output priority 0; policy accept;
+ ip daddr 10.0.1.99 dnat to 10.0.2.99
+ }
+}
+EOF
+ # ping netns1, expect rewrite to netns2
+ ip netns exec ns0 ping -q -c 1 10.0.1.99 > /dev/null
+ if [ $? -ne 0 ]; then
+ lret=1
+ echo "ERROR: ping failed"
+ return $lret
+ fi
+
+ expect="packets 0 bytes 0"
+ for dir in "in" "out" ; do
+ cnt=$(ip netns exec ns0 nft list counter inet filter ns1${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter ns0 ns1$dir "$expect"
+ lret=1
+ fi
+ done
+
+ expect="packets 1 bytes 84"
+ for dir in "in" "out" ; do
+ cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter ns0 ns2$dir "$expect"
+ lret=1
+ fi
+ done
+
+ # expect 0 count in ns1
+ expect="packets 0 bytes 0"
+ for dir in "in" "out" ; do
+ cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter ns1 ns0$dir "$expect"
+ lret=1
+ fi
+ done
+
+ # expect 1 packet in ns2
+ expect="packets 1 bytes 84"
+ for dir in "in" "out" ; do
+ cnt=$(ip netns exec ns2 nft list counter inet filter ns0${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter ns2 ns0$dir "$expect"
+ lret=1
+ fi
+ done
+
+ test $lret -eq 0 && echo "PASS: ping to ns1 was NATted to ns2"
+
+ ip netns exec ns0 nft flush chain ip nat output
+
+ reset_counters
+ ip netns exec ns0 ping -q -c 1 10.0.1.99 > /dev/null
+ if [ $? -ne 0 ]; then
+ lret=1
+ echo "ERROR: ping failed"
+ return $lret
+ fi
+
+ expect="packets 1 bytes 84"
+ for dir in "in" "out" ; do
+ cnt=$(ip netns exec ns0 nft list counter inet filter ns1${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter ns1 ns1$dir "$expect"
+ lret=1
+ fi
+ done
+ expect="packets 0 bytes 0"
+ for dir in "in" "out" ; do
+ cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter ns0 ns2$dir "$expect"
+ lret=1
+ fi
+ done
+
+ # expect 1 count in ns1
+ expect="packets 1 bytes 84"
+ for dir in "in" "out" ; do
+ cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter ns0 ns0$dir "$expect"
+ lret=1
+ fi
+ done
+
+ # expect 0 packet in ns2
+ expect="packets 0 bytes 0"
+ for dir in "in" "out" ; do
+ cnt=$(ip netns exec ns2 nft list counter inet filter ns0${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter ns2 ns2$dir "$expect"
+ lret=1
+ fi
+ done
+
+ test $lret -eq 0 && echo "PASS: ping to ns1 OK after nat output chain flush"
+
+ return $lret
+}
+
+
+test_masquerade6()
+{
+ local lret=0
+
+ ip netns exec ns0 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+
+ ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1
+ if [ $? -ne 0 ] ; then
+ echo "ERROR: cannot ping ns1 from ns2 via ipv6"
+ return 1
+ lret=1
+ fi
+
+ expect="packets 1 bytes 104"
+ for dir in "in6" "out6" ; do
+ cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter ns1 ns2$dir "$expect"
+ lret=1
+ fi
+
+ cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter ns2 ns1$dir "$expect"
+ lret=1
+ fi
+ done
+
+ reset_counters
+
+# add masquerading rule
+ip netns exec ns0 nft -f - <<EOF
+table ip6 nat {
+ chain postrouting {
+ type nat hook postrouting priority 0; policy accept;
+ meta oif veth0 masquerade
+ }
+}
+EOF
+ ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1
+ if [ $? -ne 0 ] ; then
+ echo "ERROR: cannot ping ns1 from ns2 with active ipv6 masquerading"
+ lret=1
+ fi
+
+ # ns1 should have seen packets from ns0, due to masquerade
+ expect="packets 1 bytes 104"
+ for dir in "in6" "out6" ; do
+
+ cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter ns1 ns0$dir "$expect"
+ lret=1
+ fi
+
+ cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter ns2 ns1$dir "$expect"
+ lret=1
+ fi
+ done
+
+ # ns1 should not have seen packets from ns2, due to masquerade
+ expect="packets 0 bytes 0"
+ for dir in "in6" "out6" ; do
+ cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter ns1 ns0$dir "$expect"
+ lret=1
+ fi
+
+ cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter ns2 ns1$dir "$expect"
+ lret=1
+ fi
+ done
+
+ ip netns exec ns0 nft flush chain ip6 nat postrouting
+ if [ $? -ne 0 ]; then
+ echo "ERROR: Could not flush ip6 nat postrouting" 1>&2
+ lret=1
+ fi
+
+ test $lret -eq 0 && echo "PASS: IPv6 masquerade for ns2"
+
+ return $lret
+}
+
+test_masquerade()
+{
+ local lret=0
+
+ ip netns exec ns0 sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
+ ip netns exec ns0 sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+
+ ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
+ if [ $? -ne 0 ] ; then
+ echo "ERROR: canot ping ns1 from ns2"
+ lret=1
+ fi
+
+ expect="packets 1 bytes 84"
+ for dir in "in" "out" ; do
+ cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter ns1 ns2$dir "$expect"
+ lret=1
+ fi
+
+ cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter ns2 ns1$dir "$expect"
+ lret=1
+ fi
+ done
+
+ reset_counters
+
+# add masquerading rule
+ip netns exec ns0 nft -f - <<EOF
+table ip nat {
+ chain postrouting {
+ type nat hook postrouting priority 0; policy accept;
+ meta oif veth0 masquerade
+ }
+}
+EOF
+ ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
+ if [ $? -ne 0 ] ; then
+ echo "ERROR: cannot ping ns1 from ns2 with active ip masquerading"
+ lret=1
+ fi
+
+ # ns1 should have seen packets from ns0, due to masquerade
+ expect="packets 1 bytes 84"
+ for dir in "in" "out" ; do
+ cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter ns1 ns0$dir "$expect"
+ lret=1
+ fi
+
+ cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter ns2 ns1$dir "$expect"
+ lret=1
+ fi
+ done
+
+ # ns1 should not have seen packets from ns2, due to masquerade
+ expect="packets 0 bytes 0"
+ for dir in "in" "out" ; do
+ cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter ns1 ns0$dir "$expect"
+ lret=1
+ fi
+
+ cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter ns2 ns1$dir "$expect"
+ lret=1
+ fi
+ done
+
+ ip netns exec ns0 nft flush chain ip nat postrouting
+ if [ $? -ne 0 ]; then
+ echo "ERROR: Could not flush nat postrouting" 1>&2
+ lret=1
+ fi
+
+ test $lret -eq 0 && echo "PASS: IP masquerade for ns2"
+
+ return $lret
+}
+
+test_redirect6()
+{
+ local lret=0
+
+ ip netns exec ns0 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+
+ ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1
+ if [ $? -ne 0 ] ; then
+ echo "ERROR: cannnot ping ns1 from ns2 via ipv6"
+ lret=1
+ fi
+
+ expect="packets 1 bytes 104"
+ for dir in "in6" "out6" ; do
+ cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter ns1 ns2$dir "$expect"
+ lret=1
+ fi
+
+ cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter ns2 ns1$dir "$expect"
+ lret=1
+ fi
+ done
+
+ reset_counters
+
+# add redirect rule
+ip netns exec ns0 nft -f - <<EOF
+table ip6 nat {
+ chain prerouting {
+ type nat hook prerouting priority 0; policy accept;
+ meta iif veth1 meta l4proto icmpv6 ip6 saddr dead:2::99 ip6 daddr dead:1::99 redirect
+ }
+}
+EOF
+ ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1
+ if [ $? -ne 0 ] ; then
+ echo "ERROR: cannot ping ns1 from ns2 with active ip6 redirect"
+ lret=1
+ fi
+
+ # ns1 should have seen no packets from ns2, due to redirection
+ expect="packets 0 bytes 0"
+ for dir in "in6" "out6" ; do
+ cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter ns1 ns0$dir "$expect"
+ lret=1
+ fi
+ done
+
+ # ns0 should have seen packets from ns2, due to masquerade
+ expect="packets 1 bytes 104"
+ for dir in "in6" "out6" ; do
+ cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter ns1 ns0$dir "$expect"
+ lret=1
+ fi
+ done
+
+ ip netns exec ns0 nft delete table ip6 nat
+ if [ $? -ne 0 ]; then
+ echo "ERROR: Could not delete ip6 nat table" 1>&2
+ lret=1
+ fi
+
+ test $lret -eq 0 && echo "PASS: IPv6 redirection for ns2"
+
+ return $lret
+}
+
+test_redirect()
+{
+ local lret=0
+
+ ip netns exec ns0 sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
+ ip netns exec ns0 sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+
+ ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
+ if [ $? -ne 0 ] ; then
+ echo "ERROR: cannot ping ns1 from ns2"
+ lret=1
+ fi
+
+ expect="packets 1 bytes 84"
+ for dir in "in" "out" ; do
+ cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter ns1 ns2$dir "$expect"
+ lret=1
+ fi
+
+ cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter ns2 ns1$dir "$expect"
+ lret=1
+ fi
+ done
+
+ reset_counters
+
+# add redirect rule
+ip netns exec ns0 nft -f - <<EOF
+table ip nat {
+ chain prerouting {
+ type nat hook prerouting priority 0; policy accept;
+ meta iif veth1 ip protocol icmp ip saddr 10.0.2.99 ip daddr 10.0.1.99 redirect
+ }
+}
+EOF
+ ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
+ if [ $? -ne 0 ] ; then
+ echo "ERROR: cannot ping ns1 from ns2 with active ip redirect"
+ lret=1
+ fi
+
+ # ns1 should have seen no packets from ns2, due to redirection
+ expect="packets 0 bytes 0"
+ for dir in "in" "out" ; do
+
+ cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter ns1 ns0$dir "$expect"
+ lret=1
+ fi
+ done
+
+ # ns0 should have seen packets from ns2, due to masquerade
+ expect="packets 1 bytes 84"
+ for dir in "in" "out" ; do
+ cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter ns1 ns0$dir "$expect"
+ lret=1
+ fi
+ done
+
+ ip netns exec ns0 nft delete table ip nat
+ if [ $? -ne 0 ]; then
+ echo "ERROR: Could not delete nat table" 1>&2
+ lret=1
+ fi
+
+ test $lret -eq 0 && echo "PASS: IP redirection for ns2"
+
+ return $lret
+}
+
+
+# ip netns exec ns0 ping -c 1 -q 10.0.$i.99
+for i in 0 1 2; do
+ip netns exec ns$i nft -f - <<EOF
+table inet filter {
+ counter ns0in {}
+ counter ns1in {}
+ counter ns2in {}
+
+ counter ns0out {}
+ counter ns1out {}
+ counter ns2out {}
+
+ counter ns0in6 {}
+ counter ns1in6 {}
+ counter ns2in6 {}
+
+ counter ns0out6 {}
+ counter ns1out6 {}
+ counter ns2out6 {}
+
+ map nsincounter {
+ type ipv4_addr : counter
+ elements = { 10.0.1.1 : "ns0in",
+ 10.0.2.1 : "ns0in",
+ 10.0.1.99 : "ns1in",
+ 10.0.2.99 : "ns2in" }
+ }
+
+ map nsincounter6 {
+ type ipv6_addr : counter
+ elements = { dead:1::1 : "ns0in6",
+ dead:2::1 : "ns0in6",
+ dead:1::99 : "ns1in6",
+ dead:2::99 : "ns2in6" }
+ }
+
+ map nsoutcounter {
+ type ipv4_addr : counter
+ elements = { 10.0.1.1 : "ns0out",
+ 10.0.2.1 : "ns0out",
+ 10.0.1.99: "ns1out",
+ 10.0.2.99: "ns2out" }
+ }
+
+ map nsoutcounter6 {
+ type ipv6_addr : counter
+ elements = { dead:1::1 : "ns0out6",
+ dead:2::1 : "ns0out6",
+ dead:1::99 : "ns1out6",
+ dead:2::99 : "ns2out6" }
+ }
+
+ chain input {
+ type filter hook input priority 0; policy accept;
+ counter name ip saddr map @nsincounter
+ icmpv6 type { "echo-request", "echo-reply" } counter name ip6 saddr map @nsincounter6
+ }
+ chain output {
+ type filter hook output priority 0; policy accept;
+ counter name ip daddr map @nsoutcounter
+ icmpv6 type { "echo-request", "echo-reply" } counter name ip6 daddr map @nsoutcounter6
+ }
+}
+EOF
+done
+
+sleep 3
+# test basic connectivity
+for i in 1 2; do
+ ip netns exec ns0 ping -c 1 -q 10.0.$i.99 > /dev/null
+ if [ $? -ne 0 ];then
+ echo "ERROR: Could not reach other namespace(s)" 1>&2
+ ret=1
+ fi
+
+ ip netns exec ns0 ping -c 1 -q dead:$i::99 > /dev/null
+ if [ $? -ne 0 ];then
+ echo "ERROR: Could not reach other namespace(s) via ipv6" 1>&2
+ ret=1
+ fi
+ check_counters ns$i
+ if [ $? -ne 0 ]; then
+ ret=1
+ fi
+
+ check_ns0_counters ns$i
+ if [ $? -ne 0 ]; then
+ ret=1
+ fi
+ reset_counters
+done
+
+if [ $ret -eq 0 ];then
+ echo "PASS: netns routing/connectivity: ns0 can reach ns1 and ns2"
+fi
+
+reset_counters
+test_local_dnat
+test_local_dnat6
+
+reset_counters
+test_masquerade
+test_masquerade6
+
+reset_counters
+test_redirect
+test_redirect6
+
+for i in 0 1 2; do ip netns del ns$i;done
+
+exit $ret
diff --git a/tools/testing/selftests/netfilter/nft_trans_stress.sh b/tools/testing/selftests/netfilter/nft_trans_stress.sh
new file mode 100755
index 000000000..f1affd12c
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nft_trans_stress.sh
@@ -0,0 +1,78 @@
+#!/bin/bash
+#
+# This test is for stress-testing the nf_tables config plane path vs.
+# packet path processing: Make sure we never release rules that are
+# still visible to other cpus.
+#
+# set -e
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+testns=testns1
+tables="foo bar baz quux"
+
+nft --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without nft tool"
+ exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+tmp=$(mktemp)
+
+for table in $tables; do
+ echo add table inet "$table" >> "$tmp"
+ echo flush table inet "$table" >> "$tmp"
+
+ echo "add chain inet $table INPUT { type filter hook input priority 0; }" >> "$tmp"
+ echo "add chain inet $table OUTPUT { type filter hook output priority 0; }" >> "$tmp"
+ for c in $(seq 1 400); do
+ chain=$(printf "chain%03u" "$c")
+ echo "add chain inet $table $chain" >> "$tmp"
+ done
+
+ for c in $(seq 1 400); do
+ chain=$(printf "chain%03u" "$c")
+ for BASE in INPUT OUTPUT; do
+ echo "add rule inet $table $BASE counter jump $chain" >> "$tmp"
+ done
+ echo "add rule inet $table $chain counter return" >> "$tmp"
+ done
+done
+
+ip netns add "$testns"
+ip -netns "$testns" link set lo up
+
+lscpu | grep ^CPU\(s\): | ( read cpu cpunum ;
+cpunum=$((cpunum-1))
+for i in $(seq 0 $cpunum);do
+ mask=$(printf 0x%x $((1<<$i)))
+ ip netns exec "$testns" taskset $mask ping -4 127.0.0.1 -fq > /dev/null &
+ ip netns exec "$testns" taskset $mask ping -6 ::1 -fq > /dev/null &
+done)
+
+sleep 1
+
+for i in $(seq 1 10) ; do ip netns exec "$testns" nft -f "$tmp" & done
+
+for table in $tables;do
+ randsleep=$((RANDOM%10))
+ sleep $randsleep
+ ip netns exec "$testns" nft delete table inet $table 2>/dev/null
+done
+
+randsleep=$((RANDOM%10))
+sleep $randsleep
+
+pkill -9 ping
+
+wait
+
+rm -f "$tmp"
+ip netns del "$testns"
diff --git a/tools/testing/selftests/networking/timestamping/.gitignore b/tools/testing/selftests/networking/timestamping/.gitignore
new file mode 100644
index 000000000..d9355035e
--- /dev/null
+++ b/tools/testing/selftests/networking/timestamping/.gitignore
@@ -0,0 +1,4 @@
+timestamping
+rxtimestamp
+txtimestamp
+hwtstamp_config
diff --git a/tools/testing/selftests/networking/timestamping/Makefile b/tools/testing/selftests/networking/timestamping/Makefile
new file mode 100644
index 000000000..c46c0eefa
--- /dev/null
+++ b/tools/testing/selftests/networking/timestamping/Makefile
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS += -I../../../../../usr/include
+
+TEST_PROGS := hwtstamp_config rxtimestamp timestamping txtimestamp
+
+all: $(TEST_PROGS)
+
+top_srcdir = ../../../../..
+KSFT_KHDR_INSTALL := 1
+include ../../lib.mk
+
+clean:
+ rm -fr $(TEST_PROGS)
diff --git a/tools/testing/selftests/networking/timestamping/hwtstamp_config.c b/tools/testing/selftests/networking/timestamping/hwtstamp_config.c
new file mode 100644
index 000000000..e1fdee841
--- /dev/null
+++ b/tools/testing/selftests/networking/timestamping/hwtstamp_config.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Test program for SIOC{G,S}HWTSTAMP
+ * Copyright 2013 Solarflare Communications
+ * Author: Ben Hutchings
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+
+#include <linux/if.h>
+#include <linux/net_tstamp.h>
+#include <linux/sockios.h>
+
+static int
+lookup_value(const char **names, int size, const char *name)
+{
+ int value;
+
+ for (value = 0; value < size; value++)
+ if (names[value] && strcasecmp(names[value], name) == 0)
+ return value;
+
+ return -1;
+}
+
+static const char *
+lookup_name(const char **names, int size, int value)
+{
+ return (value >= 0 && value < size) ? names[value] : NULL;
+}
+
+static void list_names(FILE *f, const char **names, int size)
+{
+ int value;
+
+ for (value = 0; value < size; value++)
+ if (names[value])
+ fprintf(f, " %s\n", names[value]);
+}
+
+static const char *tx_types[] = {
+#define TX_TYPE(name) [HWTSTAMP_TX_ ## name] = #name
+ TX_TYPE(OFF),
+ TX_TYPE(ON),
+ TX_TYPE(ONESTEP_SYNC)
+#undef TX_TYPE
+};
+#define N_TX_TYPES ((int)(sizeof(tx_types) / sizeof(tx_types[0])))
+
+static const char *rx_filters[] = {
+#define RX_FILTER(name) [HWTSTAMP_FILTER_ ## name] = #name
+ RX_FILTER(NONE),
+ RX_FILTER(ALL),
+ RX_FILTER(SOME),
+ RX_FILTER(PTP_V1_L4_EVENT),
+ RX_FILTER(PTP_V1_L4_SYNC),
+ RX_FILTER(PTP_V1_L4_DELAY_REQ),
+ RX_FILTER(PTP_V2_L4_EVENT),
+ RX_FILTER(PTP_V2_L4_SYNC),
+ RX_FILTER(PTP_V2_L4_DELAY_REQ),
+ RX_FILTER(PTP_V2_L2_EVENT),
+ RX_FILTER(PTP_V2_L2_SYNC),
+ RX_FILTER(PTP_V2_L2_DELAY_REQ),
+ RX_FILTER(PTP_V2_EVENT),
+ RX_FILTER(PTP_V2_SYNC),
+ RX_FILTER(PTP_V2_DELAY_REQ),
+#undef RX_FILTER
+};
+#define N_RX_FILTERS ((int)(sizeof(rx_filters) / sizeof(rx_filters[0])))
+
+static void usage(void)
+{
+ fputs("Usage: hwtstamp_config if_name [tx_type rx_filter]\n"
+ "tx_type is any of (case-insensitive):\n",
+ stderr);
+ list_names(stderr, tx_types, N_TX_TYPES);
+ fputs("rx_filter is any of (case-insensitive):\n", stderr);
+ list_names(stderr, rx_filters, N_RX_FILTERS);
+}
+
+int main(int argc, char **argv)
+{
+ struct ifreq ifr;
+ struct hwtstamp_config config;
+ const char *name;
+ int sock;
+
+ if ((argc != 2 && argc != 4) || (strlen(argv[1]) >= IFNAMSIZ)) {
+ usage();
+ return 2;
+ }
+
+ if (argc == 4) {
+ config.flags = 0;
+ config.tx_type = lookup_value(tx_types, N_TX_TYPES, argv[2]);
+ config.rx_filter = lookup_value(rx_filters, N_RX_FILTERS, argv[3]);
+ if (config.tx_type < 0 || config.rx_filter < 0) {
+ usage();
+ return 2;
+ }
+ }
+
+ sock = socket(AF_INET, SOCK_DGRAM, 0);
+ if (sock < 0) {
+ perror("socket");
+ return 1;
+ }
+
+ strcpy(ifr.ifr_name, argv[1]);
+ ifr.ifr_data = (caddr_t)&config;
+
+ if (ioctl(sock, (argc == 2) ? SIOCGHWTSTAMP : SIOCSHWTSTAMP, &ifr)) {
+ perror("ioctl");
+ return 1;
+ }
+
+ printf("flags = %#x\n", config.flags);
+ name = lookup_name(tx_types, N_TX_TYPES, config.tx_type);
+ if (name)
+ printf("tx_type = %s\n", name);
+ else
+ printf("tx_type = %d\n", config.tx_type);
+ name = lookup_name(rx_filters, N_RX_FILTERS, config.rx_filter);
+ if (name)
+ printf("rx_filter = %s\n", name);
+ else
+ printf("rx_filter = %d\n", config.rx_filter);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/networking/timestamping/rxtimestamp.c b/tools/testing/selftests/networking/timestamping/rxtimestamp.c
new file mode 100644
index 000000000..c6428f1ac
--- /dev/null
+++ b/tools/testing/selftests/networking/timestamping/rxtimestamp.c
@@ -0,0 +1,389 @@
+#include <errno.h>
+#include <error.h>
+#include <getopt.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <sys/time.h>
+#include <sys/socket.h>
+#include <sys/select.h>
+#include <sys/ioctl.h>
+#include <arpa/inet.h>
+#include <net/if.h>
+
+#include <asm/types.h>
+#include <linux/net_tstamp.h>
+#include <linux/errqueue.h>
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+struct options {
+ int so_timestamp;
+ int so_timestampns;
+ int so_timestamping;
+};
+
+struct tstamps {
+ bool tstamp;
+ bool tstampns;
+ bool swtstamp;
+ bool hwtstamp;
+};
+
+struct socket_type {
+ char *friendly_name;
+ int type;
+ int protocol;
+ bool enabled;
+};
+
+struct test_case {
+ struct options sockopt;
+ struct tstamps expected;
+ bool enabled;
+};
+
+struct sof_flag {
+ int mask;
+ char *name;
+};
+
+static struct sof_flag sof_flags[] = {
+#define SOF_FLAG(f) { f, #f }
+ SOF_FLAG(SOF_TIMESTAMPING_SOFTWARE),
+ SOF_FLAG(SOF_TIMESTAMPING_RX_SOFTWARE),
+ SOF_FLAG(SOF_TIMESTAMPING_RX_HARDWARE),
+};
+
+static struct socket_type socket_types[] = {
+ { "ip", SOCK_RAW, IPPROTO_EGP },
+ { "udp", SOCK_DGRAM, IPPROTO_UDP },
+ { "tcp", SOCK_STREAM, IPPROTO_TCP },
+};
+
+static struct test_case test_cases[] = {
+ { {}, {} },
+ {
+ { so_timestamp: 1 },
+ { tstamp: true }
+ },
+ {
+ { so_timestampns: 1 },
+ { tstampns: true }
+ },
+ {
+ { so_timestamp: 1, so_timestampns: 1 },
+ { tstampns: true }
+ },
+ {
+ { so_timestamping: SOF_TIMESTAMPING_RX_SOFTWARE },
+ {}
+ },
+ {
+ /* Loopback device does not support hw timestamps. */
+ { so_timestamping: SOF_TIMESTAMPING_RX_HARDWARE },
+ {}
+ },
+ {
+ { so_timestamping: SOF_TIMESTAMPING_SOFTWARE },
+ {}
+ },
+ {
+ { so_timestamping: SOF_TIMESTAMPING_RX_SOFTWARE
+ | SOF_TIMESTAMPING_RX_HARDWARE },
+ {}
+ },
+ {
+ { so_timestamping: SOF_TIMESTAMPING_SOFTWARE
+ | SOF_TIMESTAMPING_RX_SOFTWARE },
+ { swtstamp: true }
+ },
+ {
+ { so_timestamp: 1, so_timestamping: SOF_TIMESTAMPING_SOFTWARE
+ | SOF_TIMESTAMPING_RX_SOFTWARE },
+ { tstamp: true, swtstamp: true }
+ },
+};
+
+static struct option long_options[] = {
+ { "list_tests", no_argument, 0, 'l' },
+ { "test_num", required_argument, 0, 'n' },
+ { "op_size", required_argument, 0, 's' },
+ { "tcp", no_argument, 0, 't' },
+ { "udp", no_argument, 0, 'u' },
+ { "ip", no_argument, 0, 'i' },
+ { NULL, 0, NULL, 0 },
+};
+
+static int next_port = 19999;
+static int op_size = 10 * 1024;
+
+void print_test_case(struct test_case *t)
+{
+ int f = 0;
+
+ printf("sockopts {");
+ if (t->sockopt.so_timestamp)
+ printf(" SO_TIMESTAMP ");
+ if (t->sockopt.so_timestampns)
+ printf(" SO_TIMESTAMPNS ");
+ if (t->sockopt.so_timestamping) {
+ printf(" SO_TIMESTAMPING: {");
+ for (f = 0; f < ARRAY_SIZE(sof_flags); f++)
+ if (t->sockopt.so_timestamping & sof_flags[f].mask)
+ printf(" %s |", sof_flags[f].name);
+ printf("}");
+ }
+ printf("} expected cmsgs: {");
+ if (t->expected.tstamp)
+ printf(" SCM_TIMESTAMP ");
+ if (t->expected.tstampns)
+ printf(" SCM_TIMESTAMPNS ");
+ if (t->expected.swtstamp || t->expected.hwtstamp) {
+ printf(" SCM_TIMESTAMPING {");
+ if (t->expected.swtstamp)
+ printf("0");
+ if (t->expected.swtstamp && t->expected.hwtstamp)
+ printf(",");
+ if (t->expected.hwtstamp)
+ printf("2");
+ printf("}");
+ }
+ printf("}\n");
+}
+
+void do_send(int src)
+{
+ int r;
+ char *buf = malloc(op_size);
+
+ memset(buf, 'z', op_size);
+ r = write(src, buf, op_size);
+ if (r < 0)
+ error(1, errno, "Failed to sendmsg");
+
+ free(buf);
+}
+
+bool do_recv(int rcv, int read_size, struct tstamps expected)
+{
+ const int CMSG_SIZE = 1024;
+
+ struct scm_timestamping *ts;
+ struct tstamps actual = {};
+ char cmsg_buf[CMSG_SIZE];
+ struct iovec recv_iov;
+ struct cmsghdr *cmsg;
+ bool failed = false;
+ struct msghdr hdr;
+ int flags = 0;
+ int r;
+
+ memset(&hdr, 0, sizeof(hdr));
+ hdr.msg_iov = &recv_iov;
+ hdr.msg_iovlen = 1;
+ recv_iov.iov_base = malloc(read_size);
+ recv_iov.iov_len = read_size;
+
+ hdr.msg_control = cmsg_buf;
+ hdr.msg_controllen = sizeof(cmsg_buf);
+
+ r = recvmsg(rcv, &hdr, flags);
+ if (r < 0)
+ error(1, errno, "Failed to recvmsg");
+ if (r != read_size)
+ error(1, 0, "Only received %d bytes of payload.", r);
+
+ if (hdr.msg_flags & (MSG_TRUNC | MSG_CTRUNC))
+ error(1, 0, "Message was truncated.");
+
+ for (cmsg = CMSG_FIRSTHDR(&hdr); cmsg != NULL;
+ cmsg = CMSG_NXTHDR(&hdr, cmsg)) {
+ if (cmsg->cmsg_level != SOL_SOCKET)
+ error(1, 0, "Unexpected cmsg_level %d",
+ cmsg->cmsg_level);
+ switch (cmsg->cmsg_type) {
+ case SCM_TIMESTAMP:
+ actual.tstamp = true;
+ break;
+ case SCM_TIMESTAMPNS:
+ actual.tstampns = true;
+ break;
+ case SCM_TIMESTAMPING:
+ ts = (struct scm_timestamping *)CMSG_DATA(cmsg);
+ actual.swtstamp = !!ts->ts[0].tv_sec;
+ if (ts->ts[1].tv_sec != 0)
+ error(0, 0, "ts[1] should not be set.");
+ actual.hwtstamp = !!ts->ts[2].tv_sec;
+ break;
+ default:
+ error(1, 0, "Unexpected cmsg_type %d", cmsg->cmsg_type);
+ }
+ }
+
+#define VALIDATE(field) \
+ do { \
+ if (expected.field != actual.field) { \
+ if (expected.field) \
+ error(0, 0, "Expected " #field " to be set."); \
+ else \
+ error(0, 0, \
+ "Expected " #field " to not be set."); \
+ failed = true; \
+ } \
+ } while (0)
+
+ VALIDATE(tstamp);
+ VALIDATE(tstampns);
+ VALIDATE(swtstamp);
+ VALIDATE(hwtstamp);
+#undef VALIDATE
+
+ free(recv_iov.iov_base);
+
+ return failed;
+}
+
+void config_so_flags(int rcv, struct options o)
+{
+ int on = 1;
+
+ if (setsockopt(rcv, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) < 0)
+ error(1, errno, "Failed to enable SO_REUSEADDR");
+
+ if (o.so_timestamp &&
+ setsockopt(rcv, SOL_SOCKET, SO_TIMESTAMP,
+ &o.so_timestamp, sizeof(o.so_timestamp)) < 0)
+ error(1, errno, "Failed to enable SO_TIMESTAMP");
+
+ if (o.so_timestampns &&
+ setsockopt(rcv, SOL_SOCKET, SO_TIMESTAMPNS,
+ &o.so_timestampns, sizeof(o.so_timestampns)) < 0)
+ error(1, errno, "Failed to enable SO_TIMESTAMPNS");
+
+ if (o.so_timestamping &&
+ setsockopt(rcv, SOL_SOCKET, SO_TIMESTAMPING,
+ &o.so_timestamping, sizeof(o.so_timestamping)) < 0)
+ error(1, errno, "Failed to set SO_TIMESTAMPING");
+}
+
+bool run_test_case(struct socket_type s, struct test_case t)
+{
+ int port = (s.type == SOCK_RAW) ? 0 : next_port++;
+ int read_size = op_size;
+ struct sockaddr_in addr;
+ bool failed = false;
+ int src, dst, rcv;
+
+ src = socket(AF_INET, s.type, s.protocol);
+ if (src < 0)
+ error(1, errno, "Failed to open src socket");
+
+ dst = socket(AF_INET, s.type, s.protocol);
+ if (dst < 0)
+ error(1, errno, "Failed to open dst socket");
+
+ memset(&addr, 0, sizeof(addr));
+ addr.sin_family = AF_INET;
+ addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+ addr.sin_port = htons(port);
+
+ if (bind(dst, (struct sockaddr *)&addr, sizeof(addr)) < 0)
+ error(1, errno, "Failed to bind to port %d", port);
+
+ if (s.type == SOCK_STREAM && (listen(dst, 1) < 0))
+ error(1, errno, "Failed to listen");
+
+ if (connect(src, (struct sockaddr *)&addr, sizeof(addr)) < 0)
+ error(1, errno, "Failed to connect");
+
+ if (s.type == SOCK_STREAM) {
+ rcv = accept(dst, NULL, NULL);
+ if (rcv < 0)
+ error(1, errno, "Failed to accept");
+ close(dst);
+ } else {
+ rcv = dst;
+ }
+
+ config_so_flags(rcv, t.sockopt);
+ usleep(20000); /* setsockopt for SO_TIMESTAMPING is asynchronous */
+ do_send(src);
+
+ if (s.type == SOCK_RAW)
+ read_size += 20; /* for IP header */
+ failed = do_recv(rcv, read_size, t.expected);
+
+ close(rcv);
+ close(src);
+
+ return failed;
+}
+
+int main(int argc, char **argv)
+{
+ bool all_protocols = true;
+ bool all_tests = true;
+ int arg_index = 0;
+ int failures = 0;
+ int s, t, opt;
+
+ while ((opt = getopt_long(argc, argv, "", long_options,
+ &arg_index)) != -1) {
+ switch (opt) {
+ case 'l':
+ for (t = 0; t < ARRAY_SIZE(test_cases); t++) {
+ printf("%d\t", t);
+ print_test_case(&test_cases[t]);
+ }
+ return 0;
+ case 'n':
+ t = atoi(optarg);
+ if (t >= ARRAY_SIZE(test_cases))
+ error(1, 0, "Invalid test case: %d", t);
+ all_tests = false;
+ test_cases[t].enabled = true;
+ break;
+ case 's':
+ op_size = atoi(optarg);
+ break;
+ case 't':
+ all_protocols = false;
+ socket_types[2].enabled = true;
+ break;
+ case 'u':
+ all_protocols = false;
+ socket_types[1].enabled = true;
+ break;
+ case 'i':
+ all_protocols = false;
+ socket_types[0].enabled = true;
+ break;
+ default:
+ error(1, 0, "Failed to parse parameters.");
+ }
+ }
+
+ for (s = 0; s < ARRAY_SIZE(socket_types); s++) {
+ if (!all_protocols && !socket_types[s].enabled)
+ continue;
+
+ printf("Testing %s...\n", socket_types[s].friendly_name);
+ for (t = 0; t < ARRAY_SIZE(test_cases); t++) {
+ if (!all_tests && !test_cases[t].enabled)
+ continue;
+
+ printf("Starting testcase %d...\n", t);
+ if (run_test_case(socket_types[s], test_cases[t])) {
+ failures++;
+ printf("FAILURE in test case ");
+ print_test_case(&test_cases[t]);
+ }
+ }
+ }
+ if (!failures)
+ printf("PASSED.\n");
+ return failures;
+}
diff --git a/tools/testing/selftests/networking/timestamping/timestamping.c b/tools/testing/selftests/networking/timestamping/timestamping.c
new file mode 100644
index 000000000..900ed4b47
--- /dev/null
+++ b/tools/testing/selftests/networking/timestamping/timestamping.c
@@ -0,0 +1,534 @@
+/*
+ * This program demonstrates how the various time stamping features in
+ * the Linux kernel work. It emulates the behavior of a PTP
+ * implementation in stand-alone master mode by sending PTPv1 Sync
+ * multicasts once every second. It looks for similar packets, but
+ * beyond that doesn't actually implement PTP.
+ *
+ * Outgoing packets are time stamped with SO_TIMESTAMPING with or
+ * without hardware support.
+ *
+ * Incoming packets are time stamped with SO_TIMESTAMPING with or
+ * without hardware support, SIOCGSTAMP[NS] (per-socket time stamp) and
+ * SO_TIMESTAMP[NS].
+ *
+ * Copyright (C) 2009 Intel Corporation.
+ * Author: Patrick Ohly <patrick.ohly@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+
+#include <sys/time.h>
+#include <sys/socket.h>
+#include <sys/select.h>
+#include <sys/ioctl.h>
+#include <arpa/inet.h>
+#include <net/if.h>
+
+#include <asm/types.h>
+#include <linux/net_tstamp.h>
+#include <linux/errqueue.h>
+
+#ifndef SO_TIMESTAMPING
+# define SO_TIMESTAMPING 37
+# define SCM_TIMESTAMPING SO_TIMESTAMPING
+#endif
+
+#ifndef SO_TIMESTAMPNS
+# define SO_TIMESTAMPNS 35
+#endif
+
+#ifndef SIOCGSTAMPNS
+# define SIOCGSTAMPNS 0x8907
+#endif
+
+#ifndef SIOCSHWTSTAMP
+# define SIOCSHWTSTAMP 0x89b0
+#endif
+
+static void usage(const char *error)
+{
+ if (error)
+ printf("invalid option: %s\n", error);
+ printf("timestamping interface option*\n\n"
+ "Options:\n"
+ " IP_MULTICAST_LOOP - looping outgoing multicasts\n"
+ " SO_TIMESTAMP - normal software time stamping, ms resolution\n"
+ " SO_TIMESTAMPNS - more accurate software time stamping\n"
+ " SOF_TIMESTAMPING_TX_HARDWARE - hardware time stamping of outgoing packets\n"
+ " SOF_TIMESTAMPING_TX_SOFTWARE - software fallback for outgoing packets\n"
+ " SOF_TIMESTAMPING_RX_HARDWARE - hardware time stamping of incoming packets\n"
+ " SOF_TIMESTAMPING_RX_SOFTWARE - software fallback for incoming packets\n"
+ " SOF_TIMESTAMPING_SOFTWARE - request reporting of software time stamps\n"
+ " SOF_TIMESTAMPING_RAW_HARDWARE - request reporting of raw HW time stamps\n"
+ " SIOCGSTAMP - check last socket time stamp\n"
+ " SIOCGSTAMPNS - more accurate socket time stamp\n");
+ exit(1);
+}
+
+static void bail(const char *error)
+{
+ printf("%s: %s\n", error, strerror(errno));
+ exit(1);
+}
+
+static const unsigned char sync[] = {
+ 0x00, 0x01, 0x00, 0x01,
+ 0x5f, 0x44, 0x46, 0x4c,
+ 0x54, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x01,
+
+ /* fake uuid */
+ 0x00, 0x01,
+ 0x02, 0x03, 0x04, 0x05,
+
+ 0x00, 0x01, 0x00, 0x37,
+ 0x00, 0x00, 0x00, 0x08,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x49, 0x05, 0xcd, 0x01,
+ 0x29, 0xb1, 0x8d, 0xb0,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x01,
+
+ /* fake uuid */
+ 0x00, 0x01,
+ 0x02, 0x03, 0x04, 0x05,
+
+ 0x00, 0x00, 0x00, 0x37,
+ 0x00, 0x00, 0x00, 0x04,
+ 0x44, 0x46, 0x4c, 0x54,
+ 0x00, 0x00, 0xf0, 0x60,
+ 0x00, 0x01, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x01,
+ 0x00, 0x00, 0xf0, 0x60,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x04,
+ 0x44, 0x46, 0x4c, 0x54,
+ 0x00, 0x01,
+
+ /* fake uuid */
+ 0x00, 0x01,
+ 0x02, 0x03, 0x04, 0x05,
+
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00
+};
+
+static void sendpacket(int sock, struct sockaddr *addr, socklen_t addr_len)
+{
+ struct timeval now;
+ int res;
+
+ res = sendto(sock, sync, sizeof(sync), 0,
+ addr, addr_len);
+ gettimeofday(&now, 0);
+ if (res < 0)
+ printf("%s: %s\n", "send", strerror(errno));
+ else
+ printf("%ld.%06ld: sent %d bytes\n",
+ (long)now.tv_sec, (long)now.tv_usec,
+ res);
+}
+
+static void printpacket(struct msghdr *msg, int res,
+ char *data,
+ int sock, int recvmsg_flags,
+ int siocgstamp, int siocgstampns)
+{
+ struct sockaddr_in *from_addr = (struct sockaddr_in *)msg->msg_name;
+ struct cmsghdr *cmsg;
+ struct timeval tv;
+ struct timespec ts;
+ struct timeval now;
+
+ gettimeofday(&now, 0);
+
+ printf("%ld.%06ld: received %s data, %d bytes from %s, %zu bytes control messages\n",
+ (long)now.tv_sec, (long)now.tv_usec,
+ (recvmsg_flags & MSG_ERRQUEUE) ? "error" : "regular",
+ res,
+ inet_ntoa(from_addr->sin_addr),
+ msg->msg_controllen);
+ for (cmsg = CMSG_FIRSTHDR(msg);
+ cmsg;
+ cmsg = CMSG_NXTHDR(msg, cmsg)) {
+ printf(" cmsg len %zu: ", cmsg->cmsg_len);
+ switch (cmsg->cmsg_level) {
+ case SOL_SOCKET:
+ printf("SOL_SOCKET ");
+ switch (cmsg->cmsg_type) {
+ case SO_TIMESTAMP: {
+ struct timeval *stamp =
+ (struct timeval *)CMSG_DATA(cmsg);
+ printf("SO_TIMESTAMP %ld.%06ld",
+ (long)stamp->tv_sec,
+ (long)stamp->tv_usec);
+ break;
+ }
+ case SO_TIMESTAMPNS: {
+ struct timespec *stamp =
+ (struct timespec *)CMSG_DATA(cmsg);
+ printf("SO_TIMESTAMPNS %ld.%09ld",
+ (long)stamp->tv_sec,
+ (long)stamp->tv_nsec);
+ break;
+ }
+ case SO_TIMESTAMPING: {
+ struct timespec *stamp =
+ (struct timespec *)CMSG_DATA(cmsg);
+ printf("SO_TIMESTAMPING ");
+ printf("SW %ld.%09ld ",
+ (long)stamp->tv_sec,
+ (long)stamp->tv_nsec);
+ stamp++;
+ /* skip deprecated HW transformed */
+ stamp++;
+ printf("HW raw %ld.%09ld",
+ (long)stamp->tv_sec,
+ (long)stamp->tv_nsec);
+ break;
+ }
+ default:
+ printf("type %d", cmsg->cmsg_type);
+ break;
+ }
+ break;
+ case IPPROTO_IP:
+ printf("IPPROTO_IP ");
+ switch (cmsg->cmsg_type) {
+ case IP_RECVERR: {
+ struct sock_extended_err *err =
+ (struct sock_extended_err *)CMSG_DATA(cmsg);
+ printf("IP_RECVERR ee_errno '%s' ee_origin %d => %s",
+ strerror(err->ee_errno),
+ err->ee_origin,
+#ifdef SO_EE_ORIGIN_TIMESTAMPING
+ err->ee_origin == SO_EE_ORIGIN_TIMESTAMPING ?
+ "bounced packet" : "unexpected origin"
+#else
+ "probably SO_EE_ORIGIN_TIMESTAMPING"
+#endif
+ );
+ if (res < sizeof(sync))
+ printf(" => truncated data?!");
+ else if (!memcmp(sync, data + res - sizeof(sync),
+ sizeof(sync)))
+ printf(" => GOT OUR DATA BACK (HURRAY!)");
+ break;
+ }
+ case IP_PKTINFO: {
+ struct in_pktinfo *pktinfo =
+ (struct in_pktinfo *)CMSG_DATA(cmsg);
+ printf("IP_PKTINFO interface index %u",
+ pktinfo->ipi_ifindex);
+ break;
+ }
+ default:
+ printf("type %d", cmsg->cmsg_type);
+ break;
+ }
+ break;
+ default:
+ printf("level %d type %d",
+ cmsg->cmsg_level,
+ cmsg->cmsg_type);
+ break;
+ }
+ printf("\n");
+ }
+
+ if (siocgstamp) {
+ if (ioctl(sock, SIOCGSTAMP, &tv))
+ printf(" %s: %s\n", "SIOCGSTAMP", strerror(errno));
+ else
+ printf("SIOCGSTAMP %ld.%06ld\n",
+ (long)tv.tv_sec,
+ (long)tv.tv_usec);
+ }
+ if (siocgstampns) {
+ if (ioctl(sock, SIOCGSTAMPNS, &ts))
+ printf(" %s: %s\n", "SIOCGSTAMPNS", strerror(errno));
+ else
+ printf("SIOCGSTAMPNS %ld.%09ld\n",
+ (long)ts.tv_sec,
+ (long)ts.tv_nsec);
+ }
+}
+
+static void recvpacket(int sock, int recvmsg_flags,
+ int siocgstamp, int siocgstampns)
+{
+ char data[256];
+ struct msghdr msg;
+ struct iovec entry;
+ struct sockaddr_in from_addr;
+ struct {
+ struct cmsghdr cm;
+ char control[512];
+ } control;
+ int res;
+
+ memset(&msg, 0, sizeof(msg));
+ msg.msg_iov = &entry;
+ msg.msg_iovlen = 1;
+ entry.iov_base = data;
+ entry.iov_len = sizeof(data);
+ msg.msg_name = (caddr_t)&from_addr;
+ msg.msg_namelen = sizeof(from_addr);
+ msg.msg_control = &control;
+ msg.msg_controllen = sizeof(control);
+
+ res = recvmsg(sock, &msg, recvmsg_flags|MSG_DONTWAIT);
+ if (res < 0) {
+ printf("%s %s: %s\n",
+ "recvmsg",
+ (recvmsg_flags & MSG_ERRQUEUE) ? "error" : "regular",
+ strerror(errno));
+ } else {
+ printpacket(&msg, res, data,
+ sock, recvmsg_flags,
+ siocgstamp, siocgstampns);
+ }
+}
+
+int main(int argc, char **argv)
+{
+ int so_timestamping_flags = 0;
+ int so_timestamp = 0;
+ int so_timestampns = 0;
+ int siocgstamp = 0;
+ int siocgstampns = 0;
+ int ip_multicast_loop = 0;
+ char *interface;
+ int i;
+ int enabled = 1;
+ int sock;
+ struct ifreq device;
+ struct ifreq hwtstamp;
+ struct hwtstamp_config hwconfig, hwconfig_requested;
+ struct sockaddr_in addr;
+ struct ip_mreq imr;
+ struct in_addr iaddr;
+ int val;
+ socklen_t len;
+ struct timeval next;
+ size_t if_len;
+
+ if (argc < 2)
+ usage(0);
+ interface = argv[1];
+ if_len = strlen(interface);
+ if (if_len >= IFNAMSIZ) {
+ printf("interface name exceeds IFNAMSIZ\n");
+ exit(1);
+ }
+
+ for (i = 2; i < argc; i++) {
+ if (!strcasecmp(argv[i], "SO_TIMESTAMP"))
+ so_timestamp = 1;
+ else if (!strcasecmp(argv[i], "SO_TIMESTAMPNS"))
+ so_timestampns = 1;
+ else if (!strcasecmp(argv[i], "SIOCGSTAMP"))
+ siocgstamp = 1;
+ else if (!strcasecmp(argv[i], "SIOCGSTAMPNS"))
+ siocgstampns = 1;
+ else if (!strcasecmp(argv[i], "IP_MULTICAST_LOOP"))
+ ip_multicast_loop = 1;
+ else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_TX_HARDWARE"))
+ so_timestamping_flags |= SOF_TIMESTAMPING_TX_HARDWARE;
+ else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_TX_SOFTWARE"))
+ so_timestamping_flags |= SOF_TIMESTAMPING_TX_SOFTWARE;
+ else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RX_HARDWARE"))
+ so_timestamping_flags |= SOF_TIMESTAMPING_RX_HARDWARE;
+ else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RX_SOFTWARE"))
+ so_timestamping_flags |= SOF_TIMESTAMPING_RX_SOFTWARE;
+ else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_SOFTWARE"))
+ so_timestamping_flags |= SOF_TIMESTAMPING_SOFTWARE;
+ else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RAW_HARDWARE"))
+ so_timestamping_flags |= SOF_TIMESTAMPING_RAW_HARDWARE;
+ else
+ usage(argv[i]);
+ }
+
+ sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP);
+ if (sock < 0)
+ bail("socket");
+
+ memset(&device, 0, sizeof(device));
+ memcpy(device.ifr_name, interface, if_len + 1);
+ if (ioctl(sock, SIOCGIFADDR, &device) < 0)
+ bail("getting interface IP address");
+
+ memset(&hwtstamp, 0, sizeof(hwtstamp));
+ memcpy(hwtstamp.ifr_name, interface, if_len + 1);
+ hwtstamp.ifr_data = (void *)&hwconfig;
+ memset(&hwconfig, 0, sizeof(hwconfig));
+ hwconfig.tx_type =
+ (so_timestamping_flags & SOF_TIMESTAMPING_TX_HARDWARE) ?
+ HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF;
+ hwconfig.rx_filter =
+ (so_timestamping_flags & SOF_TIMESTAMPING_RX_HARDWARE) ?
+ HWTSTAMP_FILTER_PTP_V1_L4_SYNC : HWTSTAMP_FILTER_NONE;
+ hwconfig_requested = hwconfig;
+ if (ioctl(sock, SIOCSHWTSTAMP, &hwtstamp) < 0) {
+ if ((errno == EINVAL || errno == ENOTSUP) &&
+ hwconfig_requested.tx_type == HWTSTAMP_TX_OFF &&
+ hwconfig_requested.rx_filter == HWTSTAMP_FILTER_NONE)
+ printf("SIOCSHWTSTAMP: disabling hardware time stamping not possible\n");
+ else
+ bail("SIOCSHWTSTAMP");
+ }
+ printf("SIOCSHWTSTAMP: tx_type %d requested, got %d; rx_filter %d requested, got %d\n",
+ hwconfig_requested.tx_type, hwconfig.tx_type,
+ hwconfig_requested.rx_filter, hwconfig.rx_filter);
+
+ /* bind to PTP port */
+ addr.sin_family = AF_INET;
+ addr.sin_addr.s_addr = htonl(INADDR_ANY);
+ addr.sin_port = htons(319 /* PTP event port */);
+ if (bind(sock,
+ (struct sockaddr *)&addr,
+ sizeof(struct sockaddr_in)) < 0)
+ bail("bind");
+
+ /* set multicast group for outgoing packets */
+ inet_aton("224.0.1.130", &iaddr); /* alternate PTP domain 1 */
+ addr.sin_addr = iaddr;
+ imr.imr_multiaddr.s_addr = iaddr.s_addr;
+ imr.imr_interface.s_addr =
+ ((struct sockaddr_in *)&device.ifr_addr)->sin_addr.s_addr;
+ if (setsockopt(sock, IPPROTO_IP, IP_MULTICAST_IF,
+ &imr.imr_interface.s_addr, sizeof(struct in_addr)) < 0)
+ bail("set multicast");
+
+ /* join multicast group, loop our own packet */
+ if (setsockopt(sock, IPPROTO_IP, IP_ADD_MEMBERSHIP,
+ &imr, sizeof(struct ip_mreq)) < 0)
+ bail("join multicast group");
+
+ if (setsockopt(sock, IPPROTO_IP, IP_MULTICAST_LOOP,
+ &ip_multicast_loop, sizeof(enabled)) < 0) {
+ bail("loop multicast");
+ }
+
+ /* set socket options for time stamping */
+ if (so_timestamp &&
+ setsockopt(sock, SOL_SOCKET, SO_TIMESTAMP,
+ &enabled, sizeof(enabled)) < 0)
+ bail("setsockopt SO_TIMESTAMP");
+
+ if (so_timestampns &&
+ setsockopt(sock, SOL_SOCKET, SO_TIMESTAMPNS,
+ &enabled, sizeof(enabled)) < 0)
+ bail("setsockopt SO_TIMESTAMPNS");
+
+ if (so_timestamping_flags &&
+ setsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING,
+ &so_timestamping_flags,
+ sizeof(so_timestamping_flags)) < 0)
+ bail("setsockopt SO_TIMESTAMPING");
+
+ /* request IP_PKTINFO for debugging purposes */
+ if (setsockopt(sock, SOL_IP, IP_PKTINFO,
+ &enabled, sizeof(enabled)) < 0)
+ printf("%s: %s\n", "setsockopt IP_PKTINFO", strerror(errno));
+
+ /* verify socket options */
+ len = sizeof(val);
+ if (getsockopt(sock, SOL_SOCKET, SO_TIMESTAMP, &val, &len) < 0)
+ printf("%s: %s\n", "getsockopt SO_TIMESTAMP", strerror(errno));
+ else
+ printf("SO_TIMESTAMP %d\n", val);
+
+ if (getsockopt(sock, SOL_SOCKET, SO_TIMESTAMPNS, &val, &len) < 0)
+ printf("%s: %s\n", "getsockopt SO_TIMESTAMPNS",
+ strerror(errno));
+ else
+ printf("SO_TIMESTAMPNS %d\n", val);
+
+ if (getsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING, &val, &len) < 0) {
+ printf("%s: %s\n", "getsockopt SO_TIMESTAMPING",
+ strerror(errno));
+ } else {
+ printf("SO_TIMESTAMPING %d\n", val);
+ if (val != so_timestamping_flags)
+ printf(" not the expected value %d\n",
+ so_timestamping_flags);
+ }
+
+ /* send packets forever every five seconds */
+ gettimeofday(&next, 0);
+ next.tv_sec = (next.tv_sec + 1) / 5 * 5;
+ next.tv_usec = 0;
+ while (1) {
+ struct timeval now;
+ struct timeval delta;
+ long delta_us;
+ int res;
+ fd_set readfs, errorfs;
+
+ gettimeofday(&now, 0);
+ delta_us = (long)(next.tv_sec - now.tv_sec) * 1000000 +
+ (long)(next.tv_usec - now.tv_usec);
+ if (delta_us > 0) {
+ /* continue waiting for timeout or data */
+ delta.tv_sec = delta_us / 1000000;
+ delta.tv_usec = delta_us % 1000000;
+
+ FD_ZERO(&readfs);
+ FD_ZERO(&errorfs);
+ FD_SET(sock, &readfs);
+ FD_SET(sock, &errorfs);
+ printf("%ld.%06ld: select %ldus\n",
+ (long)now.tv_sec, (long)now.tv_usec,
+ delta_us);
+ res = select(sock + 1, &readfs, 0, &errorfs, &delta);
+ gettimeofday(&now, 0);
+ printf("%ld.%06ld: select returned: %d, %s\n",
+ (long)now.tv_sec, (long)now.tv_usec,
+ res,
+ res < 0 ? strerror(errno) : "success");
+ if (res > 0) {
+ if (FD_ISSET(sock, &readfs))
+ printf("ready for reading\n");
+ if (FD_ISSET(sock, &errorfs))
+ printf("has error\n");
+ recvpacket(sock, 0,
+ siocgstamp,
+ siocgstampns);
+ recvpacket(sock, MSG_ERRQUEUE,
+ siocgstamp,
+ siocgstampns);
+ }
+ } else {
+ /* write one packet */
+ sendpacket(sock,
+ (struct sockaddr *)&addr,
+ sizeof(addr));
+ next.tv_sec += 5;
+ continue;
+ }
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/networking/timestamping/txtimestamp.c b/tools/testing/selftests/networking/timestamping/txtimestamp.c
new file mode 100644
index 000000000..81a98a240
--- /dev/null
+++ b/tools/testing/selftests/networking/timestamping/txtimestamp.c
@@ -0,0 +1,564 @@
+/*
+ * Copyright 2014 Google Inc.
+ * Author: willemb@google.com (Willem de Bruijn)
+ *
+ * Test software tx timestamping, including
+ *
+ * - SCHED, SND and ACK timestamps
+ * - RAW, UDP and TCP
+ * - IPv4 and IPv6
+ * - various packet sizes (to test GSO and TSO)
+ *
+ * Consult the command line arguments for help on running
+ * the various testcases.
+ *
+ * This test requires a dummy TCP server.
+ * A simple `nc6 [-u] -l -p $DESTPORT` will do
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <asm/types.h>
+#include <error.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <linux/errqueue.h>
+#include <linux/if_ether.h>
+#include <linux/net_tstamp.h>
+#include <netdb.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/udp.h>
+#include <netinet/tcp.h>
+#include <netpacket/packet.h>
+#include <poll.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/select.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+
+/* command line parameters */
+static int cfg_proto = SOCK_STREAM;
+static int cfg_ipproto = IPPROTO_TCP;
+static int cfg_num_pkts = 4;
+static int do_ipv4 = 1;
+static int do_ipv6 = 1;
+static int cfg_payload_len = 10;
+static int cfg_poll_timeout = 100;
+static bool cfg_show_payload;
+static bool cfg_do_pktinfo;
+static bool cfg_loop_nodata;
+static bool cfg_no_delay;
+static uint16_t dest_port = 9000;
+
+static struct sockaddr_in daddr;
+static struct sockaddr_in6 daddr6;
+static struct timespec ts_prev;
+
+static void __print_timestamp(const char *name, struct timespec *cur,
+ uint32_t key, int payload_len)
+{
+ if (!(cur->tv_sec | cur->tv_nsec))
+ return;
+
+ fprintf(stderr, " %s: %lu s %lu us (seq=%u, len=%u)",
+ name, cur->tv_sec, cur->tv_nsec / 1000,
+ key, payload_len);
+
+ if ((ts_prev.tv_sec | ts_prev.tv_nsec)) {
+ int64_t cur_ms, prev_ms;
+
+ cur_ms = (long) cur->tv_sec * 1000 * 1000;
+ cur_ms += cur->tv_nsec / 1000;
+
+ prev_ms = (long) ts_prev.tv_sec * 1000 * 1000;
+ prev_ms += ts_prev.tv_nsec / 1000;
+
+ fprintf(stderr, " (%+" PRId64 " us)", cur_ms - prev_ms);
+ }
+
+ ts_prev = *cur;
+ fprintf(stderr, "\n");
+}
+
+static void print_timestamp_usr(void)
+{
+ struct timespec ts;
+ struct timeval tv; /* avoid dependency on -lrt */
+
+ gettimeofday(&tv, NULL);
+ ts.tv_sec = tv.tv_sec;
+ ts.tv_nsec = tv.tv_usec * 1000;
+
+ __print_timestamp(" USR", &ts, 0, 0);
+}
+
+static void print_timestamp(struct scm_timestamping *tss, int tstype,
+ int tskey, int payload_len)
+{
+ const char *tsname;
+
+ switch (tstype) {
+ case SCM_TSTAMP_SCHED:
+ tsname = " ENQ";
+ break;
+ case SCM_TSTAMP_SND:
+ tsname = " SND";
+ break;
+ case SCM_TSTAMP_ACK:
+ tsname = " ACK";
+ break;
+ default:
+ error(1, 0, "unknown timestamp type: %u",
+ tstype);
+ }
+ __print_timestamp(tsname, &tss->ts[0], tskey, payload_len);
+}
+
+/* TODO: convert to check_and_print payload once API is stable */
+static void print_payload(char *data, int len)
+{
+ int i;
+
+ if (!len)
+ return;
+
+ if (len > 70)
+ len = 70;
+
+ fprintf(stderr, "payload: ");
+ for (i = 0; i < len; i++)
+ fprintf(stderr, "%02hhx ", data[i]);
+ fprintf(stderr, "\n");
+}
+
+static void print_pktinfo(int family, int ifindex, void *saddr, void *daddr)
+{
+ char sa[INET6_ADDRSTRLEN], da[INET6_ADDRSTRLEN];
+
+ fprintf(stderr, " pktinfo: ifindex=%u src=%s dst=%s\n",
+ ifindex,
+ saddr ? inet_ntop(family, saddr, sa, sizeof(sa)) : "unknown",
+ daddr ? inet_ntop(family, daddr, da, sizeof(da)) : "unknown");
+}
+
+static void __poll(int fd)
+{
+ struct pollfd pollfd;
+ int ret;
+
+ memset(&pollfd, 0, sizeof(pollfd));
+ pollfd.fd = fd;
+ ret = poll(&pollfd, 1, cfg_poll_timeout);
+ if (ret != 1)
+ error(1, errno, "poll");
+}
+
+static void __recv_errmsg_cmsg(struct msghdr *msg, int payload_len)
+{
+ struct sock_extended_err *serr = NULL;
+ struct scm_timestamping *tss = NULL;
+ struct cmsghdr *cm;
+ int batch = 0;
+
+ for (cm = CMSG_FIRSTHDR(msg);
+ cm && cm->cmsg_len;
+ cm = CMSG_NXTHDR(msg, cm)) {
+ if (cm->cmsg_level == SOL_SOCKET &&
+ cm->cmsg_type == SCM_TIMESTAMPING) {
+ tss = (void *) CMSG_DATA(cm);
+ } else if ((cm->cmsg_level == SOL_IP &&
+ cm->cmsg_type == IP_RECVERR) ||
+ (cm->cmsg_level == SOL_IPV6 &&
+ cm->cmsg_type == IPV6_RECVERR)) {
+ serr = (void *) CMSG_DATA(cm);
+ if (serr->ee_errno != ENOMSG ||
+ serr->ee_origin != SO_EE_ORIGIN_TIMESTAMPING) {
+ fprintf(stderr, "unknown ip error %d %d\n",
+ serr->ee_errno,
+ serr->ee_origin);
+ serr = NULL;
+ }
+ } else if (cm->cmsg_level == SOL_IP &&
+ cm->cmsg_type == IP_PKTINFO) {
+ struct in_pktinfo *info = (void *) CMSG_DATA(cm);
+ print_pktinfo(AF_INET, info->ipi_ifindex,
+ &info->ipi_spec_dst, &info->ipi_addr);
+ } else if (cm->cmsg_level == SOL_IPV6 &&
+ cm->cmsg_type == IPV6_PKTINFO) {
+ struct in6_pktinfo *info6 = (void *) CMSG_DATA(cm);
+ print_pktinfo(AF_INET6, info6->ipi6_ifindex,
+ NULL, &info6->ipi6_addr);
+ } else
+ fprintf(stderr, "unknown cmsg %d,%d\n",
+ cm->cmsg_level, cm->cmsg_type);
+
+ if (serr && tss) {
+ print_timestamp(tss, serr->ee_info, serr->ee_data,
+ payload_len);
+ serr = NULL;
+ tss = NULL;
+ batch++;
+ }
+ }
+
+ if (batch > 1)
+ fprintf(stderr, "batched %d timestamps\n", batch);
+}
+
+static int recv_errmsg(int fd)
+{
+ static char ctrl[1024 /* overprovision*/];
+ static struct msghdr msg;
+ struct iovec entry;
+ static char *data;
+ int ret = 0;
+
+ data = malloc(cfg_payload_len);
+ if (!data)
+ error(1, 0, "malloc");
+
+ memset(&msg, 0, sizeof(msg));
+ memset(&entry, 0, sizeof(entry));
+ memset(ctrl, 0, sizeof(ctrl));
+
+ entry.iov_base = data;
+ entry.iov_len = cfg_payload_len;
+ msg.msg_iov = &entry;
+ msg.msg_iovlen = 1;
+ msg.msg_name = NULL;
+ msg.msg_namelen = 0;
+ msg.msg_control = ctrl;
+ msg.msg_controllen = sizeof(ctrl);
+
+ ret = recvmsg(fd, &msg, MSG_ERRQUEUE);
+ if (ret == -1 && errno != EAGAIN)
+ error(1, errno, "recvmsg");
+
+ if (ret >= 0) {
+ __recv_errmsg_cmsg(&msg, ret);
+ if (cfg_show_payload)
+ print_payload(data, cfg_payload_len);
+ }
+
+ free(data);
+ return ret == -1;
+}
+
+static void do_test(int family, unsigned int opt)
+{
+ char *buf;
+ int fd, i, val = 1, total_len;
+
+ if (family == AF_INET6 && cfg_proto != SOCK_STREAM) {
+ /* due to lack of checksum generation code */
+ fprintf(stderr, "test: skipping datagram over IPv6\n");
+ return;
+ }
+
+ total_len = cfg_payload_len;
+ if (cfg_proto == SOCK_RAW) {
+ total_len += sizeof(struct udphdr);
+ if (cfg_ipproto == IPPROTO_RAW)
+ total_len += sizeof(struct iphdr);
+ }
+
+ buf = malloc(total_len);
+ if (!buf)
+ error(1, 0, "malloc");
+
+ fd = socket(family, cfg_proto, cfg_ipproto);
+ if (fd < 0)
+ error(1, errno, "socket");
+
+ if (cfg_proto == SOCK_STREAM) {
+ if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY,
+ (char*) &val, sizeof(val)))
+ error(1, 0, "setsockopt no nagle");
+
+ if (family == PF_INET) {
+ if (connect(fd, (void *) &daddr, sizeof(daddr)))
+ error(1, errno, "connect ipv4");
+ } else {
+ if (connect(fd, (void *) &daddr6, sizeof(daddr6)))
+ error(1, errno, "connect ipv6");
+ }
+ }
+
+ if (cfg_do_pktinfo) {
+ if (family == AF_INET6) {
+ if (setsockopt(fd, SOL_IPV6, IPV6_RECVPKTINFO,
+ &val, sizeof(val)))
+ error(1, errno, "setsockopt pktinfo ipv6");
+ } else {
+ if (setsockopt(fd, SOL_IP, IP_PKTINFO,
+ &val, sizeof(val)))
+ error(1, errno, "setsockopt pktinfo ipv4");
+ }
+ }
+
+ opt |= SOF_TIMESTAMPING_SOFTWARE |
+ SOF_TIMESTAMPING_OPT_CMSG |
+ SOF_TIMESTAMPING_OPT_ID;
+ if (cfg_loop_nodata)
+ opt |= SOF_TIMESTAMPING_OPT_TSONLY;
+
+ if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING,
+ (char *) &opt, sizeof(opt)))
+ error(1, 0, "setsockopt timestamping");
+
+ for (i = 0; i < cfg_num_pkts; i++) {
+ memset(&ts_prev, 0, sizeof(ts_prev));
+ memset(buf, 'a' + i, total_len);
+
+ if (cfg_proto == SOCK_RAW) {
+ struct udphdr *udph;
+ int off = 0;
+
+ if (cfg_ipproto == IPPROTO_RAW) {
+ struct iphdr *iph = (void *) buf;
+
+ memset(iph, 0, sizeof(*iph));
+ iph->ihl = 5;
+ iph->version = 4;
+ iph->ttl = 2;
+ iph->daddr = daddr.sin_addr.s_addr;
+ iph->protocol = IPPROTO_UDP;
+ /* kernel writes saddr, csum, len */
+
+ off = sizeof(*iph);
+ }
+
+ udph = (void *) buf + off;
+ udph->source = ntohs(9000); /* random spoof */
+ udph->dest = ntohs(dest_port);
+ udph->len = ntohs(sizeof(*udph) + cfg_payload_len);
+ udph->check = 0; /* not allowed for IPv6 */
+ }
+
+ print_timestamp_usr();
+ if (cfg_proto != SOCK_STREAM) {
+ if (family == PF_INET)
+ val = sendto(fd, buf, total_len, 0, (void *) &daddr, sizeof(daddr));
+ else
+ val = sendto(fd, buf, total_len, 0, (void *) &daddr6, sizeof(daddr6));
+ } else {
+ val = send(fd, buf, cfg_payload_len, 0);
+ }
+ if (val != total_len)
+ error(1, errno, "send");
+
+ /* wait for all errors to be queued, else ACKs arrive OOO */
+ if (!cfg_no_delay)
+ usleep(50 * 1000);
+
+ __poll(fd);
+
+ while (!recv_errmsg(fd)) {}
+ }
+
+ if (close(fd))
+ error(1, errno, "close");
+
+ free(buf);
+ usleep(400 * 1000);
+}
+
+static void __attribute__((noreturn)) usage(const char *filepath)
+{
+ fprintf(stderr, "\nUsage: %s [options] hostname\n"
+ "\nwhere options are:\n"
+ " -4: only IPv4\n"
+ " -6: only IPv6\n"
+ " -h: show this message\n"
+ " -c N: number of packets for each test\n"
+ " -D: no delay between packets\n"
+ " -F: poll() waits forever for an event\n"
+ " -I: request PKTINFO\n"
+ " -l N: send N bytes at a time\n"
+ " -n: set no-payload option\n"
+ " -r: use raw\n"
+ " -R: use raw (IP_HDRINCL)\n"
+ " -p N: connect to port N\n"
+ " -u: use udp\n"
+ " -x: show payload (up to 70 bytes)\n",
+ filepath);
+ exit(1);
+}
+
+static void parse_opt(int argc, char **argv)
+{
+ int proto_count = 0;
+ char c;
+
+ while ((c = getopt(argc, argv, "46c:DFhIl:np:rRux")) != -1) {
+ switch (c) {
+ case '4':
+ do_ipv6 = 0;
+ break;
+ case '6':
+ do_ipv4 = 0;
+ break;
+ case 'c':
+ cfg_num_pkts = strtoul(optarg, NULL, 10);
+ break;
+ case 'D':
+ cfg_no_delay = true;
+ break;
+ case 'F':
+ cfg_poll_timeout = -1;
+ break;
+ case 'I':
+ cfg_do_pktinfo = true;
+ break;
+ case 'n':
+ cfg_loop_nodata = true;
+ break;
+ case 'r':
+ proto_count++;
+ cfg_proto = SOCK_RAW;
+ cfg_ipproto = IPPROTO_UDP;
+ break;
+ case 'R':
+ proto_count++;
+ cfg_proto = SOCK_RAW;
+ cfg_ipproto = IPPROTO_RAW;
+ break;
+ case 'u':
+ proto_count++;
+ cfg_proto = SOCK_DGRAM;
+ cfg_ipproto = IPPROTO_UDP;
+ break;
+ case 'l':
+ cfg_payload_len = strtoul(optarg, NULL, 10);
+ break;
+ case 'p':
+ dest_port = strtoul(optarg, NULL, 10);
+ break;
+ case 'x':
+ cfg_show_payload = true;
+ break;
+ case 'h':
+ default:
+ usage(argv[0]);
+ }
+ }
+
+ if (!cfg_payload_len)
+ error(1, 0, "payload may not be nonzero");
+ if (cfg_proto != SOCK_STREAM && cfg_payload_len > 1472)
+ error(1, 0, "udp packet might exceed expected MTU");
+ if (!do_ipv4 && !do_ipv6)
+ error(1, 0, "pass -4 or -6, not both");
+ if (proto_count > 1)
+ error(1, 0, "pass -r, -R or -u, not multiple");
+
+ if (optind != argc - 1)
+ error(1, 0, "missing required hostname argument");
+}
+
+static void resolve_hostname(const char *hostname)
+{
+ struct addrinfo *addrs, *cur;
+ int have_ipv4 = 0, have_ipv6 = 0;
+
+ if (getaddrinfo(hostname, NULL, NULL, &addrs))
+ error(1, errno, "getaddrinfo");
+
+ cur = addrs;
+ while (cur && !have_ipv4 && !have_ipv6) {
+ if (!have_ipv4 && cur->ai_family == AF_INET) {
+ memcpy(&daddr, cur->ai_addr, sizeof(daddr));
+ daddr.sin_port = htons(dest_port);
+ have_ipv4 = 1;
+ }
+ else if (!have_ipv6 && cur->ai_family == AF_INET6) {
+ memcpy(&daddr6, cur->ai_addr, sizeof(daddr6));
+ daddr6.sin6_port = htons(dest_port);
+ have_ipv6 = 1;
+ }
+ cur = cur->ai_next;
+ }
+ if (addrs)
+ freeaddrinfo(addrs);
+
+ do_ipv4 &= have_ipv4;
+ do_ipv6 &= have_ipv6;
+}
+
+static void do_main(int family)
+{
+ fprintf(stderr, "family: %s\n",
+ family == PF_INET ? "INET" : "INET6");
+
+ fprintf(stderr, "test SND\n");
+ do_test(family, SOF_TIMESTAMPING_TX_SOFTWARE);
+
+ fprintf(stderr, "test ENQ\n");
+ do_test(family, SOF_TIMESTAMPING_TX_SCHED);
+
+ fprintf(stderr, "test ENQ + SND\n");
+ do_test(family, SOF_TIMESTAMPING_TX_SCHED |
+ SOF_TIMESTAMPING_TX_SOFTWARE);
+
+ if (cfg_proto == SOCK_STREAM) {
+ fprintf(stderr, "\ntest ACK\n");
+ do_test(family, SOF_TIMESTAMPING_TX_ACK);
+
+ fprintf(stderr, "\ntest SND + ACK\n");
+ do_test(family, SOF_TIMESTAMPING_TX_SOFTWARE |
+ SOF_TIMESTAMPING_TX_ACK);
+
+ fprintf(stderr, "\ntest ENQ + SND + ACK\n");
+ do_test(family, SOF_TIMESTAMPING_TX_SCHED |
+ SOF_TIMESTAMPING_TX_SOFTWARE |
+ SOF_TIMESTAMPING_TX_ACK);
+ }
+}
+
+const char *sock_names[] = { NULL, "TCP", "UDP", "RAW" };
+
+int main(int argc, char **argv)
+{
+ if (argc == 1)
+ usage(argv[0]);
+
+ parse_opt(argc, argv);
+ resolve_hostname(argv[argc - 1]);
+
+ fprintf(stderr, "protocol: %s\n", sock_names[cfg_proto]);
+ fprintf(stderr, "payload: %u\n", cfg_payload_len);
+ fprintf(stderr, "server port: %u\n", dest_port);
+ fprintf(stderr, "\n");
+
+ if (do_ipv4)
+ do_main(PF_INET);
+ if (do_ipv6)
+ do_main(PF_INET6);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/nsfs/.gitignore b/tools/testing/selftests/nsfs/.gitignore
new file mode 100644
index 000000000..2ab2c824c
--- /dev/null
+++ b/tools/testing/selftests/nsfs/.gitignore
@@ -0,0 +1,2 @@
+owner
+pidns
diff --git a/tools/testing/selftests/nsfs/Makefile b/tools/testing/selftests/nsfs/Makefile
new file mode 100644
index 000000000..9ff7c7f80
--- /dev/null
+++ b/tools/testing/selftests/nsfs/Makefile
@@ -0,0 +1,5 @@
+TEST_GEN_PROGS := owner pidns
+
+CFLAGS := -Wall -Werror
+
+include ../lib.mk
diff --git a/tools/testing/selftests/nsfs/config b/tools/testing/selftests/nsfs/config
new file mode 100644
index 000000000..598d0a225
--- /dev/null
+++ b/tools/testing/selftests/nsfs/config
@@ -0,0 +1,3 @@
+CONFIG_USER_NS=y
+CONFIG_UTS_NS=y
+CONFIG_PID_NS=y
diff --git a/tools/testing/selftests/nsfs/owner.c b/tools/testing/selftests/nsfs/owner.c
new file mode 100644
index 000000000..96a976c74
--- /dev/null
+++ b/tools/testing/selftests/nsfs/owner.c
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <sched.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/prctl.h>
+#include <sys/wait.h>
+
+#define NSIO 0xb7
+#define NS_GET_USERNS _IO(NSIO, 0x1)
+
+#define pr_err(fmt, ...) \
+ ({ \
+ fprintf(stderr, "%s:%d:" fmt ": %m\n", \
+ __func__, __LINE__, ##__VA_ARGS__); \
+ 1; \
+ })
+
+int main(int argc, char *argvp[])
+{
+ int pfd[2], ns, uns, init_uns;
+ struct stat st1, st2;
+ char path[128];
+ pid_t pid;
+ char c;
+
+ if (pipe(pfd))
+ return 1;
+
+ pid = fork();
+ if (pid < 0)
+ return pr_err("fork");
+ if (pid == 0) {
+ prctl(PR_SET_PDEATHSIG, SIGKILL);
+ if (unshare(CLONE_NEWUTS | CLONE_NEWUSER))
+ return pr_err("unshare");
+ close(pfd[0]);
+ close(pfd[1]);
+ while (1)
+ sleep(1);
+ return 0;
+ }
+ close(pfd[1]);
+ if (read(pfd[0], &c, 1) != 0)
+ return pr_err("Unable to read from pipe");
+ close(pfd[0]);
+
+ snprintf(path, sizeof(path), "/proc/%d/ns/uts", pid);
+ ns = open(path, O_RDONLY);
+ if (ns < 0)
+ return pr_err("Unable to open %s", path);
+
+ uns = ioctl(ns, NS_GET_USERNS);
+ if (uns < 0)
+ return pr_err("Unable to get an owning user namespace");
+
+ if (fstat(uns, &st1))
+ return pr_err("fstat");
+
+ snprintf(path, sizeof(path), "/proc/%d/ns/user", pid);
+ if (stat(path, &st2))
+ return pr_err("stat");
+
+ if (st1.st_ino != st2.st_ino)
+ return pr_err("NS_GET_USERNS returned a wrong namespace");
+
+ init_uns = ioctl(uns, NS_GET_USERNS);
+ if (uns < 0)
+ return pr_err("Unable to get an owning user namespace");
+
+ if (ioctl(init_uns, NS_GET_USERNS) >= 0 || errno != EPERM)
+ return pr_err("Don't get EPERM");
+
+ if (unshare(CLONE_NEWUSER))
+ return pr_err("unshare");
+
+ if (ioctl(ns, NS_GET_USERNS) >= 0 || errno != EPERM)
+ return pr_err("Don't get EPERM");
+ if (ioctl(init_uns, NS_GET_USERNS) >= 0 || errno != EPERM)
+ return pr_err("Don't get EPERM");
+
+ kill(pid, SIGKILL);
+ wait(NULL);
+ return 0;
+}
diff --git a/tools/testing/selftests/nsfs/pidns.c b/tools/testing/selftests/nsfs/pidns.c
new file mode 100644
index 000000000..e0d86e166
--- /dev/null
+++ b/tools/testing/selftests/nsfs/pidns.c
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <sched.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/prctl.h>
+#include <sys/wait.h>
+
+#define pr_err(fmt, ...) \
+ ({ \
+ fprintf(stderr, "%s:%d:" fmt ": %m\n", \
+ __func__, __LINE__, ##__VA_ARGS__); \
+ 1; \
+ })
+
+#define NSIO 0xb7
+#define NS_GET_USERNS _IO(NSIO, 0x1)
+#define NS_GET_PARENT _IO(NSIO, 0x2)
+
+#define __stack_aligned__ __attribute__((aligned(16)))
+struct cr_clone_arg {
+ char stack[128] __stack_aligned__;
+ char stack_ptr[0];
+};
+
+static int child(void *args)
+{
+ prctl(PR_SET_PDEATHSIG, SIGKILL);
+ while (1)
+ sleep(1);
+ exit(0);
+}
+
+int main(int argc, char *argv[])
+{
+ char *ns_strs[] = {"pid", "user"};
+ char path[] = "/proc/0123456789/ns/pid";
+ struct cr_clone_arg ca;
+ struct stat st1, st2;
+ int ns, pns, i;
+ pid_t pid;
+
+ pid = clone(child, ca.stack_ptr, CLONE_NEWUSER | CLONE_NEWPID | SIGCHLD, NULL);
+ if (pid < 0)
+ return pr_err("clone");
+
+ for (i = 0; i < 2; i++) {
+ snprintf(path, sizeof(path), "/proc/%d/ns/%s", pid, ns_strs[i]);
+ ns = open(path, O_RDONLY);
+ if (ns < 0)
+ return pr_err("Unable to open %s", path);
+
+ pns = ioctl(ns, NS_GET_PARENT);
+ if (pns < 0)
+ return pr_err("Unable to get a parent pidns");
+
+ snprintf(path, sizeof(path), "/proc/self/ns/%s", ns_strs[i]);
+ if (stat(path, &st2))
+ return pr_err("Unable to stat %s", path);
+ if (fstat(pns, &st1))
+ return pr_err("Unable to stat the parent pidns");
+ if (st1.st_ino != st2.st_ino)
+ return pr_err("NS_GET_PARENT returned a wrong namespace");
+
+ if (ioctl(pns, NS_GET_PARENT) >= 0 || errno != EPERM)
+ return pr_err("Don't get EPERM");
+ }
+
+ kill(pid, SIGKILL);
+ wait(NULL);
+ return 0;
+}
diff --git a/tools/testing/selftests/ntb/ntb_test.sh b/tools/testing/selftests/ntb/ntb_test.sh
new file mode 100755
index 000000000..17ca36403
--- /dev/null
+++ b/tools/testing/selftests/ntb/ntb_test.sh
@@ -0,0 +1,590 @@
+#!/bin/bash
+# Copyright (c) 2016 Microsemi. All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 2 of
+# the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it would be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# Author: Logan Gunthorpe <logang@deltatee.com>
+
+REMOTE_HOST=
+LIST_DEVS=FALSE
+
+DEBUGFS=${DEBUGFS-/sys/kernel/debug}
+
+PERF_RUN_ORDER=32
+MAX_MW_SIZE=0
+RUN_DMA_TESTS=
+DONT_CLEANUP=
+MW_SIZE=65536
+
+function show_help()
+{
+ echo "Usage: $0 [OPTIONS] LOCAL_DEV REMOTE_DEV"
+ echo "Run tests on a pair of NTB endpoints."
+ echo
+ echo "If the NTB device loops back to the same host then,"
+ echo "just specifying the two PCI ids on the command line is"
+ echo "sufficient. Otherwise, if the NTB link spans two hosts"
+ echo "use the -r option to specify the hostname for the remote"
+ echo "device. SSH will then be used to test the remote side."
+ echo "An SSH key between the root users of the host would then"
+ echo "be highly recommended."
+ echo
+ echo "Options:"
+ echo " -C don't cleanup ntb modules on exit"
+ echo " -h show this help message"
+ echo " -l list available local and remote PCI ids"
+ echo " -r REMOTE_HOST specify the remote's hostname to connect"
+ echo " to for the test (using ssh)"
+ echo " -m MW_SIZE memory window size for ntb_tool"
+ echo " (default: $MW_SIZE)"
+ echo " -d run dma tests for ntb_perf"
+ echo " -p ORDER total data order for ntb_perf"
+ echo " (default: $PERF_RUN_ORDER)"
+ echo " -w MAX_MW_SIZE maxmium memory window size for ntb_perf"
+ echo
+}
+
+function parse_args()
+{
+ OPTIND=0
+ while getopts "b:Cdhlm:r:p:w:" opt; do
+ case "$opt" in
+ C) DONT_CLEANUP=1 ;;
+ d) RUN_DMA_TESTS=1 ;;
+ h) show_help; exit 0 ;;
+ l) LIST_DEVS=TRUE ;;
+ m) MW_SIZE=${OPTARG} ;;
+ r) REMOTE_HOST=${OPTARG} ;;
+ p) PERF_RUN_ORDER=${OPTARG} ;;
+ w) MAX_MW_SIZE=${OPTARG} ;;
+ \?)
+ echo "Invalid option: -$OPTARG" >&2
+ exit 1
+ ;;
+ esac
+ done
+}
+
+parse_args "$@"
+shift $((OPTIND-1))
+LOCAL_DEV=$1
+shift
+parse_args "$@"
+shift $((OPTIND-1))
+REMOTE_DEV=$1
+shift
+parse_args "$@"
+
+set -e
+
+function _modprobe()
+{
+ modprobe "$@"
+
+ if [[ "$REMOTE_HOST" != "" ]]; then
+ ssh "$REMOTE_HOST" modprobe "$@"
+ fi
+}
+
+function split_remote()
+{
+ VPATH=$1
+ REMOTE=
+
+ if [[ "$VPATH" == *":/"* ]]; then
+ REMOTE=${VPATH%%:*}
+ VPATH=${VPATH#*:}
+ fi
+}
+
+function read_file()
+{
+ split_remote $1
+ if [[ "$REMOTE" != "" ]]; then
+ ssh "$REMOTE" cat "$VPATH"
+ else
+ cat "$VPATH"
+ fi
+}
+
+function write_file()
+{
+ split_remote $2
+ VALUE=$1
+
+ if [[ "$REMOTE" != "" ]]; then
+ ssh "$REMOTE" "echo \"$VALUE\" > \"$VPATH\""
+ else
+ echo "$VALUE" > "$VPATH"
+ fi
+}
+
+function check_file()
+{
+ split_remote $1
+
+ if [[ "$REMOTE" != "" ]]; then
+ ssh "$REMOTE" "[[ -e ${VPATH} ]]"
+ else
+ [[ -e ${VPATH} ]]
+ fi
+}
+
+function subdirname()
+{
+ echo $(basename $(dirname $1)) 2> /dev/null
+}
+
+function find_pidx()
+{
+ PORT=$1
+ PPATH=$2
+
+ for ((i = 0; i < 64; i++)); do
+ PEER_DIR="$PPATH/peer$i"
+
+ check_file ${PEER_DIR} || break
+
+ PEER_PORT=$(read_file "${PEER_DIR}/port")
+ if [[ ${PORT} -eq $PEER_PORT ]]; then
+ echo $i
+ return 0
+ fi
+ done
+
+ return 1
+}
+
+function port_test()
+{
+ LOC=$1
+ REM=$2
+
+ echo "Running port tests on: $(basename $LOC) / $(basename $REM)"
+
+ LOCAL_PORT=$(read_file "$LOC/port")
+ REMOTE_PORT=$(read_file "$REM/port")
+
+ LOCAL_PIDX=$(find_pidx ${REMOTE_PORT} "$LOC")
+ REMOTE_PIDX=$(find_pidx ${LOCAL_PORT} "$REM")
+
+ echo "Local port ${LOCAL_PORT} with index ${REMOTE_PIDX} on remote host"
+ echo "Peer port ${REMOTE_PORT} with index ${LOCAL_PIDX} on local host"
+
+ echo " Passed"
+}
+
+function link_test()
+{
+ LOC=$1
+ REM=$2
+ EXP=0
+
+ echo "Running link tests on: $(subdirname $LOC) / $(subdirname $REM)"
+
+ if ! write_file "N" "$LOC/../link" 2> /dev/null; then
+ echo " Unsupported"
+ return
+ fi
+
+ write_file "N" "$LOC/link_event"
+
+ if [[ $(read_file "$REM/link") != "N" ]]; then
+ echo "Expected link to be down in $REM/link" >&2
+ exit -1
+ fi
+
+ write_file "Y" "$LOC/../link"
+
+ echo " Passed"
+}
+
+function doorbell_test()
+{
+ LOC=$1
+ REM=$2
+ EXP=0
+
+ echo "Running db tests on: $(basename $LOC) / $(basename $REM)"
+
+ DB_VALID_MASK=$(read_file "$LOC/db_valid_mask")
+
+ write_file "c $DB_VALID_MASK" "$REM/db"
+
+ for ((i = 0; i < 64; i++)); do
+ DB=$(read_file "$REM/db")
+ if [[ "$DB" -ne "$EXP" ]]; then
+ echo "Doorbell doesn't match expected value $EXP " \
+ "in $REM/db" >&2
+ exit -1
+ fi
+
+ let "MASK = (1 << $i) & $DB_VALID_MASK" || true
+ let "EXP = $EXP | $MASK" || true
+
+ write_file "s $MASK" "$LOC/peer_db"
+ done
+
+ write_file "c $DB_VALID_MASK" "$REM/db_mask"
+ write_file $DB_VALID_MASK "$REM/db_event"
+ write_file "s $DB_VALID_MASK" "$REM/db_mask"
+
+ write_file "c $DB_VALID_MASK" "$REM/db"
+
+ echo " Passed"
+}
+
+function get_files_count()
+{
+ NAME=$1
+ LOC=$2
+
+ split_remote $LOC
+
+ if [[ "$REMOTE" == "" ]]; then
+ echo $(ls -1 "$VPATH"/${NAME}* 2>/dev/null | wc -l)
+ else
+ echo $(ssh "$REMOTE" "ls -1 \"$VPATH\"/${NAME}* | \
+ wc -l" 2> /dev/null)
+ fi
+}
+
+function scratchpad_test()
+{
+ LOC=$1
+ REM=$2
+
+ echo "Running spad tests on: $(subdirname $LOC) / $(subdirname $REM)"
+
+ CNT=$(get_files_count "spad" "$LOC")
+
+ if [[ $CNT -eq 0 ]]; then
+ echo " Unsupported"
+ return
+ fi
+
+ for ((i = 0; i < $CNT; i++)); do
+ VAL=$RANDOM
+ write_file "$VAL" "$LOC/spad$i"
+ RVAL=$(read_file "$REM/../spad$i")
+
+ if [[ "$VAL" -ne "$RVAL" ]]; then
+ echo "Scratchpad $i value $RVAL doesn't match $VAL" >&2
+ exit -1
+ fi
+ done
+
+ echo " Passed"
+}
+
+function message_test()
+{
+ LOC=$1
+ REM=$2
+
+ echo "Running msg tests on: $(subdirname $LOC) / $(subdirname $REM)"
+
+ CNT=$(get_files_count "msg" "$LOC")
+
+ if [[ $CNT -eq 0 ]]; then
+ echo " Unsupported"
+ return
+ fi
+
+ MSG_OUTBITS_MASK=$(read_file "$LOC/../msg_inbits")
+ MSG_INBITS_MASK=$(read_file "$REM/../msg_inbits")
+
+ write_file "c $MSG_OUTBITS_MASK" "$LOC/../msg_sts"
+ write_file "c $MSG_INBITS_MASK" "$REM/../msg_sts"
+
+ for ((i = 0; i < $CNT; i++)); do
+ VAL=$RANDOM
+ write_file "$VAL" "$LOC/msg$i"
+ RVAL=$(read_file "$REM/../msg$i")
+
+ if [[ "$VAL" -ne "${RVAL%%<-*}" ]]; then
+ echo "Message $i value $RVAL doesn't match $VAL" >&2
+ exit -1
+ fi
+ done
+
+ echo " Passed"
+}
+
+function get_number()
+{
+ KEY=$1
+
+ sed -n "s/^\(${KEY}\)[ \t]*\(0x[0-9a-fA-F]*\)\(\[p\]\)\?$/\2/p"
+}
+
+function mw_alloc()
+{
+ IDX=$1
+ LOC=$2
+ REM=$3
+
+ write_file $MW_SIZE "$LOC/mw_trans$IDX"
+
+ INB_MW=$(read_file "$LOC/mw_trans$IDX")
+ MW_ALIGNED_SIZE=$(echo "$INB_MW" | get_number "Window Size")
+ MW_DMA_ADDR=$(echo "$INB_MW" | get_number "DMA Address")
+
+ write_file "$MW_DMA_ADDR:$(($MW_ALIGNED_SIZE))" "$REM/peer_mw_trans$IDX"
+
+ if [[ $MW_SIZE -ne $MW_ALIGNED_SIZE ]]; then
+ echo "MW $IDX size aligned to $MW_ALIGNED_SIZE"
+ fi
+}
+
+function write_mw()
+{
+ split_remote $2
+
+ if [[ "$REMOTE" != "" ]]; then
+ ssh "$REMOTE" \
+ dd if=/dev/urandom "of=$VPATH" 2> /dev/null || true
+ else
+ dd if=/dev/urandom "of=$VPATH" 2> /dev/null || true
+ fi
+}
+
+function mw_check()
+{
+ IDX=$1
+ LOC=$2
+ REM=$3
+
+ write_mw "$LOC/mw$IDX"
+
+ split_remote "$LOC/mw$IDX"
+ if [[ "$REMOTE" == "" ]]; then
+ A=$VPATH
+ else
+ A=/tmp/ntb_test.$$.A
+ ssh "$REMOTE" cat "$VPATH" > "$A"
+ fi
+
+ split_remote "$REM/peer_mw$IDX"
+ if [[ "$REMOTE" == "" ]]; then
+ B=$VPATH
+ else
+ B=/tmp/ntb_test.$$.B
+ ssh "$REMOTE" cat "$VPATH" > "$B"
+ fi
+
+ cmp -n $MW_ALIGNED_SIZE "$A" "$B"
+ if [[ $? != 0 ]]; then
+ echo "Memory window $MW did not match!" >&2
+ fi
+
+ if [[ "$A" == "/tmp/*" ]]; then
+ rm "$A"
+ fi
+
+ if [[ "$B" == "/tmp/*" ]]; then
+ rm "$B"
+ fi
+}
+
+function mw_free()
+{
+ IDX=$1
+ LOC=$2
+ REM=$3
+
+ write_file "$MW_DMA_ADDR:0" "$REM/peer_mw_trans$IDX"
+
+ write_file 0 "$LOC/mw_trans$IDX"
+}
+
+function mw_test()
+{
+ LOC=$1
+ REM=$2
+
+ CNT=$(get_files_count "mw_trans" "$LOC")
+
+ for ((i = 0; i < $CNT; i++)); do
+ echo "Running mw$i tests on: $(subdirname $LOC) / " \
+ "$(subdirname $REM)"
+
+ mw_alloc $i $LOC $REM
+
+ mw_check $i $LOC $REM
+
+ mw_free $i $LOC $REM
+
+ echo " Passed"
+ done
+
+}
+
+function pingpong_test()
+{
+ LOC=$1
+ REM=$2
+
+ echo "Running ping pong tests on: $(basename $LOC) / $(basename $REM)"
+
+ LOC_START=$(read_file "$LOC/count")
+ REM_START=$(read_file "$REM/count")
+
+ sleep 7
+
+ LOC_END=$(read_file "$LOC/count")
+ REM_END=$(read_file "$REM/count")
+
+ if [[ $LOC_START == $LOC_END ]] || [[ $REM_START == $REM_END ]]; then
+ echo "Ping pong counter not incrementing!" >&2
+ exit 1
+ fi
+
+ echo " Passed"
+}
+
+function perf_test()
+{
+ USE_DMA=$1
+
+ if [[ $USE_DMA == "1" ]]; then
+ WITH="with"
+ else
+ WITH="without"
+ fi
+
+ _modprobe ntb_perf total_order=$PERF_RUN_ORDER \
+ max_mw_size=$MAX_MW_SIZE use_dma=$USE_DMA
+
+ echo "Running local perf test $WITH DMA"
+ write_file "$LOCAL_PIDX" "$LOCAL_PERF/run"
+ echo -n " "
+ read_file "$LOCAL_PERF/run"
+ echo " Passed"
+
+ echo "Running remote perf test $WITH DMA"
+ write_file "$REMOTE_PIDX" "$REMOTE_PERF/run"
+ echo -n " "
+ read_file "$REMOTE_PERF/run"
+ echo " Passed"
+
+ _modprobe -r ntb_perf
+}
+
+function ntb_tool_tests()
+{
+ LOCAL_TOOL="$DEBUGFS/ntb_tool/$LOCAL_DEV"
+ REMOTE_TOOL="$REMOTE_HOST:$DEBUGFS/ntb_tool/$REMOTE_DEV"
+
+ echo "Starting ntb_tool tests..."
+
+ _modprobe ntb_tool
+
+ port_test "$LOCAL_TOOL" "$REMOTE_TOOL"
+
+ LOCAL_PEER_TOOL="$LOCAL_TOOL/peer$LOCAL_PIDX"
+ REMOTE_PEER_TOOL="$REMOTE_TOOL/peer$REMOTE_PIDX"
+
+ link_test "$LOCAL_PEER_TOOL" "$REMOTE_PEER_TOOL"
+ link_test "$REMOTE_PEER_TOOL" "$LOCAL_PEER_TOOL"
+
+ #Ensure the link is up on both sides before continuing
+ write_file "Y" "$LOCAL_PEER_TOOL/link_event"
+ write_file "Y" "$REMOTE_PEER_TOOL/link_event"
+
+ doorbell_test "$LOCAL_TOOL" "$REMOTE_TOOL"
+ doorbell_test "$REMOTE_TOOL" "$LOCAL_TOOL"
+
+ scratchpad_test "$LOCAL_PEER_TOOL" "$REMOTE_PEER_TOOL"
+ scratchpad_test "$REMOTE_PEER_TOOL" "$LOCAL_PEER_TOOL"
+
+ message_test "$LOCAL_PEER_TOOL" "$REMOTE_PEER_TOOL"
+ message_test "$REMOTE_PEER_TOOL" "$LOCAL_PEER_TOOL"
+
+ mw_test "$LOCAL_PEER_TOOL" "$REMOTE_PEER_TOOL"
+ mw_test "$REMOTE_PEER_TOOL" "$LOCAL_PEER_TOOL"
+
+ _modprobe -r ntb_tool
+}
+
+function ntb_pingpong_tests()
+{
+ LOCAL_PP="$DEBUGFS/ntb_pingpong/$LOCAL_DEV"
+ REMOTE_PP="$REMOTE_HOST:$DEBUGFS/ntb_pingpong/$REMOTE_DEV"
+
+ echo "Starting ntb_pingpong tests..."
+
+ _modprobe ntb_pingpong
+
+ pingpong_test $LOCAL_PP $REMOTE_PP
+
+ _modprobe -r ntb_pingpong
+}
+
+function ntb_perf_tests()
+{
+ LOCAL_PERF="$DEBUGFS/ntb_perf/$LOCAL_DEV"
+ REMOTE_PERF="$REMOTE_HOST:$DEBUGFS/ntb_perf/$REMOTE_DEV"
+
+ echo "Starting ntb_perf tests..."
+
+ perf_test 0
+
+ if [[ $RUN_DMA_TESTS ]]; then
+ perf_test 1
+ fi
+}
+
+function cleanup()
+{
+ set +e
+ _modprobe -r ntb_tool 2> /dev/null
+ _modprobe -r ntb_perf 2> /dev/null
+ _modprobe -r ntb_pingpong 2> /dev/null
+ _modprobe -r ntb_transport 2> /dev/null
+ set -e
+}
+
+cleanup
+
+if ! [[ $$DONT_CLEANUP ]]; then
+ trap cleanup EXIT
+fi
+
+if [ "$(id -u)" != "0" ]; then
+ echo "This script must be run as root" 1>&2
+ exit 1
+fi
+
+if [[ "$LIST_DEVS" == TRUE ]]; then
+ echo "Local Devices:"
+ ls -1 /sys/bus/ntb/devices
+ echo
+
+ if [[ "$REMOTE_HOST" != "" ]]; then
+ echo "Remote Devices:"
+ ssh $REMOTE_HOST ls -1 /sys/bus/ntb/devices
+ fi
+
+ exit 0
+fi
+
+if [[ "$LOCAL_DEV" == $"" ]] || [[ "$REMOTE_DEV" == $"" ]]; then
+ show_help
+ exit 1
+fi
+
+ntb_tool_tests
+echo
+ntb_pingpong_tests
+echo
+ntb_perf_tests
+echo
diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile
new file mode 100644
index 000000000..201b59855
--- /dev/null
+++ b/tools/testing/selftests/powerpc/Makefile
@@ -0,0 +1,74 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for powerpc selftests
+
+# ARCH can be overridden by the user for cross compiling
+ARCH ?= $(shell uname -m)
+ARCH := $(shell echo $(ARCH) | sed -e s/ppc.*/powerpc/)
+
+ifeq ($(ARCH),powerpc)
+
+GIT_VERSION = $(shell git describe --always --long --dirty || echo "unknown")
+
+CFLAGS := -std=gnu99 -O2 -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(CURDIR)/include $(CFLAGS)
+
+export CFLAGS
+
+SUB_DIRS = alignment \
+ benchmarks \
+ cache_shape \
+ copyloops \
+ dscr \
+ mm \
+ pmu \
+ signal \
+ primitives \
+ stringloops \
+ switch_endian \
+ syscalls \
+ tm \
+ vphn \
+ math \
+ ptrace
+
+endif
+
+all: $(SUB_DIRS)
+
+$(SUB_DIRS):
+ BUILD_TARGET=$(OUTPUT)/$@; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $@ all
+
+include ../lib.mk
+
+override define RUN_TESTS
+ @for TARGET in $(SUB_DIRS); do \
+ BUILD_TARGET=$(OUTPUT)/$$TARGET; \
+ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests;\
+ done;
+endef
+
+override define INSTALL_RULE
+ @for TARGET in $(SUB_DIRS); do \
+ BUILD_TARGET=$(OUTPUT)/$$TARGET; \
+ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install;\
+ done;
+endef
+
+override define EMIT_TESTS
+ @for TARGET in $(SUB_DIRS); do \
+ BUILD_TARGET=$(OUTPUT)/$$TARGET; \
+ $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests;\
+ done;
+endef
+
+override define CLEAN
+ @for TARGET in $(SUB_DIRS); do \
+ BUILD_TARGET=$(OUTPUT)/$$TARGET; \
+ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean; \
+ done;
+ rm -f tags
+endef
+
+tags:
+ find . -name '*.c' -o -name '*.h' | xargs ctags
+
+.PHONY: tags $(SUB_DIRS)
diff --git a/tools/testing/selftests/powerpc/alignment/.gitignore b/tools/testing/selftests/powerpc/alignment/.gitignore
new file mode 100644
index 000000000..6d4fd0145
--- /dev/null
+++ b/tools/testing/selftests/powerpc/alignment/.gitignore
@@ -0,0 +1,2 @@
+copy_first_unaligned
+alignment_handler
diff --git a/tools/testing/selftests/powerpc/alignment/Makefile b/tools/testing/selftests/powerpc/alignment/Makefile
new file mode 100644
index 000000000..d056486f4
--- /dev/null
+++ b/tools/testing/selftests/powerpc/alignment/Makefile
@@ -0,0 +1,6 @@
+TEST_GEN_PROGS := copy_first_unaligned alignment_handler
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): ../harness.c ../utils.c
diff --git a/tools/testing/selftests/powerpc/alignment/alignment_handler.c b/tools/testing/selftests/powerpc/alignment/alignment_handler.c
new file mode 100644
index 000000000..4f8335e0c
--- /dev/null
+++ b/tools/testing/selftests/powerpc/alignment/alignment_handler.c
@@ -0,0 +1,575 @@
+/*
+ * Test the powerpc alignment handler on POWER8/POWER9
+ *
+ * Copyright (C) 2017 IBM Corporation (Michael Neuling, Andrew Donnellan)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+/*
+ * This selftest exercises the powerpc alignment fault handler.
+ *
+ * We create two sets of source and destination buffers, one in regular memory,
+ * the other cache-inhibited (we use /dev/fb0 for this).
+ *
+ * We initialise the source buffers, then use whichever set of load/store
+ * instructions is under test to copy bytes from the source buffers to the
+ * destination buffers. For the regular buffers, these instructions will
+ * execute normally. For the cache-inhibited buffers, these instructions
+ * will trap and cause an alignment fault, and the alignment fault handler
+ * will emulate the particular instruction under test. We then compare the
+ * destination buffers to ensure that the native and emulated cases give the
+ * same result.
+ *
+ * TODO:
+ * - Any FIXMEs below
+ * - Test VSX regs < 32 and > 32
+ * - Test all loads and stores
+ * - Check update forms do update register
+ * - Test alignment faults over page boundary
+ *
+ * Some old binutils may not support all the instructions.
+ */
+
+
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <getopt.h>
+#include <setjmp.h>
+#include <signal.h>
+
+#include <asm/cputable.h>
+
+#include "utils.h"
+
+int bufsize;
+int debug;
+int testing;
+volatile int gotsig;
+
+void sighandler(int sig, siginfo_t *info, void *ctx)
+{
+ ucontext_t *ucp = ctx;
+
+ if (!testing) {
+ signal(sig, SIG_DFL);
+ kill(0, sig);
+ }
+ gotsig = sig;
+#ifdef __powerpc64__
+ ucp->uc_mcontext.gp_regs[PT_NIP] += 4;
+#else
+ ucp->uc_mcontext.uc_regs->gregs[PT_NIP] += 4;
+#endif
+}
+
+#define XFORM(reg, n) " " #reg " ,%"#n",%2 ;"
+#define DFORM(reg, n) " " #reg " ,0(%"#n") ;"
+
+#define TEST(name, ld_op, st_op, form, ld_reg, st_reg) \
+ void test_##name(char *s, char *d) \
+ { \
+ asm volatile( \
+ #ld_op form(ld_reg, 0) \
+ #st_op form(st_reg, 1) \
+ :: "r"(s), "r"(d), "r"(0) \
+ : "memory", "vs0", "vs32", "r31"); \
+ } \
+ rc |= do_test(#name, test_##name)
+
+#define LOAD_VSX_XFORM_TEST(op) TEST(op, op, stxvd2x, XFORM, 32, 32)
+#define STORE_VSX_XFORM_TEST(op) TEST(op, lxvd2x, op, XFORM, 32, 32)
+#define LOAD_VSX_DFORM_TEST(op) TEST(op, op, stxv, DFORM, 32, 32)
+#define STORE_VSX_DFORM_TEST(op) TEST(op, lxv, op, DFORM, 32, 32)
+#define LOAD_VMX_XFORM_TEST(op) TEST(op, op, stxvd2x, XFORM, 0, 32)
+#define STORE_VMX_XFORM_TEST(op) TEST(op, lxvd2x, op, XFORM, 32, 0)
+#define LOAD_VMX_DFORM_TEST(op) TEST(op, op, stxv, DFORM, 0, 32)
+#define STORE_VMX_DFORM_TEST(op) TEST(op, lxv, op, DFORM, 32, 0)
+
+#define LOAD_XFORM_TEST(op) TEST(op, op, stdx, XFORM, 31, 31)
+#define STORE_XFORM_TEST(op) TEST(op, ldx, op, XFORM, 31, 31)
+#define LOAD_DFORM_TEST(op) TEST(op, op, std, DFORM, 31, 31)
+#define STORE_DFORM_TEST(op) TEST(op, ld, op, DFORM, 31, 31)
+
+#define LOAD_FLOAT_DFORM_TEST(op) TEST(op, op, stfd, DFORM, 0, 0)
+#define STORE_FLOAT_DFORM_TEST(op) TEST(op, lfd, op, DFORM, 0, 0)
+#define LOAD_FLOAT_XFORM_TEST(op) TEST(op, op, stfdx, XFORM, 0, 0)
+#define STORE_FLOAT_XFORM_TEST(op) TEST(op, lfdx, op, XFORM, 0, 0)
+
+
+/* FIXME: Unimplemented tests: */
+// STORE_DFORM_TEST(stq) /* FIXME: need two registers for quad */
+// STORE_DFORM_TEST(stswi) /* FIXME: string instruction */
+
+// STORE_XFORM_TEST(stwat) /* AMO can't emulate or run on CI */
+// STORE_XFORM_TEST(stdat) /* ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ */
+
+
+/* preload byte by byte */
+void preload_data(void *dst, int offset, int width)
+{
+ char *c = dst;
+ int i;
+
+ c += offset;
+
+ for (i = 0 ; i < width ; i++)
+ c[i] = i;
+}
+
+int test_memcpy(void *dst, void *src, int size, int offset,
+ void (*test_func)(char *, char *))
+{
+ char *s, *d;
+
+ s = src;
+ s += offset;
+ d = dst;
+ d += offset;
+
+ assert(size == 16);
+ gotsig = 0;
+ testing = 1;
+
+ test_func(s, d); /* run the actual test */
+
+ testing = 0;
+ if (gotsig) {
+ if (debug)
+ printf(" Got signal %i\n", gotsig);
+ return 1;
+ }
+ return 0;
+}
+
+void dumpdata(char *s1, char *s2, int n, char *test_name)
+{
+ int i;
+
+ printf(" %s: unexpected result:\n", test_name);
+ printf(" mem:");
+ for (i = 0; i < n; i++)
+ printf(" %02x", s1[i]);
+ printf("\n");
+ printf(" ci: ");
+ for (i = 0; i < n; i++)
+ printf(" %02x", s2[i]);
+ printf("\n");
+}
+
+int test_memcmp(void *s1, void *s2, int n, int offset, char *test_name)
+{
+ char *s1c, *s2c;
+
+ s1c = s1;
+ s1c += offset;
+ s2c = s2;
+ s2c += offset;
+
+ if (memcmp(s1c, s2c, n)) {
+ if (debug) {
+ printf("\n Compare failed. Offset:%i length:%i\n",
+ offset, n);
+ dumpdata(s1c, s2c, n, test_name);
+ }
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * Do two memcpy tests using the same instructions. One cachable
+ * memory and the other doesn't.
+ */
+int do_test(char *test_name, void (*test_func)(char *, char *))
+{
+ int offset, width, fd, rc, r;
+ void *mem0, *mem1, *ci0, *ci1;
+
+ printf("\tDoing %s:\t", test_name);
+
+ fd = open("/dev/fb0", O_RDWR);
+ if (fd < 0) {
+ printf("\n");
+ perror("Can't open /dev/fb0 now?");
+ return 1;
+ }
+
+ ci0 = mmap(NULL, bufsize, PROT_WRITE, MAP_SHARED,
+ fd, 0x0);
+ ci1 = mmap(NULL, bufsize, PROT_WRITE, MAP_SHARED,
+ fd, bufsize);
+ if ((ci0 == MAP_FAILED) || (ci1 == MAP_FAILED)) {
+ printf("\n");
+ perror("mmap failed");
+ SKIP_IF(1);
+ }
+
+ rc = posix_memalign(&mem0, bufsize, bufsize);
+ if (rc) {
+ printf("\n");
+ return rc;
+ }
+
+ rc = posix_memalign(&mem1, bufsize, bufsize);
+ if (rc) {
+ printf("\n");
+ free(mem0);
+ return rc;
+ }
+
+ rc = 0;
+ /* offset = 0 no alignment fault, so skip */
+ for (offset = 1; offset < 16; offset++) {
+ width = 16; /* vsx == 16 bytes */
+ r = 0;
+
+ /* load pattern into memory byte by byte */
+ preload_data(ci0, offset, width);
+ preload_data(mem0, offset, width); // FIXME: remove??
+ memcpy(ci0, mem0, bufsize);
+ memcpy(ci1, mem1, bufsize); /* initialise output to the same */
+
+ /* sanity check */
+ test_memcmp(mem0, ci0, width, offset, test_name);
+
+ r |= test_memcpy(ci1, ci0, width, offset, test_func);
+ r |= test_memcpy(mem1, mem0, width, offset, test_func);
+ if (r && !debug) {
+ printf("FAILED: Got signal");
+ rc = 1;
+ break;
+ }
+
+ r |= test_memcmp(mem1, ci1, width, offset, test_name);
+ if (r && !debug) {
+ printf("FAILED: Wrong Data");
+ rc = 1;
+ break;
+ }
+ }
+
+ if (rc == 0)
+ printf("PASSED");
+
+ printf("\n");
+
+ munmap(ci0, bufsize);
+ munmap(ci1, bufsize);
+ free(mem0);
+ free(mem1);
+ close(fd);
+
+ return rc;
+}
+
+static bool can_open_fb0(void)
+{
+ int fd;
+
+ fd = open("/dev/fb0", O_RDWR);
+ if (fd < 0)
+ return false;
+
+ close(fd);
+ return true;
+}
+
+int test_alignment_handler_vsx_206(void)
+{
+ int rc = 0;
+
+ SKIP_IF(!can_open_fb0());
+ SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
+
+ printf("VSX: 2.06B\n");
+ LOAD_VSX_XFORM_TEST(lxvd2x);
+ LOAD_VSX_XFORM_TEST(lxvw4x);
+ LOAD_VSX_XFORM_TEST(lxsdx);
+ LOAD_VSX_XFORM_TEST(lxvdsx);
+ STORE_VSX_XFORM_TEST(stxvd2x);
+ STORE_VSX_XFORM_TEST(stxvw4x);
+ STORE_VSX_XFORM_TEST(stxsdx);
+ return rc;
+}
+
+int test_alignment_handler_vsx_207(void)
+{
+ int rc = 0;
+
+ SKIP_IF(!can_open_fb0());
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
+
+ printf("VSX: 2.07B\n");
+ LOAD_VSX_XFORM_TEST(lxsspx);
+ LOAD_VSX_XFORM_TEST(lxsiwax);
+ LOAD_VSX_XFORM_TEST(lxsiwzx);
+ STORE_VSX_XFORM_TEST(stxsspx);
+ STORE_VSX_XFORM_TEST(stxsiwx);
+ return rc;
+}
+
+int test_alignment_handler_vsx_300(void)
+{
+ int rc = 0;
+
+ SKIP_IF(!can_open_fb0());
+
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_00));
+ printf("VSX: 3.00B\n");
+ LOAD_VMX_DFORM_TEST(lxsd);
+ LOAD_VSX_XFORM_TEST(lxsibzx);
+ LOAD_VSX_XFORM_TEST(lxsihzx);
+ LOAD_VMX_DFORM_TEST(lxssp);
+ LOAD_VSX_DFORM_TEST(lxv);
+ LOAD_VSX_XFORM_TEST(lxvb16x);
+ LOAD_VSX_XFORM_TEST(lxvh8x);
+ LOAD_VSX_XFORM_TEST(lxvx);
+ LOAD_VSX_XFORM_TEST(lxvwsx);
+ LOAD_VSX_XFORM_TEST(lxvl);
+ LOAD_VSX_XFORM_TEST(lxvll);
+ STORE_VMX_DFORM_TEST(stxsd);
+ STORE_VSX_XFORM_TEST(stxsibx);
+ STORE_VSX_XFORM_TEST(stxsihx);
+ STORE_VMX_DFORM_TEST(stxssp);
+ STORE_VSX_DFORM_TEST(stxv);
+ STORE_VSX_XFORM_TEST(stxvb16x);
+ STORE_VSX_XFORM_TEST(stxvh8x);
+ STORE_VSX_XFORM_TEST(stxvx);
+ STORE_VSX_XFORM_TEST(stxvl);
+ STORE_VSX_XFORM_TEST(stxvll);
+ return rc;
+}
+
+int test_alignment_handler_integer(void)
+{
+ int rc = 0;
+
+ SKIP_IF(!can_open_fb0());
+
+ printf("Integer\n");
+ LOAD_DFORM_TEST(lbz);
+ LOAD_DFORM_TEST(lbzu);
+ LOAD_XFORM_TEST(lbzx);
+ LOAD_XFORM_TEST(lbzux);
+ LOAD_DFORM_TEST(lhz);
+ LOAD_DFORM_TEST(lhzu);
+ LOAD_XFORM_TEST(lhzx);
+ LOAD_XFORM_TEST(lhzux);
+ LOAD_DFORM_TEST(lha);
+ LOAD_DFORM_TEST(lhau);
+ LOAD_XFORM_TEST(lhax);
+ LOAD_XFORM_TEST(lhaux);
+ LOAD_XFORM_TEST(lhbrx);
+ LOAD_DFORM_TEST(lwz);
+ LOAD_DFORM_TEST(lwzu);
+ LOAD_XFORM_TEST(lwzx);
+ LOAD_XFORM_TEST(lwzux);
+ LOAD_DFORM_TEST(lwa);
+ LOAD_XFORM_TEST(lwax);
+ LOAD_XFORM_TEST(lwaux);
+ LOAD_XFORM_TEST(lwbrx);
+ LOAD_DFORM_TEST(ld);
+ LOAD_DFORM_TEST(ldu);
+ LOAD_XFORM_TEST(ldx);
+ LOAD_XFORM_TEST(ldux);
+ STORE_DFORM_TEST(stb);
+ STORE_XFORM_TEST(stbx);
+ STORE_DFORM_TEST(stbu);
+ STORE_XFORM_TEST(stbux);
+ STORE_DFORM_TEST(sth);
+ STORE_XFORM_TEST(sthx);
+ STORE_DFORM_TEST(sthu);
+ STORE_XFORM_TEST(sthux);
+ STORE_XFORM_TEST(sthbrx);
+ STORE_DFORM_TEST(stw);
+ STORE_XFORM_TEST(stwx);
+ STORE_DFORM_TEST(stwu);
+ STORE_XFORM_TEST(stwux);
+ STORE_XFORM_TEST(stwbrx);
+ STORE_DFORM_TEST(std);
+ STORE_XFORM_TEST(stdx);
+ STORE_DFORM_TEST(stdu);
+ STORE_XFORM_TEST(stdux);
+
+#ifdef __BIG_ENDIAN__
+ LOAD_DFORM_TEST(lmw);
+ STORE_DFORM_TEST(stmw);
+#endif
+
+ return rc;
+}
+
+int test_alignment_handler_integer_206(void)
+{
+ int rc = 0;
+
+ SKIP_IF(!can_open_fb0());
+ SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
+
+ printf("Integer: 2.06\n");
+
+ LOAD_XFORM_TEST(ldbrx);
+ STORE_XFORM_TEST(stdbrx);
+
+ return rc;
+}
+
+int test_alignment_handler_vmx(void)
+{
+ int rc = 0;
+
+ SKIP_IF(!can_open_fb0());
+ SKIP_IF(!have_hwcap(PPC_FEATURE_HAS_ALTIVEC));
+
+ printf("VMX\n");
+ LOAD_VMX_XFORM_TEST(lvx);
+
+ /*
+ * FIXME: These loads only load part of the register, so our
+ * testing method doesn't work. Also they don't take alignment
+ * faults, so it's kinda pointless anyway
+ *
+ LOAD_VMX_XFORM_TEST(lvebx)
+ LOAD_VMX_XFORM_TEST(lvehx)
+ LOAD_VMX_XFORM_TEST(lvewx)
+ LOAD_VMX_XFORM_TEST(lvxl)
+ */
+ STORE_VMX_XFORM_TEST(stvx);
+ STORE_VMX_XFORM_TEST(stvebx);
+ STORE_VMX_XFORM_TEST(stvehx);
+ STORE_VMX_XFORM_TEST(stvewx);
+ STORE_VMX_XFORM_TEST(stvxl);
+ return rc;
+}
+
+int test_alignment_handler_fp(void)
+{
+ int rc = 0;
+
+ SKIP_IF(!can_open_fb0());
+
+ printf("Floating point\n");
+ LOAD_FLOAT_DFORM_TEST(lfd);
+ LOAD_FLOAT_XFORM_TEST(lfdx);
+ LOAD_FLOAT_DFORM_TEST(lfdu);
+ LOAD_FLOAT_XFORM_TEST(lfdux);
+ LOAD_FLOAT_DFORM_TEST(lfs);
+ LOAD_FLOAT_XFORM_TEST(lfsx);
+ LOAD_FLOAT_DFORM_TEST(lfsu);
+ LOAD_FLOAT_XFORM_TEST(lfsux);
+ STORE_FLOAT_DFORM_TEST(stfd);
+ STORE_FLOAT_XFORM_TEST(stfdx);
+ STORE_FLOAT_DFORM_TEST(stfdu);
+ STORE_FLOAT_XFORM_TEST(stfdux);
+ STORE_FLOAT_DFORM_TEST(stfs);
+ STORE_FLOAT_XFORM_TEST(stfsx);
+ STORE_FLOAT_DFORM_TEST(stfsu);
+ STORE_FLOAT_XFORM_TEST(stfsux);
+ STORE_FLOAT_XFORM_TEST(stfiwx);
+
+ return rc;
+}
+
+int test_alignment_handler_fp_205(void)
+{
+ int rc = 0;
+
+ SKIP_IF(!can_open_fb0());
+ SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_05));
+
+ printf("Floating point: 2.05\n");
+
+ LOAD_FLOAT_DFORM_TEST(lfdp);
+ LOAD_FLOAT_XFORM_TEST(lfdpx);
+ LOAD_FLOAT_XFORM_TEST(lfiwax);
+ STORE_FLOAT_DFORM_TEST(stfdp);
+ STORE_FLOAT_XFORM_TEST(stfdpx);
+
+ return rc;
+}
+
+int test_alignment_handler_fp_206(void)
+{
+ int rc = 0;
+
+ SKIP_IF(!can_open_fb0());
+ SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
+
+ printf("Floating point: 2.06\n");
+
+ LOAD_FLOAT_XFORM_TEST(lfiwzx);
+
+ return rc;
+}
+
+void usage(char *prog)
+{
+ printf("Usage: %s [options]\n", prog);
+ printf(" -d Enable debug error output\n");
+ printf("\n");
+ printf("This test requires a POWER8 or POWER9 CPU and a usable ");
+ printf("framebuffer at /dev/fb0.\n");
+}
+
+int main(int argc, char *argv[])
+{
+
+ struct sigaction sa;
+ int rc = 0;
+ int option = 0;
+
+ while ((option = getopt(argc, argv, "d")) != -1) {
+ switch (option) {
+ case 'd':
+ debug++;
+ break;
+ default:
+ usage(argv[0]);
+ exit(1);
+ }
+ }
+
+ bufsize = getpagesize();
+
+ sa.sa_sigaction = sighandler;
+ sigemptyset(&sa.sa_mask);
+ sa.sa_flags = SA_SIGINFO;
+ if (sigaction(SIGSEGV, &sa, NULL) == -1
+ || sigaction(SIGBUS, &sa, NULL) == -1
+ || sigaction(SIGILL, &sa, NULL) == -1) {
+ perror("sigaction");
+ exit(1);
+ }
+
+ rc |= test_harness(test_alignment_handler_vsx_206,
+ "test_alignment_handler_vsx_206");
+ rc |= test_harness(test_alignment_handler_vsx_207,
+ "test_alignment_handler_vsx_207");
+ rc |= test_harness(test_alignment_handler_vsx_300,
+ "test_alignment_handler_vsx_300");
+ rc |= test_harness(test_alignment_handler_integer,
+ "test_alignment_handler_integer");
+ rc |= test_harness(test_alignment_handler_integer_206,
+ "test_alignment_handler_integer_206");
+ rc |= test_harness(test_alignment_handler_vmx,
+ "test_alignment_handler_vmx");
+ rc |= test_harness(test_alignment_handler_fp,
+ "test_alignment_handler_fp");
+ rc |= test_harness(test_alignment_handler_fp_205,
+ "test_alignment_handler_fp_205");
+ rc |= test_harness(test_alignment_handler_fp_206,
+ "test_alignment_handler_fp_206");
+ return rc;
+}
diff --git a/tools/testing/selftests/powerpc/alignment/copy_first_unaligned.c b/tools/testing/selftests/powerpc/alignment/copy_first_unaligned.c
new file mode 100644
index 000000000..5a9589987
--- /dev/null
+++ b/tools/testing/selftests/powerpc/alignment/copy_first_unaligned.c
@@ -0,0 +1,72 @@
+/*
+ * Copyright 2016, Chris Smart, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Calls to copy_first which are not 128-byte aligned should be
+ * caught and sent a SIGBUS.
+ *
+ */
+
+#include <signal.h>
+#include <string.h>
+#include <unistd.h>
+#include "utils.h"
+#include "instructions.h"
+
+unsigned int expected_instruction = PPC_INST_COPY_FIRST;
+unsigned int instruction_mask = 0xfc2007fe;
+
+void signal_action_handler(int signal_num, siginfo_t *info, void *ptr)
+{
+ ucontext_t *ctx = ptr;
+#ifdef __powerpc64__
+ unsigned int *pc = (unsigned int *)ctx->uc_mcontext.gp_regs[PT_NIP];
+#else
+ unsigned int *pc = (unsigned int *)ctx->uc_mcontext.uc_regs->gregs[PT_NIP];
+#endif
+
+ /*
+ * Check that the signal was on the correct instruction, using a
+ * mask because the compiler assigns the register at RB.
+ */
+ if ((*pc & instruction_mask) == expected_instruction)
+ _exit(0); /* We hit the right instruction */
+
+ _exit(1);
+}
+
+void setup_signal_handler(void)
+{
+ struct sigaction signal_action;
+
+ memset(&signal_action, 0, sizeof(signal_action));
+ signal_action.sa_sigaction = signal_action_handler;
+ signal_action.sa_flags = SA_SIGINFO;
+ sigaction(SIGBUS, &signal_action, NULL);
+}
+
+char cacheline_buf[128] __cacheline_aligned;
+
+int test_copy_first_unaligned(void)
+{
+ /* Only run this test on a P9 or later */
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_00));
+
+ /* Register our signal handler with SIGBUS */
+ setup_signal_handler();
+
+ /* +1 makes buf unaligned */
+ copy_first(cacheline_buf+1);
+
+ /* We should not get here */
+ return 1;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(test_copy_first_unaligned, "test_copy_first_unaligned");
+}
diff --git a/tools/testing/selftests/powerpc/benchmarks/.gitignore b/tools/testing/selftests/powerpc/benchmarks/.gitignore
new file mode 100644
index 000000000..9161679b1
--- /dev/null
+++ b/tools/testing/selftests/powerpc/benchmarks/.gitignore
@@ -0,0 +1,7 @@
+gettimeofday
+context_switch
+fork
+exec_target
+mmap_bench
+futex_bench
+null_syscall
diff --git a/tools/testing/selftests/powerpc/benchmarks/Makefile b/tools/testing/selftests/powerpc/benchmarks/Makefile
new file mode 100644
index 000000000..d40300a65
--- /dev/null
+++ b/tools/testing/selftests/powerpc/benchmarks/Makefile
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0
+TEST_GEN_PROGS := gettimeofday context_switch fork mmap_bench futex_bench null_syscall
+TEST_GEN_FILES := exec_target
+
+CFLAGS += -O2
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): ../harness.c
+
+$(OUTPUT)/context_switch: ../utils.c
+$(OUTPUT)/context_switch: CFLAGS += -maltivec -mvsx -mabi=altivec
+$(OUTPUT)/context_switch: LDLIBS += -lpthread
+
+$(OUTPUT)/fork: LDLIBS += -lpthread
+
+$(OUTPUT)/exec_target: CFLAGS += -static -nostartfiles
diff --git a/tools/testing/selftests/powerpc/benchmarks/context_switch.c b/tools/testing/selftests/powerpc/benchmarks/context_switch.c
new file mode 100644
index 000000000..9ec767469
--- /dev/null
+++ b/tools/testing/selftests/powerpc/benchmarks/context_switch.c
@@ -0,0 +1,506 @@
+/*
+ * Context switch microbenchmark.
+ *
+ * Copyright (C) 2015 Anton Blanchard <anton@au.ibm.com>, IBM
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <sched.h>
+#include <string.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <getopt.h>
+#include <signal.h>
+#include <assert.h>
+#include <pthread.h>
+#include <limits.h>
+#include <sys/time.h>
+#include <sys/syscall.h>
+#include <sys/sysinfo.h>
+#include <sys/types.h>
+#include <sys/shm.h>
+#include <linux/futex.h>
+#ifdef __powerpc__
+#include <altivec.h>
+#endif
+#include "utils.h"
+
+static unsigned int timeout = 30;
+
+static int touch_vdso;
+struct timeval tv;
+
+static int touch_fp = 1;
+double fp;
+
+static int touch_vector = 1;
+vector int a, b, c;
+
+#ifdef __powerpc__
+static int touch_altivec = 1;
+
+/*
+ * Note: LTO (Link Time Optimisation) doesn't play well with this function
+ * attribute. Be very careful enabling LTO for this test.
+ */
+static void __attribute__((__target__("no-vsx"))) altivec_touch_fn(void)
+{
+ c = a + b;
+}
+#endif
+
+static void touch(void)
+{
+ if (touch_vdso)
+ gettimeofday(&tv, NULL);
+
+ if (touch_fp)
+ fp += 0.1;
+
+#ifdef __powerpc__
+ if (touch_altivec)
+ altivec_touch_fn();
+#endif
+
+ if (touch_vector)
+ c = a + b;
+
+ asm volatile("# %0 %1 %2": : "r"(&tv), "r"(&fp), "r"(&c));
+}
+
+static void start_thread_on(void *(*fn)(void *), void *arg, unsigned long cpu)
+{
+ int rc;
+ pthread_t tid;
+ cpu_set_t cpuset;
+ pthread_attr_t attr;
+
+ CPU_ZERO(&cpuset);
+ CPU_SET(cpu, &cpuset);
+
+ rc = pthread_attr_init(&attr);
+ if (rc) {
+ errno = rc;
+ perror("pthread_attr_init");
+ exit(1);
+ }
+
+ rc = pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset);
+ if (rc) {
+ errno = rc;
+ perror("pthread_attr_setaffinity_np");
+ exit(1);
+ }
+
+ rc = pthread_create(&tid, &attr, fn, arg);
+ if (rc) {
+ errno = rc;
+ perror("pthread_create");
+ exit(1);
+ }
+}
+
+static void start_process_on(void *(*fn)(void *), void *arg, unsigned long cpu)
+{
+ int pid, ncpus;
+ cpu_set_t *cpuset;
+ size_t size;
+
+ pid = fork();
+ if (pid == -1) {
+ perror("fork");
+ exit(1);
+ }
+
+ if (pid)
+ return;
+
+ ncpus = get_nprocs();
+ size = CPU_ALLOC_SIZE(ncpus);
+ cpuset = CPU_ALLOC(ncpus);
+ if (!cpuset) {
+ perror("malloc");
+ exit(1);
+ }
+ CPU_ZERO_S(size, cpuset);
+ CPU_SET_S(cpu, size, cpuset);
+
+ if (sched_setaffinity(0, size, cpuset)) {
+ perror("sched_setaffinity");
+ CPU_FREE(cpuset);
+ exit(1);
+ }
+
+ CPU_FREE(cpuset);
+ fn(arg);
+
+ exit(0);
+}
+
+static unsigned long iterations;
+static unsigned long iterations_prev;
+
+static void sigalrm_handler(int junk)
+{
+ unsigned long i = iterations;
+
+ printf("%ld\n", i - iterations_prev);
+ iterations_prev = i;
+
+ if (--timeout == 0)
+ kill(0, SIGUSR1);
+
+ alarm(1);
+}
+
+static void sigusr1_handler(int junk)
+{
+ exit(0);
+}
+
+struct actions {
+ void (*setup)(int, int);
+ void *(*thread1)(void *);
+ void *(*thread2)(void *);
+};
+
+#define READ 0
+#define WRITE 1
+
+static int pipe_fd1[2];
+static int pipe_fd2[2];
+
+static void pipe_setup(int cpu1, int cpu2)
+{
+ if (pipe(pipe_fd1) || pipe(pipe_fd2))
+ exit(1);
+}
+
+static void *pipe_thread1(void *arg)
+{
+ signal(SIGALRM, sigalrm_handler);
+ alarm(1);
+
+ while (1) {
+ assert(read(pipe_fd1[READ], &c, 1) == 1);
+ touch();
+
+ assert(write(pipe_fd2[WRITE], &c, 1) == 1);
+ touch();
+
+ iterations += 2;
+ }
+
+ return NULL;
+}
+
+static void *pipe_thread2(void *arg)
+{
+ while (1) {
+ assert(write(pipe_fd1[WRITE], &c, 1) == 1);
+ touch();
+
+ assert(read(pipe_fd2[READ], &c, 1) == 1);
+ touch();
+ }
+
+ return NULL;
+}
+
+static struct actions pipe_actions = {
+ .setup = pipe_setup,
+ .thread1 = pipe_thread1,
+ .thread2 = pipe_thread2,
+};
+
+static void yield_setup(int cpu1, int cpu2)
+{
+ if (cpu1 != cpu2) {
+ fprintf(stderr, "Both threads must be on the same CPU for yield test\n");
+ exit(1);
+ }
+}
+
+static void *yield_thread1(void *arg)
+{
+ signal(SIGALRM, sigalrm_handler);
+ alarm(1);
+
+ while (1) {
+ sched_yield();
+ touch();
+
+ iterations += 2;
+ }
+
+ return NULL;
+}
+
+static void *yield_thread2(void *arg)
+{
+ while (1) {
+ sched_yield();
+ touch();
+ }
+
+ return NULL;
+}
+
+static struct actions yield_actions = {
+ .setup = yield_setup,
+ .thread1 = yield_thread1,
+ .thread2 = yield_thread2,
+};
+
+static long sys_futex(void *addr1, int op, int val1, struct timespec *timeout,
+ void *addr2, int val3)
+{
+ return syscall(SYS_futex, addr1, op, val1, timeout, addr2, val3);
+}
+
+static unsigned long cmpxchg(unsigned long *p, unsigned long expected,
+ unsigned long desired)
+{
+ unsigned long exp = expected;
+
+ __atomic_compare_exchange_n(p, &exp, desired, 0,
+ __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
+ return exp;
+}
+
+static unsigned long xchg(unsigned long *p, unsigned long val)
+{
+ return __atomic_exchange_n(p, val, __ATOMIC_SEQ_CST);
+}
+
+static int processes;
+
+static int mutex_lock(unsigned long *m)
+{
+ int c;
+ int flags = FUTEX_WAIT;
+ if (!processes)
+ flags |= FUTEX_PRIVATE_FLAG;
+
+ c = cmpxchg(m, 0, 1);
+ if (!c)
+ return 0;
+
+ if (c == 1)
+ c = xchg(m, 2);
+
+ while (c) {
+ sys_futex(m, flags, 2, NULL, NULL, 0);
+ c = xchg(m, 2);
+ }
+
+ return 0;
+}
+
+static int mutex_unlock(unsigned long *m)
+{
+ int flags = FUTEX_WAKE;
+ if (!processes)
+ flags |= FUTEX_PRIVATE_FLAG;
+
+ if (*m == 2)
+ *m = 0;
+ else if (xchg(m, 0) == 1)
+ return 0;
+
+ sys_futex(m, flags, 1, NULL, NULL, 0);
+
+ return 0;
+}
+
+static unsigned long *m1, *m2;
+
+static void futex_setup(int cpu1, int cpu2)
+{
+ if (!processes) {
+ static unsigned long _m1, _m2;
+ m1 = &_m1;
+ m2 = &_m2;
+ } else {
+ int shmid;
+ void *shmaddr;
+
+ shmid = shmget(IPC_PRIVATE, getpagesize(), SHM_R | SHM_W);
+ if (shmid < 0) {
+ perror("shmget");
+ exit(1);
+ }
+
+ shmaddr = shmat(shmid, NULL, 0);
+ if (shmaddr == (char *)-1) {
+ perror("shmat");
+ shmctl(shmid, IPC_RMID, NULL);
+ exit(1);
+ }
+
+ shmctl(shmid, IPC_RMID, NULL);
+
+ m1 = shmaddr;
+ m2 = shmaddr + sizeof(*m1);
+ }
+
+ *m1 = 0;
+ *m2 = 0;
+
+ mutex_lock(m1);
+ mutex_lock(m2);
+}
+
+static void *futex_thread1(void *arg)
+{
+ signal(SIGALRM, sigalrm_handler);
+ alarm(1);
+
+ while (1) {
+ mutex_lock(m2);
+ mutex_unlock(m1);
+
+ iterations += 2;
+ }
+
+ return NULL;
+}
+
+static void *futex_thread2(void *arg)
+{
+ while (1) {
+ mutex_unlock(m2);
+ mutex_lock(m1);
+ }
+
+ return NULL;
+}
+
+static struct actions futex_actions = {
+ .setup = futex_setup,
+ .thread1 = futex_thread1,
+ .thread2 = futex_thread2,
+};
+
+static struct option options[] = {
+ { "test", required_argument, 0, 't' },
+ { "process", no_argument, &processes, 1 },
+ { "timeout", required_argument, 0, 's' },
+ { "vdso", no_argument, &touch_vdso, 1 },
+ { "no-fp", no_argument, &touch_fp, 0 },
+#ifdef __powerpc__
+ { "no-altivec", no_argument, &touch_altivec, 0 },
+#endif
+ { "no-vector", no_argument, &touch_vector, 0 },
+ { 0, },
+};
+
+static void usage(void)
+{
+ fprintf(stderr, "Usage: context_switch2 <options> CPU1 CPU2\n\n");
+ fprintf(stderr, "\t\t--test=X\tpipe, futex or yield (default)\n");
+ fprintf(stderr, "\t\t--process\tUse processes (default threads)\n");
+ fprintf(stderr, "\t\t--timeout=X\tDuration in seconds to run (default 30)\n");
+ fprintf(stderr, "\t\t--vdso\t\ttouch VDSO\n");
+ fprintf(stderr, "\t\t--no-fp\t\tDon't touch FP\n");
+#ifdef __powerpc__
+ fprintf(stderr, "\t\t--no-altivec\tDon't touch altivec\n");
+#endif
+ fprintf(stderr, "\t\t--no-vector\tDon't touch vector\n");
+}
+
+int main(int argc, char *argv[])
+{
+ signed char c;
+ struct actions *actions = &yield_actions;
+ int cpu1;
+ int cpu2;
+ static void (*start_fn)(void *(*fn)(void *), void *arg, unsigned long cpu);
+
+ while (1) {
+ int option_index = 0;
+
+ c = getopt_long(argc, argv, "", options, &option_index);
+
+ if (c == -1)
+ break;
+
+ switch (c) {
+ case 0:
+ if (options[option_index].flag != 0)
+ break;
+
+ usage();
+ exit(1);
+ break;
+
+ case 't':
+ if (!strcmp(optarg, "pipe")) {
+ actions = &pipe_actions;
+ } else if (!strcmp(optarg, "yield")) {
+ actions = &yield_actions;
+ } else if (!strcmp(optarg, "futex")) {
+ actions = &futex_actions;
+ } else {
+ usage();
+ exit(1);
+ }
+ break;
+
+ case 's':
+ timeout = atoi(optarg);
+ break;
+
+ default:
+ usage();
+ exit(1);
+ }
+ }
+
+ if (processes)
+ start_fn = start_process_on;
+ else
+ start_fn = start_thread_on;
+
+ if (((argc - optind) != 2)) {
+ cpu1 = cpu2 = pick_online_cpu();
+ } else {
+ cpu1 = atoi(argv[optind++]);
+ cpu2 = atoi(argv[optind++]);
+ }
+
+ printf("Using %s with ", processes ? "processes" : "threads");
+
+ if (actions == &pipe_actions)
+ printf("pipe");
+ else if (actions == &yield_actions)
+ printf("yield");
+ else
+ printf("futex");
+
+ printf(" on cpus %d/%d touching FP:%s altivec:%s vector:%s vdso:%s\n",
+ cpu1, cpu2, touch_fp ? "yes" : "no", touch_altivec ? "yes" : "no",
+ touch_vector ? "yes" : "no", touch_vdso ? "yes" : "no");
+
+ /* Create a new process group so we can signal everyone for exit */
+ setpgid(getpid(), getpid());
+
+ signal(SIGUSR1, sigusr1_handler);
+
+ actions->setup(cpu1, cpu2);
+
+ start_fn(actions->thread1, NULL, cpu1);
+ start_fn(actions->thread2, NULL, cpu2);
+
+ while (1)
+ sleep(3600);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/powerpc/benchmarks/exec_target.c b/tools/testing/selftests/powerpc/benchmarks/exec_target.c
new file mode 100644
index 000000000..c14b0fc1e
--- /dev/null
+++ b/tools/testing/selftests/powerpc/benchmarks/exec_target.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Part of fork context switch microbenchmark.
+ *
+ * Copyright 2018, Anton Blanchard, IBM Corp.
+ */
+
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <sys/syscall.h>
+
+void _start(void)
+{
+ syscall(SYS_exit, 0);
+}
diff --git a/tools/testing/selftests/powerpc/benchmarks/fork.c b/tools/testing/selftests/powerpc/benchmarks/fork.c
new file mode 100644
index 000000000..d312e638c
--- /dev/null
+++ b/tools/testing/selftests/powerpc/benchmarks/fork.c
@@ -0,0 +1,325 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Context switch microbenchmark.
+ *
+ * Copyright 2018, Anton Blanchard, IBM Corp.
+ */
+
+#define _GNU_SOURCE
+#include <assert.h>
+#include <errno.h>
+#include <getopt.h>
+#include <limits.h>
+#include <linux/futex.h>
+#include <pthread.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/shm.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+static unsigned int timeout = 30;
+
+static void set_cpu(int cpu)
+{
+ cpu_set_t cpuset;
+
+ if (cpu == -1)
+ return;
+
+ CPU_ZERO(&cpuset);
+ CPU_SET(cpu, &cpuset);
+
+ if (sched_setaffinity(0, sizeof(cpuset), &cpuset)) {
+ perror("sched_setaffinity");
+ exit(1);
+ }
+}
+
+static void start_process_on(void *(*fn)(void *), void *arg, int cpu)
+{
+ int pid;
+
+ pid = fork();
+ if (pid == -1) {
+ perror("fork");
+ exit(1);
+ }
+
+ if (pid)
+ return;
+
+ set_cpu(cpu);
+
+ fn(arg);
+
+ exit(0);
+}
+
+static int cpu;
+static int do_fork = 0;
+static int do_vfork = 0;
+static int do_exec = 0;
+static char *exec_file;
+static int exec_target = 0;
+static unsigned long iterations;
+static unsigned long iterations_prev;
+
+static void run_exec(void)
+{
+ char *const argv[] = { "./exec_target", NULL };
+
+ if (execve("./exec_target", argv, NULL) == -1) {
+ perror("execve");
+ exit(1);
+ }
+}
+
+static void bench_fork(void)
+{
+ while (1) {
+ pid_t pid = fork();
+ if (pid == -1) {
+ perror("fork");
+ exit(1);
+ }
+ if (pid == 0) {
+ if (do_exec)
+ run_exec();
+ _exit(0);
+ }
+ pid = waitpid(pid, NULL, 0);
+ if (pid == -1) {
+ perror("waitpid");
+ exit(1);
+ }
+ iterations++;
+ }
+}
+
+static void bench_vfork(void)
+{
+ while (1) {
+ pid_t pid = vfork();
+ if (pid == -1) {
+ perror("fork");
+ exit(1);
+ }
+ if (pid == 0) {
+ if (do_exec)
+ run_exec();
+ _exit(0);
+ }
+ pid = waitpid(pid, NULL, 0);
+ if (pid == -1) {
+ perror("waitpid");
+ exit(1);
+ }
+ iterations++;
+ }
+}
+
+static void *null_fn(void *arg)
+{
+ pthread_exit(NULL);
+}
+
+static void bench_thread(void)
+{
+ pthread_t tid;
+ cpu_set_t cpuset;
+ pthread_attr_t attr;
+ int rc;
+
+ rc = pthread_attr_init(&attr);
+ if (rc) {
+ errno = rc;
+ perror("pthread_attr_init");
+ exit(1);
+ }
+
+ if (cpu != -1) {
+ CPU_ZERO(&cpuset);
+ CPU_SET(cpu, &cpuset);
+
+ rc = pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset);
+ if (rc) {
+ errno = rc;
+ perror("pthread_attr_setaffinity_np");
+ exit(1);
+ }
+ }
+
+ while (1) {
+ rc = pthread_create(&tid, &attr, null_fn, NULL);
+ if (rc) {
+ errno = rc;
+ perror("pthread_create");
+ exit(1);
+ }
+ rc = pthread_join(tid, NULL);
+ if (rc) {
+ errno = rc;
+ perror("pthread_join");
+ exit(1);
+ }
+ iterations++;
+ }
+}
+
+static void sigalrm_handler(int junk)
+{
+ unsigned long i = iterations;
+
+ printf("%ld\n", i - iterations_prev);
+ iterations_prev = i;
+
+ if (--timeout == 0)
+ kill(0, SIGUSR1);
+
+ alarm(1);
+}
+
+static void sigusr1_handler(int junk)
+{
+ exit(0);
+}
+
+static void *bench_proc(void *arg)
+{
+ signal(SIGALRM, sigalrm_handler);
+ alarm(1);
+
+ if (do_fork)
+ bench_fork();
+ else if (do_vfork)
+ bench_vfork();
+ else
+ bench_thread();
+
+ return NULL;
+}
+
+static struct option options[] = {
+ { "fork", no_argument, &do_fork, 1 },
+ { "vfork", no_argument, &do_vfork, 1 },
+ { "exec", no_argument, &do_exec, 1 },
+ { "timeout", required_argument, 0, 's' },
+ { "exec-target", no_argument, &exec_target, 1 },
+ { NULL },
+};
+
+static void usage(void)
+{
+ fprintf(stderr, "Usage: fork <options> CPU\n\n");
+ fprintf(stderr, "\t\t--fork\tUse fork() (default threads)\n");
+ fprintf(stderr, "\t\t--vfork\tUse vfork() (default threads)\n");
+ fprintf(stderr, "\t\t--exec\tAlso exec() (default no exec)\n");
+ fprintf(stderr, "\t\t--timeout=X\tDuration in seconds to run (default 30)\n");
+ fprintf(stderr, "\t\t--exec-target\tInternal option for exec workload\n");
+}
+
+int main(int argc, char *argv[])
+{
+ signed char c;
+
+ while (1) {
+ int option_index = 0;
+
+ c = getopt_long(argc, argv, "", options, &option_index);
+
+ if (c == -1)
+ break;
+
+ switch (c) {
+ case 0:
+ if (options[option_index].flag != 0)
+ break;
+
+ usage();
+ exit(1);
+ break;
+
+ case 's':
+ timeout = atoi(optarg);
+ break;
+
+ default:
+ usage();
+ exit(1);
+ }
+ }
+
+ if (do_fork && do_vfork) {
+ usage();
+ exit(1);
+ }
+ if (do_exec && !do_fork && !do_vfork) {
+ usage();
+ exit(1);
+ }
+
+ if (do_exec) {
+ char *dirname = strdup(argv[0]);
+ int i;
+ i = strlen(dirname) - 1;
+ while (i) {
+ if (dirname[i] == '/') {
+ dirname[i] = '\0';
+ if (chdir(dirname) == -1) {
+ perror("chdir");
+ exit(1);
+ }
+ break;
+ }
+ i--;
+ }
+ }
+
+ if (exec_target) {
+ exit(0);
+ }
+
+ if (((argc - optind) != 1)) {
+ cpu = -1;
+ } else {
+ cpu = atoi(argv[optind++]);
+ }
+
+ if (do_exec)
+ exec_file = argv[0];
+
+ set_cpu(cpu);
+
+ printf("Using ");
+ if (do_fork)
+ printf("fork");
+ else if (do_vfork)
+ printf("vfork");
+ else
+ printf("clone");
+
+ if (do_exec)
+ printf(" + exec");
+
+ printf(" on cpu %d\n", cpu);
+
+ /* Create a new process group so we can signal everyone for exit */
+ setpgid(getpid(), getpid());
+
+ signal(SIGUSR1, sigusr1_handler);
+
+ start_process_on(bench_proc, NULL, cpu);
+
+ while (1)
+ sleep(3600);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/powerpc/benchmarks/futex_bench.c b/tools/testing/selftests/powerpc/benchmarks/futex_bench.c
new file mode 100644
index 000000000..d58e4dc50
--- /dev/null
+++ b/tools/testing/selftests/powerpc/benchmarks/futex_bench.c
@@ -0,0 +1,43 @@
+/*
+ * Copyright 2016, Anton Blanchard, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <sys/syscall.h>
+#include <time.h>
+#include <unistd.h>
+#include <linux/futex.h>
+
+#include "utils.h"
+
+#define ITERATIONS 100000000
+
+#define futex(A, B, C, D, E, F) syscall(__NR_futex, A, B, C, D, E, F)
+
+int test_futex(void)
+{
+ struct timespec ts_start, ts_end;
+ unsigned long i = ITERATIONS;
+
+ clock_gettime(CLOCK_MONOTONIC, &ts_start);
+
+ while (i--) {
+ unsigned int addr = 0;
+ futex(&addr, FUTEX_WAKE, 1, NULL, NULL, 0);
+ }
+
+ clock_gettime(CLOCK_MONOTONIC, &ts_end);
+
+ printf("time = %.6f\n", ts_end.tv_sec - ts_start.tv_sec + (ts_end.tv_nsec - ts_start.tv_nsec) / 1e9);
+
+ return 0;
+}
+
+int main(void)
+{
+ test_harness_set_timeout(300);
+ return test_harness(test_futex, "futex_bench");
+}
diff --git a/tools/testing/selftests/powerpc/benchmarks/gettimeofday.c b/tools/testing/selftests/powerpc/benchmarks/gettimeofday.c
new file mode 100644
index 000000000..3af3c21e8
--- /dev/null
+++ b/tools/testing/selftests/powerpc/benchmarks/gettimeofday.c
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2015, Anton Blanchard, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <sys/time.h>
+#include <stdio.h>
+
+#include "utils.h"
+
+static int test_gettimeofday(void)
+{
+ int i;
+
+ struct timeval tv_start, tv_end;
+
+ gettimeofday(&tv_start, NULL);
+
+ for(i = 0; i < 100000000; i++) {
+ gettimeofday(&tv_end, NULL);
+ }
+
+ printf("time = %.6f\n", tv_end.tv_sec - tv_start.tv_sec + (tv_end.tv_usec - tv_start.tv_usec) * 1e-6);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(test_gettimeofday, "gettimeofday");
+}
diff --git a/tools/testing/selftests/powerpc/benchmarks/mmap_bench.c b/tools/testing/selftests/powerpc/benchmarks/mmap_bench.c
new file mode 100644
index 000000000..033de0560
--- /dev/null
+++ b/tools/testing/selftests/powerpc/benchmarks/mmap_bench.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright 2016, Anton Blanchard, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <time.h>
+#include <getopt.h>
+
+#include "utils.h"
+
+#define ITERATIONS 5000000
+
+#define MEMSIZE (1UL << 27)
+#define PAGE_SIZE (1UL << 16)
+#define CHUNK_COUNT (MEMSIZE/PAGE_SIZE)
+
+static int pg_fault;
+static int iterations = ITERATIONS;
+
+static struct option options[] = {
+ { "pgfault", no_argument, &pg_fault, 1 },
+ { "iterations", required_argument, 0, 'i' },
+ { 0, },
+};
+
+static void usage(void)
+{
+ printf("mmap_bench <--pgfault> <--iterations count>\n");
+}
+
+int test_mmap(void)
+{
+ struct timespec ts_start, ts_end;
+ unsigned long i = iterations;
+
+ clock_gettime(CLOCK_MONOTONIC, &ts_start);
+
+ while (i--) {
+ char *c = mmap(NULL, MEMSIZE, PROT_READ|PROT_WRITE,
+ MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+ FAIL_IF(c == MAP_FAILED);
+ if (pg_fault) {
+ int count;
+ for (count = 0; count < CHUNK_COUNT; count++)
+ c[count << 16] = 'c';
+ }
+ munmap(c, MEMSIZE);
+ }
+
+ clock_gettime(CLOCK_MONOTONIC, &ts_end);
+
+ printf("time = %.6f\n", ts_end.tv_sec - ts_start.tv_sec + (ts_end.tv_nsec - ts_start.tv_nsec) / 1e9);
+
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ signed char c;
+ while (1) {
+ int option_index = 0;
+
+ c = getopt_long(argc, argv, "", options, &option_index);
+
+ if (c == -1)
+ break;
+
+ switch (c) {
+ case 0:
+ if (options[option_index].flag != 0)
+ break;
+
+ usage();
+ exit(1);
+ break;
+ case 'i':
+ iterations = atoi(optarg);
+ break;
+ default:
+ usage();
+ exit(1);
+ }
+ }
+
+ test_harness_set_timeout(300);
+ return test_harness(test_mmap, "mmap_bench");
+}
diff --git a/tools/testing/selftests/powerpc/benchmarks/null_syscall.c b/tools/testing/selftests/powerpc/benchmarks/null_syscall.c
new file mode 100644
index 000000000..ecc14d68e
--- /dev/null
+++ b/tools/testing/selftests/powerpc/benchmarks/null_syscall.c
@@ -0,0 +1,157 @@
+/*
+ * Test null syscall performance
+ *
+ * Copyright (C) 2009-2015 Anton Blanchard, IBM
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define NR_LOOPS 10000000
+
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <time.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <signal.h>
+
+static volatile int soak_done;
+unsigned long long clock_frequency;
+unsigned long long timebase_frequency;
+double timebase_multiplier;
+
+static inline unsigned long long mftb(void)
+{
+ unsigned long low;
+
+ asm volatile("mftb %0" : "=r" (low));
+
+ return low;
+}
+
+static void sigalrm_handler(int unused)
+{
+ soak_done = 1;
+}
+
+/*
+ * Use a timer instead of busy looping on clock_gettime() so we don't
+ * pollute profiles with glibc and VDSO hits.
+ */
+static void cpu_soak_usecs(unsigned long usecs)
+{
+ struct itimerval val;
+
+ memset(&val, 0, sizeof(val));
+ val.it_value.tv_usec = usecs;
+
+ signal(SIGALRM, sigalrm_handler);
+ setitimer(ITIMER_REAL, &val, NULL);
+
+ while (1) {
+ if (soak_done)
+ break;
+ }
+
+ signal(SIGALRM, SIG_DFL);
+}
+
+/*
+ * This only works with recent kernels where cpufreq modifies
+ * /proc/cpuinfo dynamically.
+ */
+static void get_proc_frequency(void)
+{
+ FILE *f;
+ char line[128];
+ char *p, *end;
+ unsigned long v;
+ double d;
+ char *override;
+
+ /* Try to get out of low power/low frequency mode */
+ cpu_soak_usecs(0.25 * 1000000);
+
+ f = fopen("/proc/cpuinfo", "r");
+ if (f == NULL)
+ return;
+
+ timebase_frequency = 0;
+
+ while (fgets(line, sizeof(line), f) != NULL) {
+ if (strncmp(line, "timebase", 8) == 0) {
+ p = strchr(line, ':');
+ if (p != NULL) {
+ v = strtoull(p + 1, &end, 0);
+ if (end != p + 1)
+ timebase_frequency = v;
+ }
+ }
+
+ if (((strncmp(line, "clock", 5) == 0) ||
+ (strncmp(line, "cpu MHz", 7) == 0))) {
+ p = strchr(line, ':');
+ if (p != NULL) {
+ d = strtod(p + 1, &end);
+ if (end != p + 1) {
+ /* Find fastest clock frequency */
+ if ((d * 1000000ULL) > clock_frequency)
+ clock_frequency = d * 1000000ULL;
+ }
+ }
+ }
+ }
+
+ fclose(f);
+
+ override = getenv("FREQUENCY");
+ if (override)
+ clock_frequency = strtoull(override, NULL, 10);
+
+ if (timebase_frequency)
+ timebase_multiplier = (double)clock_frequency
+ / timebase_frequency;
+ else
+ timebase_multiplier = 1;
+}
+
+static void do_null_syscall(unsigned long nr)
+{
+ unsigned long i;
+
+ for (i = 0; i < nr; i++)
+ getppid();
+}
+
+#define TIME(A, STR) \
+
+int main(void)
+{
+ unsigned long tb_start, tb_now;
+ struct timespec tv_start, tv_now;
+ unsigned long long elapsed_ns, elapsed_tb;
+
+ get_proc_frequency();
+
+ clock_gettime(CLOCK_MONOTONIC, &tv_start);
+ tb_start = mftb();
+
+ do_null_syscall(NR_LOOPS);
+
+ clock_gettime(CLOCK_MONOTONIC, &tv_now);
+ tb_now = mftb();
+
+ elapsed_ns = (tv_now.tv_sec - tv_start.tv_sec) * 1000000000ULL +
+ (tv_now.tv_nsec - tv_start.tv_nsec);
+ elapsed_tb = tb_now - tb_start;
+
+ printf("%10.2f ns %10.2f cycles\n", (float)elapsed_ns / NR_LOOPS,
+ (float)elapsed_tb * timebase_multiplier / NR_LOOPS);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/powerpc/cache_shape/.gitignore b/tools/testing/selftests/powerpc/cache_shape/.gitignore
new file mode 100644
index 000000000..ec1848434
--- /dev/null
+++ b/tools/testing/selftests/powerpc/cache_shape/.gitignore
@@ -0,0 +1 @@
+cache_shape
diff --git a/tools/testing/selftests/powerpc/cache_shape/Makefile b/tools/testing/selftests/powerpc/cache_shape/Makefile
new file mode 100644
index 000000000..689f6c8eb
--- /dev/null
+++ b/tools/testing/selftests/powerpc/cache_shape/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+TEST_GEN_PROGS := cache_shape
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): ../harness.c ../utils.c
diff --git a/tools/testing/selftests/powerpc/cache_shape/cache_shape.c b/tools/testing/selftests/powerpc/cache_shape/cache_shape.c
new file mode 100644
index 000000000..29ec07eba
--- /dev/null
+++ b/tools/testing/selftests/powerpc/cache_shape/cache_shape.c
@@ -0,0 +1,125 @@
+/*
+ * Copyright 2017, Michael Ellerman, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <elf.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <link.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "utils.h"
+
+#ifndef AT_L1I_CACHESIZE
+#define AT_L1I_CACHESIZE 40
+#define AT_L1I_CACHEGEOMETRY 41
+#define AT_L1D_CACHESIZE 42
+#define AT_L1D_CACHEGEOMETRY 43
+#define AT_L2_CACHESIZE 44
+#define AT_L2_CACHEGEOMETRY 45
+#define AT_L3_CACHESIZE 46
+#define AT_L3_CACHEGEOMETRY 47
+#endif
+
+static void print_size(const char *label, uint32_t val)
+{
+ printf("%s cache size: %#10x %10dB %10dK\n", label, val, val, val / 1024);
+}
+
+static void print_geo(const char *label, uint32_t val)
+{
+ uint16_t assoc;
+
+ printf("%s line size: %#10x ", label, val & 0xFFFF);
+
+ assoc = val >> 16;
+ if (assoc)
+ printf("%u-way", assoc);
+ else
+ printf("fully");
+
+ printf(" associative\n");
+}
+
+static int test_cache_shape()
+{
+ static char buffer[4096];
+ ElfW(auxv_t) *p;
+ int found;
+
+ FAIL_IF(read_auxv(buffer, sizeof(buffer)));
+
+ found = 0;
+
+ p = find_auxv_entry(AT_L1I_CACHESIZE, buffer);
+ if (p) {
+ found++;
+ print_size("L1I ", (uint32_t)p->a_un.a_val);
+ }
+
+ p = find_auxv_entry(AT_L1I_CACHEGEOMETRY, buffer);
+ if (p) {
+ found++;
+ print_geo("L1I ", (uint32_t)p->a_un.a_val);
+ }
+
+ p = find_auxv_entry(AT_L1D_CACHESIZE, buffer);
+ if (p) {
+ found++;
+ print_size("L1D ", (uint32_t)p->a_un.a_val);
+ }
+
+ p = find_auxv_entry(AT_L1D_CACHEGEOMETRY, buffer);
+ if (p) {
+ found++;
+ print_geo("L1D ", (uint32_t)p->a_un.a_val);
+ }
+
+ p = find_auxv_entry(AT_L2_CACHESIZE, buffer);
+ if (p) {
+ found++;
+ print_size("L2 ", (uint32_t)p->a_un.a_val);
+ }
+
+ p = find_auxv_entry(AT_L2_CACHEGEOMETRY, buffer);
+ if (p) {
+ found++;
+ print_geo("L2 ", (uint32_t)p->a_un.a_val);
+ }
+
+ p = find_auxv_entry(AT_L3_CACHESIZE, buffer);
+ if (p) {
+ found++;
+ print_size("L3 ", (uint32_t)p->a_un.a_val);
+ }
+
+ p = find_auxv_entry(AT_L3_CACHEGEOMETRY, buffer);
+ if (p) {
+ found++;
+ print_geo("L3 ", (uint32_t)p->a_un.a_val);
+ }
+
+ /* If we found none we're probably on a system where they don't exist */
+ SKIP_IF(found == 0);
+
+ /* But if we found any, we expect to find them all */
+ FAIL_IF(found != 8);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(test_cache_shape, "cache_shape");
+}
diff --git a/tools/testing/selftests/powerpc/copyloops/.gitignore b/tools/testing/selftests/powerpc/copyloops/.gitignore
new file mode 100644
index 000000000..ce12cd0e2
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/.gitignore
@@ -0,0 +1,13 @@
+copyuser_64_t0
+copyuser_64_t1
+copyuser_64_t2
+copyuser_power7_t0
+copyuser_power7_t1
+memcpy_64_t0
+memcpy_64_t1
+memcpy_64_t2
+memcpy_power7_t0
+memcpy_power7_t1
+copyuser_64_exc_t0
+copyuser_64_exc_t1
+copyuser_64_exc_t2
diff --git a/tools/testing/selftests/powerpc/copyloops/Makefile b/tools/testing/selftests/powerpc/copyloops/Makefile
new file mode 100644
index 000000000..44574f381
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/Makefile
@@ -0,0 +1,53 @@
+# SPDX-License-Identifier: GPL-2.0
+# The loops are all 64-bit code
+CFLAGS += -m64
+CFLAGS += -I$(CURDIR)
+CFLAGS += -D SELFTEST
+CFLAGS += -maltivec
+CFLAGS += -mcpu=power4
+
+# Use our CFLAGS for the implicit .S rule & set the asm machine type
+ASFLAGS = $(CFLAGS) -Wa,-mpower4
+
+TEST_GEN_PROGS := copyuser_64_t0 copyuser_64_t1 copyuser_64_t2 \
+ copyuser_p7_t0 copyuser_p7_t1 \
+ memcpy_64_t0 memcpy_64_t1 memcpy_64_t2 \
+ memcpy_p7_t0 memcpy_p7_t1 \
+ copyuser_64_exc_t0 copyuser_64_exc_t1 copyuser_64_exc_t2
+
+EXTRA_SOURCES := validate.c ../harness.c stubs.S
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+$(OUTPUT)/copyuser_64_t%: copyuser_64.S $(EXTRA_SOURCES)
+ $(CC) $(CPPFLAGS) $(CFLAGS) \
+ -D COPY_LOOP=test___copy_tofrom_user_base \
+ -D SELFTEST_CASE=$(subst copyuser_64_t,,$(notdir $@)) \
+ -o $@ $^
+
+$(OUTPUT)/copyuser_p7_t%: copyuser_power7.S $(EXTRA_SOURCES)
+ $(CC) $(CPPFLAGS) $(CFLAGS) \
+ -D COPY_LOOP=test___copy_tofrom_user_power7 \
+ -D SELFTEST_CASE=$(subst copyuser_p7_t,,$(notdir $@)) \
+ -o $@ $^
+
+# Strictly speaking, we only need the memcpy_64 test cases for big-endian
+$(OUTPUT)/memcpy_64_t%: memcpy_64.S $(EXTRA_SOURCES)
+ $(CC) $(CPPFLAGS) $(CFLAGS) \
+ -D COPY_LOOP=test_memcpy \
+ -D SELFTEST_CASE=$(subst memcpy_64_t,,$(notdir $@)) \
+ -o $@ $^
+
+$(OUTPUT)/memcpy_p7_t%: memcpy_power7.S $(EXTRA_SOURCES)
+ $(CC) $(CPPFLAGS) $(CFLAGS) \
+ -D COPY_LOOP=test_memcpy_power7 \
+ -D SELFTEST_CASE=$(subst memcpy_p7_t,,$(notdir $@)) \
+ -o $@ $^
+
+$(OUTPUT)/copyuser_64_exc_t%: copyuser_64.S exc_validate.c ../harness.c \
+ copy_tofrom_user_reference.S stubs.S
+ $(CC) $(CPPFLAGS) $(CFLAGS) \
+ -D COPY_LOOP=test___copy_tofrom_user_base \
+ -D SELFTEST_CASE=$(subst copyuser_64_exc_t,,$(notdir $@)) \
+ -o $@ $^
diff --git a/tools/testing/selftests/powerpc/copyloops/asm/asm-compat.h b/tools/testing/selftests/powerpc/copyloops/asm/asm-compat.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/asm/asm-compat.h
diff --git a/tools/testing/selftests/powerpc/copyloops/asm/export.h b/tools/testing/selftests/powerpc/copyloops/asm/export.h
new file mode 100644
index 000000000..0bab35f67
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/asm/export.h
@@ -0,0 +1,2 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#define EXPORT_SYMBOL(x)
diff --git a/tools/testing/selftests/powerpc/copyloops/asm/feature-fixups.h b/tools/testing/selftests/powerpc/copyloops/asm/feature-fixups.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/asm/feature-fixups.h
diff --git a/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h b/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h
new file mode 100644
index 000000000..0605df807
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __SELFTESTS_POWERPC_PPC_ASM_H
+#define __SELFTESTS_POWERPC_PPC_ASM_H
+#include <ppc-asm.h>
+
+#define CONFIG_ALTIVEC
+
+#define r1 1
+
+#define R14 r14
+#define R15 r15
+#define R16 r16
+#define R17 r17
+#define R18 r18
+#define R19 r19
+#define R20 r20
+#define R21 r21
+#define R22 r22
+#define R29 r29
+#define R30 r30
+#define R31 r31
+
+#define STACKFRAMESIZE 256
+#define STK_REG(i) (112 + ((i)-14)*8)
+
+#define _GLOBAL(A) FUNC_START(test_ ## A)
+#define _GLOBAL_TOC(A) _GLOBAL(A)
+
+#define PPC_MTOCRF(A, B) mtocrf A, B
+
+#define EX_TABLE(x, y) \
+ .section __ex_table,"a"; \
+ .8byte x, y; \
+ .previous
+
+#define BEGIN_FTR_SECTION .if test_feature
+#define FTR_SECTION_ELSE .else
+#define ALT_FTR_SECTION_END_IFCLR(x) .endif
+#define ALT_FTR_SECTION_END_IFSET(x) .endif
+#define ALT_FTR_SECTION_END(x, y) .endif
+#define END_FTR_SECTION_IFCLR(x) .endif
+#define END_FTR_SECTION_IFSET(x) .endif
+
+/* Default to taking the first of any alternative feature sections */
+test_feature = 1
+
+#endif /* __SELFTESTS_POWERPC_PPC_ASM_H */
diff --git a/tools/testing/selftests/powerpc/copyloops/asm/processor.h b/tools/testing/selftests/powerpc/copyloops/asm/processor.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/asm/processor.h
diff --git a/tools/testing/selftests/powerpc/copyloops/copy_tofrom_user_reference.S b/tools/testing/selftests/powerpc/copyloops/copy_tofrom_user_reference.S
new file mode 100644
index 000000000..3363b8640
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/copy_tofrom_user_reference.S
@@ -0,0 +1,24 @@
+#include <asm/ppc_asm.h>
+
+_GLOBAL(copy_tofrom_user_reference)
+ cmpdi r5,0
+ beq 4f
+
+ mtctr r5
+
+1: lbz r6,0(r4)
+2: stb r6,0(r3)
+ addi r3,r3,1
+ addi r4,r4,1
+ bdnz 1b
+
+3: mfctr r3
+ blr
+
+4: mr r3,r5
+ blr
+
+.section __ex_table,"a"
+ .llong 1b,3b
+ .llong 2b,3b
+.text
diff --git a/tools/testing/selftests/powerpc/copyloops/copyuser_64.S b/tools/testing/selftests/powerpc/copyloops/copyuser_64.S
new file mode 120000
index 000000000..f1c418a25
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/copyuser_64.S
@@ -0,0 +1 @@
+../../../../../arch/powerpc/lib/copyuser_64.S \ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/copyloops/copyuser_power7.S b/tools/testing/selftests/powerpc/copyloops/copyuser_power7.S
new file mode 120000
index 000000000..478689598
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/copyuser_power7.S
@@ -0,0 +1 @@
+../../../../../arch/powerpc/lib/copyuser_power7.S \ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/copyloops/exc_validate.c b/tools/testing/selftests/powerpc/copyloops/exc_validate.c
new file mode 100644
index 000000000..c896ea9a7
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/exc_validate.c
@@ -0,0 +1,124 @@
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <signal.h>
+#include <unistd.h>
+#include <sys/mman.h>
+
+#include "utils.h"
+
+extern char __start___ex_table[];
+extern char __stop___ex_table[];
+
+#if defined(__powerpc64__)
+#define UCONTEXT_NIA(UC) (UC)->uc_mcontext.gp_regs[PT_NIP]
+#elif defined(__powerpc__)
+#define UCONTEXT_NIA(UC) (UC)->uc_mcontext.uc_regs->gregs[PT_NIP]
+#else
+#error implement UCONTEXT_NIA
+#endif
+
+static void segv_handler(int signr, siginfo_t *info, void *ptr)
+{
+ ucontext_t *uc = (ucontext_t *)ptr;
+ unsigned long addr = (unsigned long)info->si_addr;
+ unsigned long *ip = &UCONTEXT_NIA(uc);
+ unsigned long *ex_p = (unsigned long *)__start___ex_table;
+
+ while (ex_p < (unsigned long *)__stop___ex_table) {
+ unsigned long insn, fixup;
+
+ insn = *ex_p++;
+ fixup = *ex_p++;
+
+ if (insn == *ip) {
+ *ip = fixup;
+ return;
+ }
+ }
+
+ printf("No exception table match for NIA %lx ADDR %lx\n", *ip, addr);
+ abort();
+}
+
+static void setup_segv_handler(void)
+{
+ struct sigaction action;
+
+ memset(&action, 0, sizeof(action));
+ action.sa_sigaction = segv_handler;
+ action.sa_flags = SA_SIGINFO;
+ sigaction(SIGSEGV, &action, NULL);
+}
+
+unsigned long COPY_LOOP(void *to, const void *from, unsigned long size);
+unsigned long test_copy_tofrom_user_reference(void *to, const void *from, unsigned long size);
+
+static int total_passed;
+static int total_failed;
+
+static void do_one_test(char *dstp, char *srcp, unsigned long len)
+{
+ unsigned long got, expected;
+
+ got = COPY_LOOP(dstp, srcp, len);
+ expected = test_copy_tofrom_user_reference(dstp, srcp, len);
+
+ if (got != expected) {
+ total_failed++;
+ printf("FAIL from=%p to=%p len=%ld returned %ld, expected %ld\n",
+ srcp, dstp, len, got, expected);
+ //abort();
+ } else
+ total_passed++;
+}
+
+//#define MAX_LEN 512
+#define MAX_LEN 16
+
+int test_copy_exception(void)
+{
+ int page_size;
+ static char *p, *q;
+ unsigned long src, dst, len;
+
+ page_size = getpagesize();
+ p = mmap(NULL, page_size * 2, PROT_READ|PROT_WRITE,
+ MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+
+ if (p == MAP_FAILED) {
+ perror("mmap");
+ exit(1);
+ }
+
+ memset(p, 0, page_size);
+
+ setup_segv_handler();
+
+ if (mprotect(p + page_size, page_size, PROT_NONE)) {
+ perror("mprotect");
+ exit(1);
+ }
+
+ q = p + page_size - MAX_LEN;
+
+ for (src = 0; src < MAX_LEN; src++) {
+ for (dst = 0; dst < MAX_LEN; dst++) {
+ for (len = 0; len < MAX_LEN+1; len++) {
+ // printf("from=%p to=%p len=%ld\n", q+dst, q+src, len);
+ do_one_test(q+dst, q+src, len);
+ }
+ }
+ }
+
+ printf("Totals:\n");
+ printf(" Pass: %d\n", total_passed);
+ printf(" Fail: %d\n", total_failed);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(test_copy_exception, str(COPY_LOOP));
+}
diff --git a/tools/testing/selftests/powerpc/copyloops/memcpy_64.S b/tools/testing/selftests/powerpc/copyloops/memcpy_64.S
new file mode 120000
index 000000000..cce33fb6f
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/memcpy_64.S
@@ -0,0 +1 @@
+../../../../../arch/powerpc/lib/memcpy_64.S \ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/copyloops/memcpy_power7.S b/tools/testing/selftests/powerpc/copyloops/memcpy_power7.S
new file mode 120000
index 000000000..0d6fbfaf3
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/memcpy_power7.S
@@ -0,0 +1 @@
+../../../../../arch/powerpc/lib/memcpy_power7.S \ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/copyloops/stubs.S b/tools/testing/selftests/powerpc/copyloops/stubs.S
new file mode 100644
index 000000000..ec8bcf2bf
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/stubs.S
@@ -0,0 +1,19 @@
+#include <asm/ppc_asm.h>
+
+FUNC_START(enter_vmx_usercopy)
+ li r3,1
+ blr
+
+FUNC_START(exit_vmx_usercopy)
+ li r3,0
+ blr
+
+FUNC_START(enter_vmx_ops)
+ li r3,1
+ blr
+
+FUNC_START(exit_vmx_ops)
+ blr
+
+FUNC_START(__copy_tofrom_user_base)
+ blr
diff --git a/tools/testing/selftests/powerpc/copyloops/validate.c b/tools/testing/selftests/powerpc/copyloops/validate.c
new file mode 100644
index 000000000..0f6873618
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/validate.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <malloc.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+#include "utils.h"
+
+#define MAX_LEN 8192
+#define MAX_OFFSET 16
+#define MIN_REDZONE 128
+#define BUFLEN (MAX_LEN+MAX_OFFSET+2*MIN_REDZONE)
+#define POISON 0xa5
+
+unsigned long COPY_LOOP(void *to, const void *from, unsigned long size);
+
+static void do_one(char *src, char *dst, unsigned long src_off,
+ unsigned long dst_off, unsigned long len, void *redzone,
+ void *fill)
+{
+ char *srcp, *dstp;
+ unsigned long ret;
+ unsigned long i;
+
+ srcp = src + MIN_REDZONE + src_off;
+ dstp = dst + MIN_REDZONE + dst_off;
+
+ memset(src, POISON, BUFLEN);
+ memset(dst, POISON, BUFLEN);
+ memcpy(srcp, fill, len);
+
+ ret = COPY_LOOP(dstp, srcp, len);
+ if (ret && ret != (unsigned long)dstp) {
+ printf("(%p,%p,%ld) returned %ld\n", dstp, srcp, len, ret);
+ abort();
+ }
+
+ if (memcmp(dstp, srcp, len)) {
+ printf("(%p,%p,%ld) miscompare\n", dstp, srcp, len);
+ printf("src: ");
+ for (i = 0; i < len; i++)
+ printf("%02x ", srcp[i]);
+ printf("\ndst: ");
+ for (i = 0; i < len; i++)
+ printf("%02x ", dstp[i]);
+ printf("\n");
+ abort();
+ }
+
+ if (memcmp(dst, redzone, dstp - dst)) {
+ printf("(%p,%p,%ld) redzone before corrupted\n",
+ dstp, srcp, len);
+ abort();
+ }
+
+ if (memcmp(dstp+len, redzone, dst+BUFLEN-(dstp+len))) {
+ printf("(%p,%p,%ld) redzone after corrupted\n",
+ dstp, srcp, len);
+ abort();
+ }
+}
+
+int test_copy_loop(void)
+{
+ char *src, *dst, *redzone, *fill;
+ unsigned long len, src_off, dst_off;
+ unsigned long i;
+
+ src = memalign(BUFLEN, BUFLEN);
+ dst = memalign(BUFLEN, BUFLEN);
+ redzone = malloc(BUFLEN);
+ fill = malloc(BUFLEN);
+
+ if (!src || !dst || !redzone || !fill) {
+ fprintf(stderr, "malloc failed\n");
+ exit(1);
+ }
+
+ memset(redzone, POISON, BUFLEN);
+
+ /* Fill with sequential bytes */
+ for (i = 0; i < BUFLEN; i++)
+ fill[i] = i & 0xff;
+
+ for (len = 1; len < MAX_LEN; len++) {
+ for (src_off = 0; src_off < MAX_OFFSET; src_off++) {
+ for (dst_off = 0; dst_off < MAX_OFFSET; dst_off++) {
+ do_one(src, dst, src_off, dst_off, len,
+ redzone, fill);
+ }
+ }
+ }
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(test_copy_loop, str(COPY_LOOP));
+}
diff --git a/tools/testing/selftests/powerpc/dscr/.gitignore b/tools/testing/selftests/powerpc/dscr/.gitignore
new file mode 100644
index 000000000..b585c6c15
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dscr/.gitignore
@@ -0,0 +1,7 @@
+dscr_default_test
+dscr_explicit_test
+dscr_inherit_exec_test
+dscr_inherit_test
+dscr_sysfs_test
+dscr_sysfs_thread_test
+dscr_user_test
diff --git a/tools/testing/selftests/powerpc/dscr/Makefile b/tools/testing/selftests/powerpc/dscr/Makefile
new file mode 100644
index 000000000..5df476364
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dscr/Makefile
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
+TEST_GEN_PROGS := dscr_default_test dscr_explicit_test dscr_user_test \
+ dscr_inherit_test dscr_inherit_exec_test dscr_sysfs_test \
+ dscr_sysfs_thread_test
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+$(OUTPUT)/dscr_default_test: LDLIBS += -lpthread
+
+$(TEST_GEN_PROGS): ../harness.c
diff --git a/tools/testing/selftests/powerpc/dscr/dscr.h b/tools/testing/selftests/powerpc/dscr/dscr.h
new file mode 100644
index 000000000..cdb840bc5
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dscr/dscr.h
@@ -0,0 +1,125 @@
+/*
+ * POWER Data Stream Control Register (DSCR)
+ *
+ * This header file contains helper functions and macros
+ * required for all the DSCR related test cases.
+ *
+ * Copyright 2012, Anton Blanchard, IBM Corporation.
+ * Copyright 2015, Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+#ifndef _SELFTESTS_POWERPC_DSCR_DSCR_H
+#define _SELFTESTS_POWERPC_DSCR_DSCR_H
+
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <dirent.h>
+#include <pthread.h>
+#include <sched.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+
+#include "utils.h"
+
+#define THREADS 100 /* Max threads */
+#define COUNT 100 /* Max iterations */
+#define DSCR_MAX 16 /* Max DSCR value */
+#define LEN_MAX 100 /* Max name length */
+
+#define DSCR_DEFAULT "/sys/devices/system/cpu/dscr_default"
+#define CPU_PATH "/sys/devices/system/cpu/"
+
+#define rmb() asm volatile("lwsync":::"memory")
+#define wmb() asm volatile("lwsync":::"memory")
+
+#define READ_ONCE(x) (*(volatile typeof(x) *)&(x))
+
+/* Prilvilege state DSCR access */
+inline unsigned long get_dscr(void)
+{
+ unsigned long ret;
+
+ asm volatile("mfspr %0,%1" : "=r" (ret) : "i" (SPRN_DSCR_PRIV));
+
+ return ret;
+}
+
+inline void set_dscr(unsigned long val)
+{
+ asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR_PRIV));
+}
+
+/* Problem state DSCR access */
+inline unsigned long get_dscr_usr(void)
+{
+ unsigned long ret;
+
+ asm volatile("mfspr %0,%1" : "=r" (ret) : "i" (SPRN_DSCR));
+
+ return ret;
+}
+
+inline void set_dscr_usr(unsigned long val)
+{
+ asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR));
+}
+
+/* Default DSCR access */
+unsigned long get_default_dscr(void)
+{
+ int fd = -1, ret;
+ char buf[16];
+ unsigned long val;
+
+ if (fd == -1) {
+ fd = open(DSCR_DEFAULT, O_RDONLY);
+ if (fd == -1) {
+ perror("open() failed");
+ exit(1);
+ }
+ }
+ memset(buf, 0, sizeof(buf));
+ lseek(fd, 0, SEEK_SET);
+ ret = read(fd, buf, sizeof(buf));
+ if (ret == -1) {
+ perror("read() failed");
+ exit(1);
+ }
+ sscanf(buf, "%lx", &val);
+ close(fd);
+ return val;
+}
+
+void set_default_dscr(unsigned long val)
+{
+ int fd = -1, ret;
+ char buf[16];
+
+ if (fd == -1) {
+ fd = open(DSCR_DEFAULT, O_RDWR);
+ if (fd == -1) {
+ perror("open() failed");
+ exit(1);
+ }
+ }
+ sprintf(buf, "%lx\n", val);
+ ret = write(fd, buf, strlen(buf));
+ if (ret == -1) {
+ perror("write() failed");
+ exit(1);
+ }
+ close(fd);
+}
+
+double uniform_deviate(int seed)
+{
+ return seed * (1.0 / (RAND_MAX + 1.0));
+}
+#endif /* _SELFTESTS_POWERPC_DSCR_DSCR_H */
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_default_test.c b/tools/testing/selftests/powerpc/dscr/dscr_default_test.c
new file mode 100644
index 000000000..9e1a37e93
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dscr/dscr_default_test.c
@@ -0,0 +1,127 @@
+/*
+ * POWER Data Stream Control Register (DSCR) default test
+ *
+ * This test modifies the system wide default DSCR through
+ * it's sysfs interface and then verifies that all threads
+ * see the correct changed DSCR value immediately.
+ *
+ * Copyright 2012, Anton Blanchard, IBM Corporation.
+ * Copyright 2015, Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+#include "dscr.h"
+
+static unsigned long dscr; /* System DSCR default */
+static unsigned long sequence;
+static unsigned long result[THREADS];
+
+static void *do_test(void *in)
+{
+ unsigned long thread = (unsigned long)in;
+ unsigned long i;
+
+ for (i = 0; i < COUNT; i++) {
+ unsigned long d, cur_dscr, cur_dscr_usr;
+ unsigned long s1, s2;
+
+ s1 = READ_ONCE(sequence);
+ if (s1 & 1)
+ continue;
+ rmb();
+
+ d = dscr;
+ cur_dscr = get_dscr();
+ cur_dscr_usr = get_dscr_usr();
+
+ rmb();
+ s2 = sequence;
+
+ if (s1 != s2)
+ continue;
+
+ if (cur_dscr != d) {
+ fprintf(stderr, "thread %ld kernel DSCR should be %ld "
+ "but is %ld\n", thread, d, cur_dscr);
+ result[thread] = 1;
+ pthread_exit(&result[thread]);
+ }
+
+ if (cur_dscr_usr != d) {
+ fprintf(stderr, "thread %ld user DSCR should be %ld "
+ "but is %ld\n", thread, d, cur_dscr_usr);
+ result[thread] = 1;
+ pthread_exit(&result[thread]);
+ }
+ }
+ result[thread] = 0;
+ pthread_exit(&result[thread]);
+}
+
+int dscr_default(void)
+{
+ pthread_t threads[THREADS];
+ unsigned long i, *status[THREADS];
+ unsigned long orig_dscr_default;
+
+ orig_dscr_default = get_default_dscr();
+
+ /* Initial DSCR default */
+ dscr = 1;
+ set_default_dscr(dscr);
+
+ /* Spawn all testing threads */
+ for (i = 0; i < THREADS; i++) {
+ if (pthread_create(&threads[i], NULL, do_test, (void *)i)) {
+ perror("pthread_create() failed");
+ goto fail;
+ }
+ }
+
+ srand(getpid());
+
+ /* Keep changing the DSCR default */
+ for (i = 0; i < COUNT; i++) {
+ double ret = uniform_deviate(rand());
+
+ if (ret < 0.0001) {
+ sequence++;
+ wmb();
+
+ dscr++;
+ if (dscr > DSCR_MAX)
+ dscr = 0;
+
+ set_default_dscr(dscr);
+
+ wmb();
+ sequence++;
+ }
+ }
+
+ /* Individual testing thread exit status */
+ for (i = 0; i < THREADS; i++) {
+ if (pthread_join(threads[i], (void **)&(status[i]))) {
+ perror("pthread_join() failed");
+ goto fail;
+ }
+
+ if (*status[i]) {
+ printf("%ldth thread failed to join with %ld status\n",
+ i, *status[i]);
+ goto fail;
+ }
+ }
+ set_default_dscr(orig_dscr_default);
+ return 0;
+fail:
+ set_default_dscr(orig_dscr_default);
+ return 1;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(dscr_default, "dscr_default_test");
+}
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c b/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c
new file mode 100644
index 000000000..ad9c3ec26
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c
@@ -0,0 +1,71 @@
+/*
+ * POWER Data Stream Control Register (DSCR) explicit test
+ *
+ * This test modifies the DSCR value using mtspr instruction and
+ * verifies the change with mfspr instruction. It uses both the
+ * privilege state SPR and the problem state SPR for this purpose.
+ *
+ * When using the privilege state SPR, the instructions such as
+ * mfspr or mtspr are priviledged and the kernel emulates them
+ * for us. Instructions using problem state SPR can be exuecuted
+ * directly without any emulation if the HW supports them. Else
+ * they also get emulated by the kernel.
+ *
+ * Copyright 2012, Anton Blanchard, IBM Corporation.
+ * Copyright 2015, Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+#include "dscr.h"
+
+int dscr_explicit(void)
+{
+ unsigned long i, dscr = 0;
+
+ srand(getpid());
+ set_dscr(dscr);
+
+ for (i = 0; i < COUNT; i++) {
+ unsigned long cur_dscr, cur_dscr_usr;
+ double ret = uniform_deviate(rand());
+
+ if (ret < 0.001) {
+ dscr++;
+ if (dscr > DSCR_MAX)
+ dscr = 0;
+
+ set_dscr(dscr);
+ }
+
+ cur_dscr = get_dscr();
+ if (cur_dscr != dscr) {
+ fprintf(stderr, "Kernel DSCR should be %ld but "
+ "is %ld\n", dscr, cur_dscr);
+ return 1;
+ }
+
+ ret = uniform_deviate(rand());
+ if (ret < 0.001) {
+ dscr++;
+ if (dscr > DSCR_MAX)
+ dscr = 0;
+
+ set_dscr_usr(dscr);
+ }
+
+ cur_dscr_usr = get_dscr_usr();
+ if (cur_dscr_usr != dscr) {
+ fprintf(stderr, "User DSCR should be %ld but "
+ "is %ld\n", dscr, cur_dscr_usr);
+ return 1;
+ }
+ }
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(dscr_explicit, "dscr_explicit_test");
+}
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c b/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c
new file mode 100644
index 000000000..c8c240acc
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c
@@ -0,0 +1,109 @@
+/*
+ * POWER Data Stream Control Register (DSCR) fork exec test
+ *
+ * This testcase modifies the DSCR using mtspr, forks & execs and
+ * verifies that the child is using the changed DSCR using mfspr.
+ *
+ * When using the privilege state SPR, the instructions such as
+ * mfspr or mtspr are privileged and the kernel emulates them
+ * for us. Instructions using problem state SPR can be executed
+ * directly without any emulation if the HW supports them. Else
+ * they also get emulated by the kernel.
+ *
+ * Copyright 2012, Anton Blanchard, IBM Corporation.
+ * Copyright 2015, Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+#include "dscr.h"
+
+static char *prog;
+
+static void do_exec(unsigned long parent_dscr)
+{
+ unsigned long cur_dscr, cur_dscr_usr;
+
+ cur_dscr = get_dscr();
+ cur_dscr_usr = get_dscr_usr();
+
+ if (cur_dscr != parent_dscr) {
+ fprintf(stderr, "Parent DSCR %ld was not inherited "
+ "over exec (kernel value)\n", parent_dscr);
+ exit(1);
+ }
+
+ if (cur_dscr_usr != parent_dscr) {
+ fprintf(stderr, "Parent DSCR %ld was not inherited "
+ "over exec (user value)\n", parent_dscr);
+ exit(1);
+ }
+ exit(0);
+}
+
+int dscr_inherit_exec(void)
+{
+ unsigned long i, dscr = 0;
+ pid_t pid;
+
+ for (i = 0; i < COUNT; i++) {
+ dscr++;
+ if (dscr > DSCR_MAX)
+ dscr = 0;
+
+ if (dscr == get_default_dscr())
+ continue;
+
+ if (i % 2 == 0)
+ set_dscr_usr(dscr);
+ else
+ set_dscr(dscr);
+
+ pid = fork();
+ if (pid == -1) {
+ perror("fork() failed");
+ exit(1);
+ } else if (pid) {
+ int status;
+
+ if (waitpid(pid, &status, 0) == -1) {
+ perror("waitpid() failed");
+ exit(1);
+ }
+
+ if (!WIFEXITED(status)) {
+ fprintf(stderr, "Child didn't exit cleanly\n");
+ exit(1);
+ }
+
+ if (WEXITSTATUS(status) != 0) {
+ fprintf(stderr, "Child didn't exit cleanly\n");
+ return 1;
+ }
+ } else {
+ char dscr_str[16];
+
+ sprintf(dscr_str, "%ld", dscr);
+ execlp(prog, prog, "exec", dscr_str, NULL);
+ exit(1);
+ }
+ }
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ if (argc == 3 && !strcmp(argv[1], "exec")) {
+ unsigned long parent_dscr;
+
+ parent_dscr = atoi(argv[2]);
+ do_exec(parent_dscr);
+ } else if (argc != 1) {
+ fprintf(stderr, "Usage: %s\n", argv[0]);
+ exit(1);
+ }
+
+ prog = argv[0];
+ return test_harness(dscr_inherit_exec, "dscr_inherit_exec_test");
+}
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c b/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c
new file mode 100644
index 000000000..3e5a6d195
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c
@@ -0,0 +1,87 @@
+/*
+ * POWER Data Stream Control Register (DSCR) fork test
+ *
+ * This testcase modifies the DSCR using mtspr, forks and then
+ * verifies that the child process has the correct changed DSCR
+ * value using mfspr.
+ *
+ * When using the privilege state SPR, the instructions such as
+ * mfspr or mtspr are priviledged and the kernel emulates them
+ * for us. Instructions using problem state SPR can be exuecuted
+ * directly without any emulation if the HW supports them. Else
+ * they also get emulated by the kernel.
+ *
+ * Copyright 2012, Anton Blanchard, IBM Corporation.
+ * Copyright 2015, Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+#include "dscr.h"
+
+int dscr_inherit(void)
+{
+ unsigned long i, dscr = 0;
+ pid_t pid;
+
+ srand(getpid());
+ set_dscr(dscr);
+
+ for (i = 0; i < COUNT; i++) {
+ unsigned long cur_dscr, cur_dscr_usr;
+
+ dscr++;
+ if (dscr > DSCR_MAX)
+ dscr = 0;
+
+ if (i % 2 == 0)
+ set_dscr_usr(dscr);
+ else
+ set_dscr(dscr);
+
+ pid = fork();
+ if (pid == -1) {
+ perror("fork() failed");
+ exit(1);
+ } else if (pid) {
+ int status;
+
+ if (waitpid(pid, &status, 0) == -1) {
+ perror("waitpid() failed");
+ exit(1);
+ }
+
+ if (!WIFEXITED(status)) {
+ fprintf(stderr, "Child didn't exit cleanly\n");
+ exit(1);
+ }
+
+ if (WEXITSTATUS(status) != 0) {
+ fprintf(stderr, "Child didn't exit cleanly\n");
+ return 1;
+ }
+ } else {
+ cur_dscr = get_dscr();
+ if (cur_dscr != dscr) {
+ fprintf(stderr, "Kernel DSCR should be %ld "
+ "but is %ld\n", dscr, cur_dscr);
+ exit(1);
+ }
+
+ cur_dscr_usr = get_dscr_usr();
+ if (cur_dscr_usr != dscr) {
+ fprintf(stderr, "User DSCR should be %ld "
+ "but is %ld\n", dscr, cur_dscr_usr);
+ exit(1);
+ }
+ exit(0);
+ }
+ }
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(dscr_inherit, "dscr_inherit_test");
+}
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c
new file mode 100644
index 000000000..1899bd851
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c
@@ -0,0 +1,101 @@
+/*
+ * POWER Data Stream Control Register (DSCR) sysfs interface test
+ *
+ * This test updates to system wide DSCR default through the sysfs interface
+ * and then verifies that all the CPU specific DSCR defaults are updated as
+ * well verified from their sysfs interfaces.
+ *
+ * Copyright 2015, Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+#include "dscr.h"
+
+static int check_cpu_dscr_default(char *file, unsigned long val)
+{
+ char buf[10];
+ int fd, rc;
+
+ fd = open(file, O_RDWR);
+ if (fd == -1) {
+ perror("open() failed");
+ return 1;
+ }
+
+ rc = read(fd, buf, sizeof(buf));
+ if (rc == -1) {
+ perror("read() failed");
+ return 1;
+ }
+ close(fd);
+
+ buf[rc] = '\0';
+ if (strtol(buf, NULL, 16) != val) {
+ printf("DSCR match failed: %ld (system) %ld (cpu)\n",
+ val, strtol(buf, NULL, 16));
+ return 1;
+ }
+ return 0;
+}
+
+static int check_all_cpu_dscr_defaults(unsigned long val)
+{
+ DIR *sysfs;
+ struct dirent *dp;
+ char file[LEN_MAX];
+
+ sysfs = opendir(CPU_PATH);
+ if (!sysfs) {
+ perror("opendir() failed");
+ return 1;
+ }
+
+ while ((dp = readdir(sysfs))) {
+ int len;
+
+ if (!(dp->d_type & DT_DIR))
+ continue;
+ if (!strcmp(dp->d_name, "cpuidle"))
+ continue;
+ if (!strstr(dp->d_name, "cpu"))
+ continue;
+
+ len = snprintf(file, LEN_MAX, "%s%s/dscr", CPU_PATH, dp->d_name);
+ if (len >= LEN_MAX)
+ continue;
+ if (access(file, F_OK))
+ continue;
+
+ if (check_cpu_dscr_default(file, val))
+ return 1;
+ }
+ closedir(sysfs);
+ return 0;
+}
+
+int dscr_sysfs(void)
+{
+ unsigned long orig_dscr_default;
+ int i, j;
+
+ orig_dscr_default = get_default_dscr();
+ for (i = 0; i < COUNT; i++) {
+ for (j = 0; j < DSCR_MAX; j++) {
+ set_default_dscr(j);
+ if (check_all_cpu_dscr_defaults(j))
+ goto fail;
+ }
+ }
+ set_default_dscr(orig_dscr_default);
+ return 0;
+fail:
+ set_default_dscr(orig_dscr_default);
+ return 1;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(dscr_sysfs, "dscr_sysfs_test");
+}
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c
new file mode 100644
index 000000000..ad97b592e
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c
@@ -0,0 +1,80 @@
+/*
+ * POWER Data Stream Control Register (DSCR) sysfs thread test
+ *
+ * This test updates the system wide DSCR default value through
+ * sysfs interface which should then update all the CPU specific
+ * DSCR default values which must also be then visible to threads
+ * executing on individual CPUs on the system.
+ *
+ * Copyright 2015, Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+#define _GNU_SOURCE
+#include "dscr.h"
+
+static int test_thread_dscr(unsigned long val)
+{
+ unsigned long cur_dscr, cur_dscr_usr;
+
+ cur_dscr = get_dscr();
+ cur_dscr_usr = get_dscr_usr();
+
+ if (val != cur_dscr) {
+ printf("[cpu %d] Kernel DSCR should be %ld but is %ld\n",
+ sched_getcpu(), val, cur_dscr);
+ return 1;
+ }
+
+ if (val != cur_dscr_usr) {
+ printf("[cpu %d] User DSCR should be %ld but is %ld\n",
+ sched_getcpu(), val, cur_dscr_usr);
+ return 1;
+ }
+ return 0;
+}
+
+static int check_cpu_dscr_thread(unsigned long val)
+{
+ cpu_set_t mask;
+ int cpu;
+
+ for (cpu = 0; cpu < CPU_SETSIZE; cpu++) {
+ CPU_ZERO(&mask);
+ CPU_SET(cpu, &mask);
+ if (sched_setaffinity(0, sizeof(mask), &mask))
+ continue;
+
+ if (test_thread_dscr(val))
+ return 1;
+ }
+ return 0;
+
+}
+
+int dscr_sysfs_thread(void)
+{
+ unsigned long orig_dscr_default;
+ int i, j;
+
+ orig_dscr_default = get_default_dscr();
+ for (i = 0; i < COUNT; i++) {
+ for (j = 0; j < DSCR_MAX; j++) {
+ set_default_dscr(j);
+ if (check_cpu_dscr_thread(j))
+ goto fail;
+ }
+ }
+ set_default_dscr(orig_dscr_default);
+ return 0;
+fail:
+ set_default_dscr(orig_dscr_default);
+ return 1;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(dscr_sysfs_thread, "dscr_sysfs_thread_test");
+}
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_user_test.c b/tools/testing/selftests/powerpc/dscr/dscr_user_test.c
new file mode 100644
index 000000000..77d16b5e7
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dscr/dscr_user_test.c
@@ -0,0 +1,61 @@
+/*
+ * POWER Data Stream Control Register (DSCR) SPR test
+ *
+ * This test modifies the DSCR value through both the SPR number
+ * based mtspr instruction and then makes sure that the same is
+ * reflected through mfspr instruction using either of the SPR
+ * numbers.
+ *
+ * When using the privilege state SPR, the instructions such as
+ * mfspr or mtspr are priviledged and the kernel emulates them
+ * for us. Instructions using problem state SPR can be exuecuted
+ * directly without any emulation if the HW supports them. Else
+ * they also get emulated by the kernel.
+ *
+ * Copyright 2013, Anton Blanchard, IBM Corporation.
+ * Copyright 2015, Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+#include "dscr.h"
+
+static int check_dscr(char *str)
+{
+ unsigned long cur_dscr, cur_dscr_usr;
+
+ cur_dscr = get_dscr();
+ cur_dscr_usr = get_dscr_usr();
+ if (cur_dscr != cur_dscr_usr) {
+ printf("%s set, kernel get %lx != user get %lx\n",
+ str, cur_dscr, cur_dscr_usr);
+ return 1;
+ }
+ return 0;
+}
+
+int dscr_user(void)
+{
+ int i;
+
+ check_dscr("");
+
+ for (i = 0; i < COUNT; i++) {
+ set_dscr(i);
+ if (check_dscr("kernel"))
+ return 1;
+ }
+
+ for (i = 0; i < COUNT; i++) {
+ set_dscr_usr(i);
+ if (check_dscr("user"))
+ return 1;
+ }
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(dscr_user, "dscr_user_test");
+}
diff --git a/tools/testing/selftests/powerpc/harness.c b/tools/testing/selftests/powerpc/harness.c
new file mode 100644
index 000000000..9d7166dfa
--- /dev/null
+++ b/tools/testing/selftests/powerpc/harness.c
@@ -0,0 +1,131 @@
+/*
+ * Copyright 2013, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <errno.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <elf.h>
+#include <fcntl.h>
+#include <link.h>
+#include <sys/stat.h>
+
+#include "subunit.h"
+#include "utils.h"
+
+#define KILL_TIMEOUT 5
+
+static uint64_t timeout = 120;
+
+int run_test(int (test_function)(void), char *name)
+{
+ bool terminated;
+ int rc, status;
+ pid_t pid;
+
+ /* Make sure output is flushed before forking */
+ fflush(stdout);
+
+ pid = fork();
+ if (pid == 0) {
+ setpgid(0, 0);
+ exit(test_function());
+ } else if (pid == -1) {
+ perror("fork");
+ return 1;
+ }
+
+ setpgid(pid, pid);
+
+ /* Wake us up in timeout seconds */
+ alarm(timeout);
+ terminated = false;
+
+wait:
+ rc = waitpid(pid, &status, 0);
+ if (rc == -1) {
+ if (errno != EINTR) {
+ printf("unknown error from waitpid\n");
+ return 1;
+ }
+
+ if (terminated) {
+ printf("!! force killing %s\n", name);
+ kill(-pid, SIGKILL);
+ return 1;
+ } else {
+ printf("!! killing %s\n", name);
+ kill(-pid, SIGTERM);
+ terminated = true;
+ alarm(KILL_TIMEOUT);
+ goto wait;
+ }
+ }
+
+ /* Kill anything else in the process group that is still running */
+ kill(-pid, SIGTERM);
+
+ if (WIFEXITED(status))
+ status = WEXITSTATUS(status);
+ else {
+ if (WIFSIGNALED(status))
+ printf("!! child died by signal %d\n", WTERMSIG(status));
+ else
+ printf("!! child died by unknown cause\n");
+
+ status = 1; /* Signal or other */
+ }
+
+ return status;
+}
+
+static void sig_handler(int signum)
+{
+ /* Just wake us up from waitpid */
+}
+
+static struct sigaction sig_action = {
+ .sa_handler = sig_handler,
+};
+
+void test_harness_set_timeout(uint64_t time)
+{
+ timeout = time;
+}
+
+int test_harness(int (test_function)(void), char *name)
+{
+ int rc;
+
+ test_start(name);
+ test_set_git_version(GIT_VERSION);
+
+ if (sigaction(SIGINT, &sig_action, NULL)) {
+ perror("sigaction (sigint)");
+ test_error(name);
+ return 1;
+ }
+
+ if (sigaction(SIGALRM, &sig_action, NULL)) {
+ perror("sigaction (sigalrm)");
+ test_error(name);
+ return 1;
+ }
+
+ rc = run_test(test_function, name);
+
+ if (rc == MAGIC_SKIP_RETURN_VALUE) {
+ test_skip(name);
+ /* so that skipped test is not marked as failed */
+ rc = 0;
+ } else
+ test_finish(name, rc);
+
+ return rc;
+}
diff --git a/tools/testing/selftests/powerpc/include/basic_asm.h b/tools/testing/selftests/powerpc/include/basic_asm.h
new file mode 100644
index 000000000..886dc026f
--- /dev/null
+++ b/tools/testing/selftests/powerpc/include/basic_asm.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SELFTESTS_POWERPC_BASIC_ASM_H
+#define _SELFTESTS_POWERPC_BASIC_ASM_H
+
+#include <ppc-asm.h>
+#include <asm/unistd.h>
+
+#define LOAD_REG_IMMEDIATE(reg, expr) \
+ lis reg, (expr)@highest; \
+ ori reg, reg, (expr)@higher; \
+ rldicr reg, reg, 32, 31; \
+ oris reg, reg, (expr)@high; \
+ ori reg, reg, (expr)@l;
+
+/*
+ * Note: These macros assume that variables being stored on the stack are
+ * doublewords, while this is usually the case it may not always be the
+ * case for each use case.
+ */
+#if defined(_CALL_ELF) && _CALL_ELF == 2
+#define STACK_FRAME_MIN_SIZE 32
+#define STACK_FRAME_TOC_POS 24
+#define __STACK_FRAME_PARAM(_param) (32 + ((_param)*8))
+#define __STACK_FRAME_LOCAL(_num_params, _var_num) \
+ ((STACK_FRAME_PARAM(_num_params)) + ((_var_num)*8))
+#else
+#define STACK_FRAME_MIN_SIZE 112
+#define STACK_FRAME_TOC_POS 40
+#define __STACK_FRAME_PARAM(i) (48 + ((i)*8))
+
+/*
+ * Caveat: if a function passed more than 8 doublewords, the caller will have
+ * made more space... which would render the 112 incorrect.
+ */
+#define __STACK_FRAME_LOCAL(_num_params, _var_num) \
+ (112 + ((_var_num)*8))
+#endif
+
+/* Parameter x saved to the stack */
+#define STACK_FRAME_PARAM(var) __STACK_FRAME_PARAM(var)
+
+/* Local variable x saved to the stack after x parameters */
+#define STACK_FRAME_LOCAL(num_params, var) \
+ __STACK_FRAME_LOCAL(num_params, var)
+#define STACK_FRAME_LR_POS 16
+#define STACK_FRAME_CR_POS 8
+
+/*
+ * It is very important to note here that _extra is the extra amount of
+ * stack space needed. This space can be accessed using STACK_FRAME_PARAM()
+ * or STACK_FRAME_LOCAL() macros.
+ *
+ * r1 and r2 are not defined in ppc-asm.h (instead they are defined as sp
+ * and toc). Kernel programmers tend to prefer rX even for r1 and r2, hence
+ * %1 and %r2. r0 is defined in ppc-asm.h and therefore %r0 gets
+ * preprocessed incorrectly, hence r0.
+ */
+#define PUSH_BASIC_STACK(_extra) \
+ mflr r0; \
+ std r0, STACK_FRAME_LR_POS(%r1); \
+ stdu %r1, -(_extra + STACK_FRAME_MIN_SIZE)(%r1); \
+ mfcr r0; \
+ stw r0, STACK_FRAME_CR_POS(%r1); \
+ std %r2, STACK_FRAME_TOC_POS(%r1);
+
+#define POP_BASIC_STACK(_extra) \
+ ld %r2, STACK_FRAME_TOC_POS(%r1); \
+ lwz r0, STACK_FRAME_CR_POS(%r1); \
+ mtcr r0; \
+ addi %r1, %r1, (_extra + STACK_FRAME_MIN_SIZE); \
+ ld r0, STACK_FRAME_LR_POS(%r1); \
+ mtlr r0;
+
+#endif /* _SELFTESTS_POWERPC_BASIC_ASM_H */
diff --git a/tools/testing/selftests/powerpc/include/fpu_asm.h b/tools/testing/selftests/powerpc/include/fpu_asm.h
new file mode 100644
index 000000000..6a387d255
--- /dev/null
+++ b/tools/testing/selftests/powerpc/include/fpu_asm.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright 2016, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _SELFTESTS_POWERPC_FPU_ASM_H
+#define _SELFTESTS_POWERPC_FPU_ASM_H
+#include "basic_asm.h"
+
+#define PUSH_FPU(stack_size) \
+ stfd f31,(stack_size + STACK_FRAME_MIN_SIZE)(%r1); \
+ stfd f30,(stack_size + STACK_FRAME_MIN_SIZE - 8)(%r1); \
+ stfd f29,(stack_size + STACK_FRAME_MIN_SIZE - 16)(%r1); \
+ stfd f28,(stack_size + STACK_FRAME_MIN_SIZE - 24)(%r1); \
+ stfd f27,(stack_size + STACK_FRAME_MIN_SIZE - 32)(%r1); \
+ stfd f26,(stack_size + STACK_FRAME_MIN_SIZE - 40)(%r1); \
+ stfd f25,(stack_size + STACK_FRAME_MIN_SIZE - 48)(%r1); \
+ stfd f24,(stack_size + STACK_FRAME_MIN_SIZE - 56)(%r1); \
+ stfd f23,(stack_size + STACK_FRAME_MIN_SIZE - 64)(%r1); \
+ stfd f22,(stack_size + STACK_FRAME_MIN_SIZE - 72)(%r1); \
+ stfd f21,(stack_size + STACK_FRAME_MIN_SIZE - 80)(%r1); \
+ stfd f20,(stack_size + STACK_FRAME_MIN_SIZE - 88)(%r1); \
+ stfd f19,(stack_size + STACK_FRAME_MIN_SIZE - 96)(%r1); \
+ stfd f18,(stack_size + STACK_FRAME_MIN_SIZE - 104)(%r1); \
+ stfd f17,(stack_size + STACK_FRAME_MIN_SIZE - 112)(%r1); \
+ stfd f16,(stack_size + STACK_FRAME_MIN_SIZE - 120)(%r1); \
+ stfd f15,(stack_size + STACK_FRAME_MIN_SIZE - 128)(%r1); \
+ stfd f14,(stack_size + STACK_FRAME_MIN_SIZE - 136)(%r1);
+
+#define POP_FPU(stack_size) \
+ lfd f31,(stack_size + STACK_FRAME_MIN_SIZE)(%r1); \
+ lfd f30,(stack_size + STACK_FRAME_MIN_SIZE - 8)(%r1); \
+ lfd f29,(stack_size + STACK_FRAME_MIN_SIZE - 16)(%r1); \
+ lfd f28,(stack_size + STACK_FRAME_MIN_SIZE - 24)(%r1); \
+ lfd f27,(stack_size + STACK_FRAME_MIN_SIZE - 32)(%r1); \
+ lfd f26,(stack_size + STACK_FRAME_MIN_SIZE - 40)(%r1); \
+ lfd f25,(stack_size + STACK_FRAME_MIN_SIZE - 48)(%r1); \
+ lfd f24,(stack_size + STACK_FRAME_MIN_SIZE - 56)(%r1); \
+ lfd f23,(stack_size + STACK_FRAME_MIN_SIZE - 64)(%r1); \
+ lfd f22,(stack_size + STACK_FRAME_MIN_SIZE - 72)(%r1); \
+ lfd f21,(stack_size + STACK_FRAME_MIN_SIZE - 80)(%r1); \
+ lfd f20,(stack_size + STACK_FRAME_MIN_SIZE - 88)(%r1); \
+ lfd f19,(stack_size + STACK_FRAME_MIN_SIZE - 96)(%r1); \
+ lfd f18,(stack_size + STACK_FRAME_MIN_SIZE - 104)(%r1); \
+ lfd f17,(stack_size + STACK_FRAME_MIN_SIZE - 112)(%r1); \
+ lfd f16,(stack_size + STACK_FRAME_MIN_SIZE - 120)(%r1); \
+ lfd f15,(stack_size + STACK_FRAME_MIN_SIZE - 128)(%r1); \
+ lfd f14,(stack_size + STACK_FRAME_MIN_SIZE - 136)(%r1);
+
+/*
+ * Careful calling this, it will 'clobber' fpu (by design)
+ * Don't call this from C
+ */
+FUNC_START(load_fpu)
+ lfd f14,0(r3)
+ lfd f15,8(r3)
+ lfd f16,16(r3)
+ lfd f17,24(r3)
+ lfd f18,32(r3)
+ lfd f19,40(r3)
+ lfd f20,48(r3)
+ lfd f21,56(r3)
+ lfd f22,64(r3)
+ lfd f23,72(r3)
+ lfd f24,80(r3)
+ lfd f25,88(r3)
+ lfd f26,96(r3)
+ lfd f27,104(r3)
+ lfd f28,112(r3)
+ lfd f29,120(r3)
+ lfd f30,128(r3)
+ lfd f31,136(r3)
+ blr
+FUNC_END(load_fpu)
+
+#endif /* _SELFTESTS_POWERPC_FPU_ASM_H */
diff --git a/tools/testing/selftests/powerpc/include/gpr_asm.h b/tools/testing/selftests/powerpc/include/gpr_asm.h
new file mode 100644
index 000000000..f6f38852d
--- /dev/null
+++ b/tools/testing/selftests/powerpc/include/gpr_asm.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright 2016, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _SELFTESTS_POWERPC_GPR_ASM_H
+#define _SELFTESTS_POWERPC_GPR_ASM_H
+
+#include "basic_asm.h"
+
+#define __PUSH_NVREGS(top_pos); \
+ std r31,(top_pos)(%r1); \
+ std r30,(top_pos - 8)(%r1); \
+ std r29,(top_pos - 16)(%r1); \
+ std r28,(top_pos - 24)(%r1); \
+ std r27,(top_pos - 32)(%r1); \
+ std r26,(top_pos - 40)(%r1); \
+ std r25,(top_pos - 48)(%r1); \
+ std r24,(top_pos - 56)(%r1); \
+ std r23,(top_pos - 64)(%r1); \
+ std r22,(top_pos - 72)(%r1); \
+ std r21,(top_pos - 80)(%r1); \
+ std r20,(top_pos - 88)(%r1); \
+ std r19,(top_pos - 96)(%r1); \
+ std r18,(top_pos - 104)(%r1); \
+ std r17,(top_pos - 112)(%r1); \
+ std r16,(top_pos - 120)(%r1); \
+ std r15,(top_pos - 128)(%r1); \
+ std r14,(top_pos - 136)(%r1)
+
+#define __POP_NVREGS(top_pos); \
+ ld r31,(top_pos)(%r1); \
+ ld r30,(top_pos - 8)(%r1); \
+ ld r29,(top_pos - 16)(%r1); \
+ ld r28,(top_pos - 24)(%r1); \
+ ld r27,(top_pos - 32)(%r1); \
+ ld r26,(top_pos - 40)(%r1); \
+ ld r25,(top_pos - 48)(%r1); \
+ ld r24,(top_pos - 56)(%r1); \
+ ld r23,(top_pos - 64)(%r1); \
+ ld r22,(top_pos - 72)(%r1); \
+ ld r21,(top_pos - 80)(%r1); \
+ ld r20,(top_pos - 88)(%r1); \
+ ld r19,(top_pos - 96)(%r1); \
+ ld r18,(top_pos - 104)(%r1); \
+ ld r17,(top_pos - 112)(%r1); \
+ ld r16,(top_pos - 120)(%r1); \
+ ld r15,(top_pos - 128)(%r1); \
+ ld r14,(top_pos - 136)(%r1)
+
+#define PUSH_NVREGS(stack_size) \
+ __PUSH_NVREGS(stack_size + STACK_FRAME_MIN_SIZE)
+
+/* 18 NV FPU REGS */
+#define PUSH_NVREGS_BELOW_FPU(stack_size) \
+ __PUSH_NVREGS(stack_size + STACK_FRAME_MIN_SIZE - (18 * 8))
+
+#define POP_NVREGS(stack_size) \
+ __POP_NVREGS(stack_size + STACK_FRAME_MIN_SIZE)
+
+/* 18 NV FPU REGS */
+#define POP_NVREGS_BELOW_FPU(stack_size) \
+ __POP_NVREGS(stack_size + STACK_FRAME_MIN_SIZE - (18 * 8))
+
+/*
+ * Careful calling this, it will 'clobber' NVGPRs (by design)
+ * Don't call this from C
+ */
+FUNC_START(load_gpr)
+ ld r14,0(r3)
+ ld r15,8(r3)
+ ld r16,16(r3)
+ ld r17,24(r3)
+ ld r18,32(r3)
+ ld r19,40(r3)
+ ld r20,48(r3)
+ ld r21,56(r3)
+ ld r22,64(r3)
+ ld r23,72(r3)
+ ld r24,80(r3)
+ ld r25,88(r3)
+ ld r26,96(r3)
+ ld r27,104(r3)
+ ld r28,112(r3)
+ ld r29,120(r3)
+ ld r30,128(r3)
+ ld r31,136(r3)
+ blr
+FUNC_END(load_gpr)
+
+
+#endif /* _SELFTESTS_POWERPC_GPR_ASM_H */
diff --git a/tools/testing/selftests/powerpc/include/instructions.h b/tools/testing/selftests/powerpc/include/instructions.h
new file mode 100644
index 000000000..f36061eb6
--- /dev/null
+++ b/tools/testing/selftests/powerpc/include/instructions.h
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SELFTESTS_POWERPC_INSTRUCTIONS_H
+#define _SELFTESTS_POWERPC_INSTRUCTIONS_H
+
+#include <stdio.h>
+#include <stdlib.h>
+
+/* This defines the "copy" instruction from Power ISA 3.0 Book II, section 4.4. */
+#define __COPY(RA, RB, L) \
+ (0x7c00060c | (RA) << (31-15) | (RB) << (31-20) | (L) << (31-10))
+#define COPY(RA, RB, L) \
+ .long __COPY((RA), (RB), (L))
+
+static inline void copy(void *i)
+{
+ asm volatile(str(COPY(0, %0, 0))";"
+ :
+ : "b" (i)
+ : "memory"
+ );
+}
+
+static inline void copy_first(void *i)
+{
+ asm volatile(str(COPY(0, %0, 1))";"
+ :
+ : "b" (i)
+ : "memory"
+ );
+}
+
+/* This defines the "paste" instruction from Power ISA 3.0 Book II, section 4.4. */
+#define __PASTE(RA, RB, L, RC) \
+ (0x7c00070c | (RA) << (31-15) | (RB) << (31-20) | (L) << (31-10) | (RC) << (31-31))
+#define PASTE(RA, RB, L, RC) \
+ .long __PASTE((RA), (RB), (L), (RC))
+
+static inline int paste(void *i)
+{
+ int cr;
+
+ asm volatile(str(PASTE(0, %1, 0, 0))";"
+ "mfcr %0;"
+ : "=r" (cr)
+ : "b" (i)
+ : "memory"
+ );
+ return cr;
+}
+
+static inline int paste_last(void *i)
+{
+ int cr;
+
+ asm volatile(str(PASTE(0, %1, 1, 1))";"
+ "mfcr %0;"
+ : "=r" (cr)
+ : "b" (i)
+ : "memory"
+ );
+ return cr;
+}
+
+#define PPC_INST_COPY __COPY(0, 0, 0)
+#define PPC_INST_COPY_FIRST __COPY(0, 0, 1)
+#define PPC_INST_PASTE __PASTE(0, 0, 0, 0)
+#define PPC_INST_PASTE_LAST __PASTE(0, 0, 1, 1)
+
+#endif /* _SELFTESTS_POWERPC_INSTRUCTIONS_H */
diff --git a/tools/testing/selftests/powerpc/include/reg.h b/tools/testing/selftests/powerpc/include/reg.h
new file mode 100644
index 000000000..7f348c059
--- /dev/null
+++ b/tools/testing/selftests/powerpc/include/reg.h
@@ -0,0 +1,146 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#ifndef _SELFTESTS_POWERPC_REG_H
+#define _SELFTESTS_POWERPC_REG_H
+
+#define __stringify_1(x) #x
+#define __stringify(x) __stringify_1(x)
+
+#define mfspr(rn) ({unsigned long rval; \
+ asm volatile("mfspr %0," _str(rn) \
+ : "=r" (rval)); rval; })
+#define mtspr(rn, v) asm volatile("mtspr " _str(rn) ",%0" : \
+ : "r" ((unsigned long)(v)) \
+ : "memory")
+
+#define mb() asm volatile("sync" : : : "memory");
+
+#define SPRN_MMCR2 769
+#define SPRN_MMCRA 770
+#define SPRN_MMCR0 779
+#define MMCR0_PMAO 0x00000080
+#define MMCR0_PMAE 0x04000000
+#define MMCR0_FC 0x80000000
+#define SPRN_EBBHR 804
+#define SPRN_EBBRR 805
+#define SPRN_BESCR 806 /* Branch event status & control register */
+#define SPRN_BESCRS 800 /* Branch event status & control set (1 bits set to 1) */
+#define SPRN_BESCRSU 801 /* Branch event status & control set upper */
+#define SPRN_BESCRR 802 /* Branch event status & control REset (1 bits set to 0) */
+#define SPRN_BESCRRU 803 /* Branch event status & control REset upper */
+
+#define BESCR_PMEO 0x1 /* PMU Event-based exception Occurred */
+#define BESCR_PME (0x1ul << 32) /* PMU Event-based exception Enable */
+
+#define SPRN_PMC1 771
+#define SPRN_PMC2 772
+#define SPRN_PMC3 773
+#define SPRN_PMC4 774
+#define SPRN_PMC5 775
+#define SPRN_PMC6 776
+
+#define SPRN_SIAR 780
+#define SPRN_SDAR 781
+#define SPRN_SIER 768
+
+#define SPRN_TEXASR 0x82 /* Transaction Exception and Status Register */
+#define SPRN_TFIAR 0x81 /* Transaction Failure Inst Addr */
+#define SPRN_TFHAR 0x80 /* Transaction Failure Handler Addr */
+#define SPRN_TAR 0x32f /* Target Address Register */
+
+#define SPRN_DSCR_PRIV 0x11 /* Privilege State DSCR */
+#define SPRN_DSCR 0x03 /* Data Stream Control Register */
+#define SPRN_PPR 896 /* Program Priority Register */
+#define SPRN_AMR 13 /* Authority Mask Register - problem state */
+
+/* TEXASR register bits */
+#define TEXASR_FC 0xFE00000000000000
+#define TEXASR_FP 0x0100000000000000
+#define TEXASR_DA 0x0080000000000000
+#define TEXASR_NO 0x0040000000000000
+#define TEXASR_FO 0x0020000000000000
+#define TEXASR_SIC 0x0010000000000000
+#define TEXASR_NTC 0x0008000000000000
+#define TEXASR_TC 0x0004000000000000
+#define TEXASR_TIC 0x0002000000000000
+#define TEXASR_IC 0x0001000000000000
+#define TEXASR_IFC 0x0000800000000000
+#define TEXASR_ABT 0x0000000100000000
+#define TEXASR_SPD 0x0000000080000000
+#define TEXASR_HV 0x0000000020000000
+#define TEXASR_PR 0x0000000010000000
+#define TEXASR_FS 0x0000000008000000
+#define TEXASR_TE 0x0000000004000000
+#define TEXASR_ROT 0x0000000002000000
+
+/* Vector Instructions */
+#define VSX_XX1(xs, ra, rb) (((xs) & 0x1f) << 21 | ((ra) << 16) | \
+ ((rb) << 11) | (((xs) >> 5)))
+#define STXVD2X(xs, ra, rb) .long (0x7c000798 | VSX_XX1((xs), (ra), (rb)))
+#define LXVD2X(xs, ra, rb) .long (0x7c000698 | VSX_XX1((xs), (ra), (rb)))
+
+#define ASM_LOAD_GPR_IMMED(_asm_symbol_name_immed) \
+ "li 14, %[" #_asm_symbol_name_immed "];" \
+ "li 15, %[" #_asm_symbol_name_immed "];" \
+ "li 16, %[" #_asm_symbol_name_immed "];" \
+ "li 17, %[" #_asm_symbol_name_immed "];" \
+ "li 18, %[" #_asm_symbol_name_immed "];" \
+ "li 19, %[" #_asm_symbol_name_immed "];" \
+ "li 20, %[" #_asm_symbol_name_immed "];" \
+ "li 21, %[" #_asm_symbol_name_immed "];" \
+ "li 22, %[" #_asm_symbol_name_immed "];" \
+ "li 23, %[" #_asm_symbol_name_immed "];" \
+ "li 24, %[" #_asm_symbol_name_immed "];" \
+ "li 25, %[" #_asm_symbol_name_immed "];" \
+ "li 26, %[" #_asm_symbol_name_immed "];" \
+ "li 27, %[" #_asm_symbol_name_immed "];" \
+ "li 28, %[" #_asm_symbol_name_immed "];" \
+ "li 29, %[" #_asm_symbol_name_immed "];" \
+ "li 30, %[" #_asm_symbol_name_immed "];" \
+ "li 31, %[" #_asm_symbol_name_immed "];"
+
+#define ASM_LOAD_FPR_SINGLE_PRECISION(_asm_symbol_name_addr) \
+ "lfs 0, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfs 1, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfs 2, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfs 3, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfs 4, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfs 5, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfs 6, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfs 7, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfs 8, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfs 9, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfs 10, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfs 11, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfs 12, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfs 13, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfs 14, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfs 15, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfs 16, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfs 17, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfs 18, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfs 19, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfs 20, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfs 21, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfs 22, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfs 23, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfs 24, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfs 25, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfs 26, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfs 27, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfs 28, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfs 29, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfs 30, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfs 31, 0(%[" #_asm_symbol_name_addr "]);"
+
+#ifndef __ASSEMBLER__
+void store_gpr(unsigned long *addr);
+void load_gpr(unsigned long *addr);
+void load_fpr_single_precision(float *addr);
+void store_fpr_single_precision(float *addr);
+#endif /* end of __ASSEMBLER__ */
+
+#endif /* _SELFTESTS_POWERPC_REG_H */
diff --git a/tools/testing/selftests/powerpc/include/subunit.h b/tools/testing/selftests/powerpc/include/subunit.h
new file mode 100644
index 000000000..9c6c4e901
--- /dev/null
+++ b/tools/testing/selftests/powerpc/include/subunit.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright 2013, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#ifndef _SELFTESTS_POWERPC_SUBUNIT_H
+#define _SELFTESTS_POWERPC_SUBUNIT_H
+
+static inline void test_start(char *name)
+{
+ printf("test: %s\n", name);
+}
+
+static inline void test_failure_detail(char *name, char *detail)
+{
+ printf("failure: %s [%s]\n", name, detail);
+}
+
+static inline void test_failure(char *name)
+{
+ printf("failure: %s\n", name);
+}
+
+static inline void test_error(char *name)
+{
+ printf("error: %s\n", name);
+}
+
+static inline void test_skip(char *name)
+{
+ printf("skip: %s\n", name);
+}
+
+static inline void test_success(char *name)
+{
+ printf("success: %s\n", name);
+}
+
+static inline void test_finish(char *name, int status)
+{
+ if (status)
+ test_failure(name);
+ else
+ test_success(name);
+}
+
+static inline void test_set_git_version(char *value)
+{
+ printf("tags: git_version:%s\n", value);
+}
+
+#endif /* _SELFTESTS_POWERPC_SUBUNIT_H */
diff --git a/tools/testing/selftests/powerpc/include/utils.h b/tools/testing/selftests/powerpc/include/utils.h
new file mode 100644
index 000000000..c58c37082
--- /dev/null
+++ b/tools/testing/selftests/powerpc/include/utils.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright 2013, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#ifndef _SELFTESTS_POWERPC_UTILS_H
+#define _SELFTESTS_POWERPC_UTILS_H
+
+#define __cacheline_aligned __attribute__((aligned(128)))
+
+#include <stdint.h>
+#include <stdbool.h>
+#include <linux/auxvec.h>
+#include "reg.h"
+
+/* Avoid headaches with PRI?64 - just use %ll? always */
+typedef unsigned long long u64;
+typedef signed long long s64;
+
+/* Just for familiarity */
+typedef uint32_t u32;
+typedef uint16_t u16;
+typedef uint8_t u8;
+
+void test_harness_set_timeout(uint64_t time);
+int test_harness(int (test_function)(void), char *name);
+
+int read_auxv(char *buf, ssize_t buf_size);
+void *find_auxv_entry(int type, char *auxv);
+void *get_auxv_entry(int type);
+
+int pick_online_cpu(void);
+
+static inline bool have_hwcap(unsigned long ftr)
+{
+ return ((unsigned long)get_auxv_entry(AT_HWCAP) & ftr) == ftr;
+}
+
+#ifdef AT_HWCAP2
+static inline bool have_hwcap2(unsigned long ftr2)
+{
+ return ((unsigned long)get_auxv_entry(AT_HWCAP2) & ftr2) == ftr2;
+}
+#else
+static inline bool have_hwcap2(unsigned long ftr2)
+{
+ return false;
+}
+#endif
+
+bool is_ppc64le(void);
+
+/* Yes, this is evil */
+#define FAIL_IF(x) \
+do { \
+ if ((x)) { \
+ fprintf(stderr, \
+ "[FAIL] Test FAILED on line %d\n", __LINE__); \
+ return 1; \
+ } \
+} while (0)
+
+/* The test harness uses this, yes it's gross */
+#define MAGIC_SKIP_RETURN_VALUE 99
+
+#define SKIP_IF(x) \
+do { \
+ if ((x)) { \
+ fprintf(stderr, \
+ "[SKIP] Test skipped on line %d\n", __LINE__); \
+ return MAGIC_SKIP_RETURN_VALUE; \
+ } \
+} while (0)
+
+#define _str(s) #s
+#define str(s) _str(s)
+
+/* POWER9 feature */
+#ifndef PPC_FEATURE2_ARCH_3_00
+#define PPC_FEATURE2_ARCH_3_00 0x00800000
+#endif
+
+#endif /* _SELFTESTS_POWERPC_UTILS_H */
diff --git a/tools/testing/selftests/powerpc/include/vmx_asm.h b/tools/testing/selftests/powerpc/include/vmx_asm.h
new file mode 100644
index 000000000..2eaaeca9c
--- /dev/null
+++ b/tools/testing/selftests/powerpc/include/vmx_asm.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include "basic_asm.h"
+
+/* POS MUST BE 16 ALIGNED! */
+#define PUSH_VMX(pos,reg) \
+ li reg,pos; \
+ stvx v20,reg,%r1; \
+ addi reg,reg,16; \
+ stvx v21,reg,%r1; \
+ addi reg,reg,16; \
+ stvx v22,reg,%r1; \
+ addi reg,reg,16; \
+ stvx v23,reg,%r1; \
+ addi reg,reg,16; \
+ stvx v24,reg,%r1; \
+ addi reg,reg,16; \
+ stvx v25,reg,%r1; \
+ addi reg,reg,16; \
+ stvx v26,reg,%r1; \
+ addi reg,reg,16; \
+ stvx v27,reg,%r1; \
+ addi reg,reg,16; \
+ stvx v28,reg,%r1; \
+ addi reg,reg,16; \
+ stvx v29,reg,%r1; \
+ addi reg,reg,16; \
+ stvx v30,reg,%r1; \
+ addi reg,reg,16; \
+ stvx v31,reg,%r1;
+
+/* POS MUST BE 16 ALIGNED! */
+#define POP_VMX(pos,reg) \
+ li reg,pos; \
+ lvx v20,reg,%r1; \
+ addi reg,reg,16; \
+ lvx v21,reg,%r1; \
+ addi reg,reg,16; \
+ lvx v22,reg,%r1; \
+ addi reg,reg,16; \
+ lvx v23,reg,%r1; \
+ addi reg,reg,16; \
+ lvx v24,reg,%r1; \
+ addi reg,reg,16; \
+ lvx v25,reg,%r1; \
+ addi reg,reg,16; \
+ lvx v26,reg,%r1; \
+ addi reg,reg,16; \
+ lvx v27,reg,%r1; \
+ addi reg,reg,16; \
+ lvx v28,reg,%r1; \
+ addi reg,reg,16; \
+ lvx v29,reg,%r1; \
+ addi reg,reg,16; \
+ lvx v30,reg,%r1; \
+ addi reg,reg,16; \
+ lvx v31,reg,%r1;
+
+/*
+ * Careful this will 'clobber' vmx (by design)
+ * Don't call this from C
+ */
+FUNC_START(load_vmx)
+ li r5,0
+ lvx v20,r5,r3
+ addi r5,r5,16
+ lvx v21,r5,r3
+ addi r5,r5,16
+ lvx v22,r5,r3
+ addi r5,r5,16
+ lvx v23,r5,r3
+ addi r5,r5,16
+ lvx v24,r5,r3
+ addi r5,r5,16
+ lvx v25,r5,r3
+ addi r5,r5,16
+ lvx v26,r5,r3
+ addi r5,r5,16
+ lvx v27,r5,r3
+ addi r5,r5,16
+ lvx v28,r5,r3
+ addi r5,r5,16
+ lvx v29,r5,r3
+ addi r5,r5,16
+ lvx v30,r5,r3
+ addi r5,r5,16
+ lvx v31,r5,r3
+ blr
+FUNC_END(load_vmx)
diff --git a/tools/testing/selftests/powerpc/include/vsx_asm.h b/tools/testing/selftests/powerpc/include/vsx_asm.h
new file mode 100644
index 000000000..54064ced9
--- /dev/null
+++ b/tools/testing/selftests/powerpc/include/vsx_asm.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include "basic_asm.h"
+
+/*
+ * Careful this will 'clobber' vsx (by design), VSX are always
+ * volatile though so unlike vmx this isn't so much of an issue
+ * Still should avoid calling from C
+ */
+FUNC_START(load_vsx)
+ li r5,0
+ lxvd2x vs20,r5,r3
+ addi r5,r5,16
+ lxvd2x vs21,r5,r3
+ addi r5,r5,16
+ lxvd2x vs22,r5,r3
+ addi r5,r5,16
+ lxvd2x vs23,r5,r3
+ addi r5,r5,16
+ lxvd2x vs24,r5,r3
+ addi r5,r5,16
+ lxvd2x vs25,r5,r3
+ addi r5,r5,16
+ lxvd2x vs26,r5,r3
+ addi r5,r5,16
+ lxvd2x vs27,r5,r3
+ addi r5,r5,16
+ lxvd2x vs28,r5,r3
+ addi r5,r5,16
+ lxvd2x vs29,r5,r3
+ addi r5,r5,16
+ lxvd2x vs30,r5,r3
+ addi r5,r5,16
+ lxvd2x vs31,r5,r3
+ blr
+FUNC_END(load_vsx)
+
+FUNC_START(store_vsx)
+ li r5,0
+ stxvd2x vs20,r5,r3
+ addi r5,r5,16
+ stxvd2x vs21,r5,r3
+ addi r5,r5,16
+ stxvd2x vs22,r5,r3
+ addi r5,r5,16
+ stxvd2x vs23,r5,r3
+ addi r5,r5,16
+ stxvd2x vs24,r5,r3
+ addi r5,r5,16
+ stxvd2x vs25,r5,r3
+ addi r5,r5,16
+ stxvd2x vs26,r5,r3
+ addi r5,r5,16
+ stxvd2x vs27,r5,r3
+ addi r5,r5,16
+ stxvd2x vs28,r5,r3
+ addi r5,r5,16
+ stxvd2x vs29,r5,r3
+ addi r5,r5,16
+ stxvd2x vs30,r5,r3
+ addi r5,r5,16
+ stxvd2x vs31,r5,r3
+ blr
+FUNC_END(store_vsx)
diff --git a/tools/testing/selftests/powerpc/lib/reg.S b/tools/testing/selftests/powerpc/lib/reg.S
new file mode 100644
index 000000000..0dc44f0da
--- /dev/null
+++ b/tools/testing/selftests/powerpc/lib/reg.S
@@ -0,0 +1,397 @@
+/*
+ * test helper assembly functions
+ *
+ * Copyright (C) 2016 Simon Guo, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <ppc-asm.h>
+#include "reg.h"
+
+
+/* Non volatile GPR - unsigned long buf[18] */
+FUNC_START(load_gpr)
+ ld 14, 0*8(3)
+ ld 15, 1*8(3)
+ ld 16, 2*8(3)
+ ld 17, 3*8(3)
+ ld 18, 4*8(3)
+ ld 19, 5*8(3)
+ ld 20, 6*8(3)
+ ld 21, 7*8(3)
+ ld 22, 8*8(3)
+ ld 23, 9*8(3)
+ ld 24, 10*8(3)
+ ld 25, 11*8(3)
+ ld 26, 12*8(3)
+ ld 27, 13*8(3)
+ ld 28, 14*8(3)
+ ld 29, 15*8(3)
+ ld 30, 16*8(3)
+ ld 31, 17*8(3)
+ blr
+FUNC_END(load_gpr)
+
+FUNC_START(store_gpr)
+ std 14, 0*8(3)
+ std 15, 1*8(3)
+ std 16, 2*8(3)
+ std 17, 3*8(3)
+ std 18, 4*8(3)
+ std 19, 5*8(3)
+ std 20, 6*8(3)
+ std 21, 7*8(3)
+ std 22, 8*8(3)
+ std 23, 9*8(3)
+ std 24, 10*8(3)
+ std 25, 11*8(3)
+ std 26, 12*8(3)
+ std 27, 13*8(3)
+ std 28, 14*8(3)
+ std 29, 15*8(3)
+ std 30, 16*8(3)
+ std 31, 17*8(3)
+ blr
+FUNC_END(store_gpr)
+
+/* Single Precision Float - float buf[32] */
+FUNC_START(load_fpr_single_precision)
+ lfs 0, 0*4(3)
+ lfs 1, 1*4(3)
+ lfs 2, 2*4(3)
+ lfs 3, 3*4(3)
+ lfs 4, 4*4(3)
+ lfs 5, 5*4(3)
+ lfs 6, 6*4(3)
+ lfs 7, 7*4(3)
+ lfs 8, 8*4(3)
+ lfs 9, 9*4(3)
+ lfs 10, 10*4(3)
+ lfs 11, 11*4(3)
+ lfs 12, 12*4(3)
+ lfs 13, 13*4(3)
+ lfs 14, 14*4(3)
+ lfs 15, 15*4(3)
+ lfs 16, 16*4(3)
+ lfs 17, 17*4(3)
+ lfs 18, 18*4(3)
+ lfs 19, 19*4(3)
+ lfs 20, 20*4(3)
+ lfs 21, 21*4(3)
+ lfs 22, 22*4(3)
+ lfs 23, 23*4(3)
+ lfs 24, 24*4(3)
+ lfs 25, 25*4(3)
+ lfs 26, 26*4(3)
+ lfs 27, 27*4(3)
+ lfs 28, 28*4(3)
+ lfs 29, 29*4(3)
+ lfs 30, 30*4(3)
+ lfs 31, 31*4(3)
+ blr
+FUNC_END(load_fpr_single_precision)
+
+/* Single Precision Float - float buf[32] */
+FUNC_START(store_fpr_single_precision)
+ stfs 0, 0*4(3)
+ stfs 1, 1*4(3)
+ stfs 2, 2*4(3)
+ stfs 3, 3*4(3)
+ stfs 4, 4*4(3)
+ stfs 5, 5*4(3)
+ stfs 6, 6*4(3)
+ stfs 7, 7*4(3)
+ stfs 8, 8*4(3)
+ stfs 9, 9*4(3)
+ stfs 10, 10*4(3)
+ stfs 11, 11*4(3)
+ stfs 12, 12*4(3)
+ stfs 13, 13*4(3)
+ stfs 14, 14*4(3)
+ stfs 15, 15*4(3)
+ stfs 16, 16*4(3)
+ stfs 17, 17*4(3)
+ stfs 18, 18*4(3)
+ stfs 19, 19*4(3)
+ stfs 20, 20*4(3)
+ stfs 21, 21*4(3)
+ stfs 22, 22*4(3)
+ stfs 23, 23*4(3)
+ stfs 24, 24*4(3)
+ stfs 25, 25*4(3)
+ stfs 26, 26*4(3)
+ stfs 27, 27*4(3)
+ stfs 28, 28*4(3)
+ stfs 29, 29*4(3)
+ stfs 30, 30*4(3)
+ stfs 31, 31*4(3)
+ blr
+FUNC_END(store_fpr_single_precision)
+
+/* VMX/VSX registers - unsigned long buf[128] */
+FUNC_START(loadvsx)
+ lis 4, 0
+ LXVD2X (0,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (1,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (2,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (3,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (4,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (5,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (6,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (7,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (8,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (9,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (10,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (11,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (12,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (13,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (14,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (15,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (16,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (17,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (18,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (19,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (20,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (21,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (22,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (23,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (24,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (25,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (26,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (27,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (28,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (29,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (30,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (31,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (32,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (33,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (34,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (35,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (36,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (37,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (38,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (39,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (40,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (41,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (42,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (43,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (44,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (45,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (46,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (47,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (48,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (49,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (50,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (51,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (52,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (53,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (54,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (55,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (56,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (57,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (58,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (59,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (60,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (61,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (62,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (63,(4),(3))
+ blr
+FUNC_END(loadvsx)
+
+FUNC_START(storevsx)
+ lis 4, 0
+ STXVD2X (0,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (1,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (2,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (3,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (4,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (5,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (6,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (7,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (8,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (9,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (10,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (11,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (12,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (13,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (14,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (15,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (16,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (17,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (18,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (19,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (20,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (21,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (22,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (23,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (24,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (25,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (26,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (27,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (28,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (29,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (30,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (31,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (32,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (33,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (34,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (35,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (36,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (37,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (38,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (39,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (40,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (41,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (42,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (43,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (44,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (45,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (46,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (47,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (48,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (49,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (50,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (51,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (52,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (53,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (54,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (55,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (56,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (57,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (58,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (59,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (60,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (61,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (62,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (63,(4),(3))
+ blr
+FUNC_END(storevsx)
diff --git a/tools/testing/selftests/powerpc/math/.gitignore b/tools/testing/selftests/powerpc/math/.gitignore
new file mode 100644
index 000000000..50ded63e2
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/.gitignore
@@ -0,0 +1,7 @@
+fpu_syscall
+vmx_syscall
+fpu_preempt
+vmx_preempt
+fpu_signal
+vmx_signal
+vsx_preempt
diff --git a/tools/testing/selftests/powerpc/math/Makefile b/tools/testing/selftests/powerpc/math/Makefile
new file mode 100644
index 000000000..11a10d7a2
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/Makefile
@@ -0,0 +1,19 @@
+# SPDX-License-Identifier: GPL-2.0
+TEST_GEN_PROGS := fpu_syscall fpu_preempt fpu_signal vmx_syscall vmx_preempt vmx_signal vsx_preempt
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): ../harness.c
+$(TEST_GEN_PROGS): CFLAGS += -O2 -g -pthread -m64 -maltivec
+
+$(OUTPUT)/fpu_syscall: fpu_asm.S
+$(OUTPUT)/fpu_preempt: fpu_asm.S
+$(OUTPUT)/fpu_signal: fpu_asm.S
+
+$(OUTPUT)/vmx_syscall: vmx_asm.S
+$(OUTPUT)/vmx_preempt: vmx_asm.S
+$(OUTPUT)/vmx_signal: vmx_asm.S
+
+$(OUTPUT)/vsx_preempt: CFLAGS += -mvsx
+$(OUTPUT)/vsx_preempt: vsx_asm.S
diff --git a/tools/testing/selftests/powerpc/math/fpu_asm.S b/tools/testing/selftests/powerpc/math/fpu_asm.S
new file mode 100644
index 000000000..8a04bb117
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/fpu_asm.S
@@ -0,0 +1,135 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include "basic_asm.h"
+#include "fpu_asm.h"
+
+FUNC_START(check_fpu)
+ mr r4,r3
+ li r3,1 # assume a bad result
+ lfd f0,0(r4)
+ fcmpu cr1,f0,f14
+ bne cr1,1f
+ lfd f0,8(r4)
+ fcmpu cr1,f0,f15
+ bne cr1,1f
+ lfd f0,16(r4)
+ fcmpu cr1,f0,f16
+ bne cr1,1f
+ lfd f0,24(r4)
+ fcmpu cr1,f0,f17
+ bne cr1,1f
+ lfd f0,32(r4)
+ fcmpu cr1,f0,f18
+ bne cr1,1f
+ lfd f0,40(r4)
+ fcmpu cr1,f0,f19
+ bne cr1,1f
+ lfd f0,48(r4)
+ fcmpu cr1,f0,f20
+ bne cr1,1f
+ lfd f0,56(r4)
+ fcmpu cr1,f0,f21
+ bne cr1,1f
+ lfd f0,64(r4)
+ fcmpu cr1,f0,f22
+ bne cr1,1f
+ lfd f0,72(r4)
+ fcmpu cr1,f0,f23
+ bne cr1,1f
+ lfd f0,80(r4)
+ fcmpu cr1,f0,f24
+ bne cr1,1f
+ lfd f0,88(r4)
+ fcmpu cr1,f0,f25
+ bne cr1,1f
+ lfd f0,96(r4)
+ fcmpu cr1,f0,f26
+ bne cr1,1f
+ lfd f0,104(r4)
+ fcmpu cr1,f0,f27
+ bne cr1,1f
+ lfd f0,112(r4)
+ fcmpu cr1,f0,f28
+ bne cr1,1f
+ lfd f0,120(r4)
+ fcmpu cr1,f0,f29
+ bne cr1,1f
+ lfd f0,128(r4)
+ fcmpu cr1,f0,f30
+ bne cr1,1f
+ lfd f0,136(r4)
+ fcmpu cr1,f0,f31
+ bne cr1,1f
+ li r3,0 # Success!!!
+1: blr
+
+FUNC_START(test_fpu)
+ # r3 holds pointer to where to put the result of fork
+ # r4 holds pointer to the pid
+ # f14-f31 are non volatiles
+ PUSH_BASIC_STACK(256)
+ PUSH_FPU(256)
+ std r3,STACK_FRAME_PARAM(0)(sp) # Address of darray
+ std r4,STACK_FRAME_PARAM(1)(sp) # Address of pid
+
+ bl load_fpu
+ nop
+ li r0,__NR_fork
+ sc
+
+ # pass the result of the fork to the caller
+ ld r9,STACK_FRAME_PARAM(1)(sp)
+ std r3,0(r9)
+
+ ld r3,STACK_FRAME_PARAM(0)(sp)
+ bl check_fpu
+ nop
+
+ POP_FPU(256)
+ POP_BASIC_STACK(256)
+ blr
+FUNC_END(test_fpu)
+
+# int preempt_fpu(double *darray, int *threads_running, int *running)
+# On starting will (atomically) decrement not_ready as a signal that the FPU
+# has been loaded with darray. Will proceed to check the validity of the FPU
+# registers while running is not zero.
+FUNC_START(preempt_fpu)
+ PUSH_BASIC_STACK(256)
+ PUSH_FPU(256)
+ std r3,STACK_FRAME_PARAM(0)(sp) # double *darray
+ std r4,STACK_FRAME_PARAM(1)(sp) # int *threads_starting
+ std r5,STACK_FRAME_PARAM(2)(sp) # int *running
+
+ bl load_fpu
+ nop
+
+ sync
+ # Atomic DEC
+ ld r3,STACK_FRAME_PARAM(1)(sp)
+1: lwarx r4,0,r3
+ addi r4,r4,-1
+ stwcx. r4,0,r3
+ bne- 1b
+
+2: ld r3,STACK_FRAME_PARAM(0)(sp)
+ bl check_fpu
+ nop
+ cmpdi r3,0
+ bne 3f
+ ld r4,STACK_FRAME_PARAM(2)(sp)
+ ld r5,0(r4)
+ cmpwi r5,0
+ bne 2b
+
+3: POP_FPU(256)
+ POP_BASIC_STACK(256)
+ blr
+FUNC_END(preempt_fpu)
diff --git a/tools/testing/selftests/powerpc/math/fpu_preempt.c b/tools/testing/selftests/powerpc/math/fpu_preempt.c
new file mode 100644
index 000000000..0f85b79d8
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/fpu_preempt.c
@@ -0,0 +1,113 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This test attempts to see if the FPU registers change across preemption.
+ * Two things should be noted here a) The check_fpu function in asm only checks
+ * the non volatile registers as it is reused from the syscall test b) There is
+ * no way to be sure preemption happened so this test just uses many threads
+ * and a long wait. As such, a successful test doesn't mean much but a failure
+ * is bad.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdlib.h>
+#include <pthread.h>
+
+#include "utils.h"
+
+/* Time to wait for workers to get preempted (seconds) */
+#define PREEMPT_TIME 20
+/*
+ * Factor by which to multiply number of online CPUs for total number of
+ * worker threads
+ */
+#define THREAD_FACTOR 8
+
+
+__thread double darray[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0,
+ 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0,
+ 2.1};
+
+int threads_starting;
+int running;
+
+extern void preempt_fpu(double *darray, int *threads_starting, int *running);
+
+void *preempt_fpu_c(void *p)
+{
+ int i;
+ srand(pthread_self());
+ for (i = 0; i < 21; i++)
+ darray[i] = rand();
+
+ /* Test failed if it ever returns */
+ preempt_fpu(darray, &threads_starting, &running);
+
+ return p;
+}
+
+int test_preempt_fpu(void)
+{
+ int i, rc, threads;
+ pthread_t *tids;
+
+ threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR;
+ tids = malloc((threads) * sizeof(pthread_t));
+ FAIL_IF(!tids);
+
+ running = true;
+ threads_starting = threads;
+ for (i = 0; i < threads; i++) {
+ rc = pthread_create(&tids[i], NULL, preempt_fpu_c, NULL);
+ FAIL_IF(rc);
+ }
+
+ setbuf(stdout, NULL);
+ /* Not really necessary but nice to wait for every thread to start */
+ printf("\tWaiting for all workers to start...");
+ while(threads_starting)
+ asm volatile("": : :"memory");
+ printf("done\n");
+
+ printf("\tWaiting for %d seconds to let some workers get preempted...", PREEMPT_TIME);
+ sleep(PREEMPT_TIME);
+ printf("done\n");
+
+ printf("\tStopping workers...");
+ /*
+ * Working are checking this value every loop. In preempt_fpu 'cmpwi r5,0; bne 2b'.
+ * r5 will have loaded the value of running.
+ */
+ running = 0;
+ for (i = 0; i < threads; i++) {
+ void *rc_p;
+ pthread_join(tids[i], &rc_p);
+
+ /*
+ * Harness will say the fail was here, look at why preempt_fpu
+ * returned
+ */
+ if ((long) rc_p)
+ printf("oops\n");
+ FAIL_IF((long) rc_p);
+ }
+ printf("done\n");
+
+ free(tids);
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(test_preempt_fpu, "fpu_preempt");
+}
diff --git a/tools/testing/selftests/powerpc/math/fpu_signal.c b/tools/testing/selftests/powerpc/math/fpu_signal.c
new file mode 100644
index 000000000..888aa51b4
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/fpu_signal.c
@@ -0,0 +1,135 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This test attempts to see if the FPU registers are correctly reported in a
+ * signal context. Each worker just spins checking its FPU registers, at some
+ * point a signal will interrupt it and C code will check the signal context
+ * ensuring it is also the same.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdlib.h>
+#include <pthread.h>
+
+#include "utils.h"
+
+/* Number of times each thread should receive the signal */
+#define ITERATIONS 10
+/*
+ * Factor by which to multiply number of online CPUs for total number of
+ * worker threads
+ */
+#define THREAD_FACTOR 8
+
+__thread double darray[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0,
+ 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0,
+ 2.1};
+
+bool bad_context;
+int threads_starting;
+int running;
+
+extern long preempt_fpu(double *darray, int *threads_starting, int *running);
+
+void signal_fpu_sig(int sig, siginfo_t *info, void *context)
+{
+ int i;
+ ucontext_t *uc = context;
+ mcontext_t *mc = &uc->uc_mcontext;
+
+ /* Only the non volatiles were loaded up */
+ for (i = 14; i < 32; i++) {
+ if (mc->fp_regs[i] != darray[i - 14]) {
+ bad_context = true;
+ break;
+ }
+ }
+}
+
+void *signal_fpu_c(void *p)
+{
+ int i;
+ long rc;
+ struct sigaction act;
+ act.sa_sigaction = signal_fpu_sig;
+ act.sa_flags = SA_SIGINFO;
+ rc = sigaction(SIGUSR1, &act, NULL);
+ if (rc)
+ return p;
+
+ srand(pthread_self());
+ for (i = 0; i < 21; i++)
+ darray[i] = rand();
+
+ rc = preempt_fpu(darray, &threads_starting, &running);
+
+ return (void *) rc;
+}
+
+int test_signal_fpu(void)
+{
+ int i, j, rc, threads;
+ void *rc_p;
+ pthread_t *tids;
+
+ threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR;
+ tids = malloc(threads * sizeof(pthread_t));
+ FAIL_IF(!tids);
+
+ running = true;
+ threads_starting = threads;
+ for (i = 0; i < threads; i++) {
+ rc = pthread_create(&tids[i], NULL, signal_fpu_c, NULL);
+ FAIL_IF(rc);
+ }
+
+ setbuf(stdout, NULL);
+ printf("\tWaiting for all workers to start...");
+ while (threads_starting)
+ asm volatile("": : :"memory");
+ printf("done\n");
+
+ printf("\tSending signals to all threads %d times...", ITERATIONS);
+ for (i = 0; i < ITERATIONS; i++) {
+ for (j = 0; j < threads; j++) {
+ pthread_kill(tids[j], SIGUSR1);
+ }
+ sleep(1);
+ }
+ printf("done\n");
+
+ printf("\tStopping workers...");
+ running = 0;
+ for (i = 0; i < threads; i++) {
+ pthread_join(tids[i], &rc_p);
+
+ /*
+ * Harness will say the fail was here, look at why signal_fpu
+ * returned
+ */
+ if ((long) rc_p || bad_context)
+ printf("oops\n");
+ if (bad_context)
+ fprintf(stderr, "\t!! bad_context is true\n");
+ FAIL_IF((long) rc_p || bad_context);
+ }
+ printf("done\n");
+
+ free(tids);
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(test_signal_fpu, "fpu_signal");
+}
diff --git a/tools/testing/selftests/powerpc/math/fpu_syscall.c b/tools/testing/selftests/powerpc/math/fpu_syscall.c
new file mode 100644
index 000000000..949e67212
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/fpu_syscall.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This test attempts to see if the FPU registers change across a syscall (fork).
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdlib.h>
+
+#include "utils.h"
+
+extern int test_fpu(double *darray, pid_t *pid);
+
+double darray[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0,
+ 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0,
+ 2.1};
+
+int syscall_fpu(void)
+{
+ pid_t fork_pid;
+ int i;
+ int ret;
+ int child_ret;
+ for (i = 0; i < 1000; i++) {
+ /* test_fpu will fork() */
+ ret = test_fpu(darray, &fork_pid);
+ if (fork_pid == -1)
+ return -1;
+ if (fork_pid == 0)
+ exit(ret);
+ waitpid(fork_pid, &child_ret, 0);
+ if (ret || child_ret)
+ return 1;
+ }
+
+ return 0;
+}
+
+int test_syscall_fpu(void)
+{
+ /*
+ * Setup an environment with much context switching
+ */
+ pid_t pid2;
+ pid_t pid = fork();
+ int ret;
+ int child_ret;
+ FAIL_IF(pid == -1);
+
+ pid2 = fork();
+ /* Can't FAIL_IF(pid2 == -1); because already forked once */
+ if (pid2 == -1) {
+ /*
+ * Couldn't fork, ensure test is a fail
+ */
+ child_ret = ret = 1;
+ } else {
+ ret = syscall_fpu();
+ if (pid2)
+ waitpid(pid2, &child_ret, 0);
+ else
+ exit(ret);
+ }
+
+ ret |= child_ret;
+
+ if (pid)
+ waitpid(pid, &child_ret, 0);
+ else
+ exit(ret);
+
+ FAIL_IF(ret || child_ret);
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(test_syscall_fpu, "syscall_fpu");
+
+}
diff --git a/tools/testing/selftests/powerpc/math/vmx_asm.S b/tools/testing/selftests/powerpc/math/vmx_asm.S
new file mode 100644
index 000000000..cb1e5ae1b
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/vmx_asm.S
@@ -0,0 +1,152 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include "basic_asm.h"
+#include "vmx_asm.h"
+
+# Should be safe from C, only touches r4, r5 and v0,v1,v2
+FUNC_START(check_vmx)
+ PUSH_BASIC_STACK(32)
+ mr r4,r3
+ li r3,1 # assume a bad result
+ li r5,0
+ lvx v0,r5,r4
+ vcmpequd. v1,v0,v20
+ vmr v2,v1
+
+ addi r5,r5,16
+ lvx v0,r5,r4
+ vcmpequd. v1,v0,v21
+ vand v2,v2,v1
+
+ addi r5,r5,16
+ lvx v0,r5,r4
+ vcmpequd. v1,v0,v22
+ vand v2,v2,v1
+
+ addi r5,r5,16
+ lvx v0,r5,r4
+ vcmpequd. v1,v0,v23
+ vand v2,v2,v1
+
+ addi r5,r5,16
+ lvx v0,r5,r4
+ vcmpequd. v1,v0,v24
+ vand v2,v2,v1
+
+ addi r5,r5,16
+ lvx v0,r5,r4
+ vcmpequd. v1,v0,v25
+ vand v2,v2,v1
+
+ addi r5,r5,16
+ lvx v0,r5,r4
+ vcmpequd. v1,v0,v26
+ vand v2,v2,v1
+
+ addi r5,r5,16
+ lvx v0,r5,r4
+ vcmpequd. v1,v0,v27
+ vand v2,v2,v1
+
+ addi r5,r5,16
+ lvx v0,r5,r4
+ vcmpequd. v1,v0,v28
+ vand v2,v2,v1
+
+ addi r5,r5,16
+ lvx v0,r5,r4
+ vcmpequd. v1,v0,v29
+ vand v2,v2,v1
+
+ addi r5,r5,16
+ lvx v0,r5,r4
+ vcmpequd. v1,v0,v30
+ vand v2,v2,v1
+
+ addi r5,r5,16
+ lvx v0,r5,r4
+ vcmpequd. v1,v0,v31
+ vand v2,v2,v1
+
+ li r5,STACK_FRAME_LOCAL(0,0)
+ stvx v2,r5,sp
+ ldx r0,r5,sp
+ cmpdi r0,0xffffffffffffffff
+ bne 1f
+ li r3,0
+1: POP_BASIC_STACK(32)
+ blr
+FUNC_END(check_vmx)
+
+# Safe from C
+FUNC_START(test_vmx)
+ # r3 holds pointer to where to put the result of fork
+ # r4 holds pointer to the pid
+ # v20-v31 are non-volatile
+ PUSH_BASIC_STACK(512)
+ std r3,STACK_FRAME_PARAM(0)(sp) # Address of varray
+ std r4,STACK_FRAME_PARAM(1)(sp) # address of pid
+ PUSH_VMX(STACK_FRAME_LOCAL(2,0),r4)
+
+ bl load_vmx
+ nop
+
+ li r0,__NR_fork
+ sc
+ # Pass the result of fork back to the caller
+ ld r9,STACK_FRAME_PARAM(1)(sp)
+ std r3,0(r9)
+
+ ld r3,STACK_FRAME_PARAM(0)(sp)
+ bl check_vmx
+ nop
+
+ POP_VMX(STACK_FRAME_LOCAL(2,0),r4)
+ POP_BASIC_STACK(512)
+ blr
+FUNC_END(test_vmx)
+
+# int preempt_vmx(vector int *varray, int *threads_starting, int *running)
+# On starting will (atomically) decrement threads_starting as a signal that
+# the VMX have been loaded with varray. Will proceed to check the validity of
+# the VMX registers while running is not zero.
+FUNC_START(preempt_vmx)
+ PUSH_BASIC_STACK(512)
+ std r3,STACK_FRAME_PARAM(0)(sp) # vector int *varray
+ std r4,STACK_FRAME_PARAM(1)(sp) # int *threads_starting
+ std r5,STACK_FRAME_PARAM(2)(sp) # int *running
+ # VMX need to write to 16 byte aligned addresses, skip STACK_FRAME_LOCAL(3,0)
+ PUSH_VMX(STACK_FRAME_LOCAL(4,0),r4)
+
+ bl load_vmx
+ nop
+
+ sync
+ # Atomic DEC
+ ld r3,STACK_FRAME_PARAM(1)(sp)
+1: lwarx r4,0,r3
+ addi r4,r4,-1
+ stwcx. r4,0,r3
+ bne- 1b
+
+2: ld r3,STACK_FRAME_PARAM(0)(sp)
+ bl check_vmx
+ nop
+ cmpdi r3,0
+ bne 3f
+ ld r4,STACK_FRAME_PARAM(2)(sp)
+ ld r5,0(r4)
+ cmpwi r5,0
+ bne 2b
+
+3: POP_VMX(STACK_FRAME_LOCAL(4,0),r4)
+ POP_BASIC_STACK(512)
+ blr
+FUNC_END(preempt_vmx)
diff --git a/tools/testing/selftests/powerpc/math/vmx_preempt.c b/tools/testing/selftests/powerpc/math/vmx_preempt.c
new file mode 100644
index 000000000..9ef376c55
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/vmx_preempt.c
@@ -0,0 +1,112 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This test attempts to see if the VMX registers change across preemption.
+ * Two things should be noted here a) The check_vmx function in asm only checks
+ * the non volatile registers as it is reused from the syscall test b) There is
+ * no way to be sure preemption happened so this test just uses many threads
+ * and a long wait. As such, a successful test doesn't mean much but a failure
+ * is bad.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdlib.h>
+#include <pthread.h>
+
+#include "utils.h"
+
+/* Time to wait for workers to get preempted (seconds) */
+#define PREEMPT_TIME 20
+/*
+ * Factor by which to multiply number of online CPUs for total number of
+ * worker threads
+ */
+#define THREAD_FACTOR 8
+
+__thread vector int varray[] = {{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10,11,12},
+ {13,14,15,16},{17,18,19,20},{21,22,23,24},
+ {25,26,27,28},{29,30,31,32},{33,34,35,36},
+ {37,38,39,40},{41,42,43,44},{45,46,47,48}};
+
+int threads_starting;
+int running;
+
+extern void preempt_vmx(vector int *varray, int *threads_starting, int *running);
+
+void *preempt_vmx_c(void *p)
+{
+ int i, j;
+ srand(pthread_self());
+ for (i = 0; i < 12; i++)
+ for (j = 0; j < 4; j++)
+ varray[i][j] = rand();
+
+ /* Test fails if it ever returns */
+ preempt_vmx(varray, &threads_starting, &running);
+ return p;
+}
+
+int test_preempt_vmx(void)
+{
+ int i, rc, threads;
+ pthread_t *tids;
+
+ threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR;
+ tids = malloc(threads * sizeof(pthread_t));
+ FAIL_IF(!tids);
+
+ running = true;
+ threads_starting = threads;
+ for (i = 0; i < threads; i++) {
+ rc = pthread_create(&tids[i], NULL, preempt_vmx_c, NULL);
+ FAIL_IF(rc);
+ }
+
+ setbuf(stdout, NULL);
+ /* Not really nessesary but nice to wait for every thread to start */
+ printf("\tWaiting for all workers to start...");
+ while(threads_starting)
+ asm volatile("": : :"memory");
+ printf("done\n");
+
+ printf("\tWaiting for %d seconds to let some workers get preempted...", PREEMPT_TIME);
+ sleep(PREEMPT_TIME);
+ printf("done\n");
+
+ printf("\tStopping workers...");
+ /*
+ * Working are checking this value every loop. In preempt_vmx 'cmpwi r5,0; bne 2b'.
+ * r5 will have loaded the value of running.
+ */
+ running = 0;
+ for (i = 0; i < threads; i++) {
+ void *rc_p;
+ pthread_join(tids[i], &rc_p);
+
+ /*
+ * Harness will say the fail was here, look at why preempt_vmx
+ * returned
+ */
+ if ((long) rc_p)
+ printf("oops\n");
+ FAIL_IF((long) rc_p);
+ }
+ printf("done\n");
+
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(test_preempt_vmx, "vmx_preempt");
+}
diff --git a/tools/testing/selftests/powerpc/math/vmx_signal.c b/tools/testing/selftests/powerpc/math/vmx_signal.c
new file mode 100644
index 000000000..671d7533a
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/vmx_signal.c
@@ -0,0 +1,156 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This test attempts to see if the VMX registers are correctly reported in a
+ * signal context. Each worker just spins checking its VMX registers, at some
+ * point a signal will interrupt it and C code will check the signal context
+ * ensuring it is also the same.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdlib.h>
+#include <string.h>
+#include <pthread.h>
+#include <altivec.h>
+
+#include "utils.h"
+
+/* Number of times each thread should receive the signal */
+#define ITERATIONS 10
+/*
+ * Factor by which to multiply number of online CPUs for total number of
+ * worker threads
+ */
+#define THREAD_FACTOR 8
+
+__thread vector int varray[] = {{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10,11,12},
+ {13,14,15,16},{17,18,19,20},{21,22,23,24},
+ {25,26,27,28},{29,30,31,32},{33,34,35,36},
+ {37,38,39,40},{41,42,43,44},{45,46,47,48}};
+
+bool bad_context;
+int running;
+int threads_starting;
+
+extern int preempt_vmx(vector int *varray, int *threads_starting, int *sentinal);
+
+void signal_vmx_sig(int sig, siginfo_t *info, void *context)
+{
+ int i;
+ ucontext_t *uc = context;
+ mcontext_t *mc = &uc->uc_mcontext;
+
+ /* Only the non volatiles were loaded up */
+ for (i = 20; i < 32; i++) {
+ if (memcmp(mc->v_regs->vrregs[i], &varray[i - 20], 16)) {
+ int j;
+ /*
+ * Shouldn't printf() in a signal handler, however, this is a
+ * test and we've detected failure. Understanding what failed
+ * is paramount. All that happens after this is tests exit with
+ * failure.
+ */
+ printf("VMX mismatch at reg %d!\n", i);
+ printf("Reg | Actual | Expected\n");
+ for (j = 20; j < 32; j++) {
+ printf("%d | 0x%04x%04x%04x%04x | 0x%04x%04x%04x%04x\n", j, mc->v_regs->vrregs[j][0],
+ mc->v_regs->vrregs[j][1], mc->v_regs->vrregs[j][2], mc->v_regs->vrregs[j][3],
+ varray[j - 20][0], varray[j - 20][1], varray[j - 20][2], varray[j - 20][3]);
+ }
+ bad_context = true;
+ break;
+ }
+ }
+}
+
+void *signal_vmx_c(void *p)
+{
+ int i, j;
+ long rc;
+ struct sigaction act;
+ act.sa_sigaction = signal_vmx_sig;
+ act.sa_flags = SA_SIGINFO;
+ rc = sigaction(SIGUSR1, &act, NULL);
+ if (rc)
+ return p;
+
+ srand(pthread_self());
+ for (i = 0; i < 12; i++)
+ for (j = 0; j < 4; j++)
+ varray[i][j] = rand();
+
+ rc = preempt_vmx(varray, &threads_starting, &running);
+
+ return (void *) rc;
+}
+
+int test_signal_vmx(void)
+{
+ int i, j, rc, threads;
+ void *rc_p;
+ pthread_t *tids;
+
+ threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR;
+ tids = malloc(threads * sizeof(pthread_t));
+ FAIL_IF(!tids);
+
+ running = true;
+ threads_starting = threads;
+ for (i = 0; i < threads; i++) {
+ rc = pthread_create(&tids[i], NULL, signal_vmx_c, NULL);
+ FAIL_IF(rc);
+ }
+
+ setbuf(stdout, NULL);
+ printf("\tWaiting for %d workers to start... %d", threads, threads_starting);
+ while (threads_starting) {
+ asm volatile("": : :"memory");
+ usleep(1000);
+ printf(", %d", threads_starting);
+ }
+ printf(" ...done\n");
+
+ printf("\tSending signals to all threads %d times...", ITERATIONS);
+ for (i = 0; i < ITERATIONS; i++) {
+ for (j = 0; j < threads; j++) {
+ pthread_kill(tids[j], SIGUSR1);
+ }
+ sleep(1);
+ }
+ printf("done\n");
+
+ printf("\tKilling workers...");
+ running = 0;
+ for (i = 0; i < threads; i++) {
+ pthread_join(tids[i], &rc_p);
+
+ /*
+ * Harness will say the fail was here, look at why signal_vmx
+ * returned
+ */
+ if ((long) rc_p || bad_context)
+ printf("oops\n");
+ if (bad_context)
+ fprintf(stderr, "\t!! bad_context is true\n");
+ FAIL_IF((long) rc_p || bad_context);
+ }
+ printf("done\n");
+
+ free(tids);
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(test_signal_vmx, "vmx_signal");
+}
diff --git a/tools/testing/selftests/powerpc/math/vmx_syscall.c b/tools/testing/selftests/powerpc/math/vmx_syscall.c
new file mode 100644
index 000000000..a017918ee
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/vmx_syscall.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This test attempts to see if the VMX registers change across a syscall (fork).
+ */
+
+#include <altivec.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include "utils.h"
+
+vector int varray[] = {{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10,11,12},
+ {13,14,15,16},{17,18,19,20},{21,22,23,24},
+ {25,26,27,28},{29,30,31,32},{33,34,35,36},
+ {37,38,39,40},{41,42,43,44},{45,46,47,48}};
+
+extern int test_vmx(vector int *varray, pid_t *pid);
+
+int vmx_syscall(void)
+{
+ pid_t fork_pid;
+ int i;
+ int ret;
+ int child_ret;
+ for (i = 0; i < 1000; i++) {
+ /* test_vmx will fork() */
+ ret = test_vmx(varray, &fork_pid);
+ if (fork_pid == -1)
+ return -1;
+ if (fork_pid == 0)
+ exit(ret);
+ waitpid(fork_pid, &child_ret, 0);
+ if (ret || child_ret)
+ return 1;
+ }
+
+ return 0;
+}
+
+int test_vmx_syscall(void)
+{
+ /*
+ * Setup an environment with much context switching
+ */
+ pid_t pid2;
+ pid_t pid = fork();
+ int ret;
+ int child_ret;
+ FAIL_IF(pid == -1);
+
+ pid2 = fork();
+ ret = vmx_syscall();
+ /* Can't FAIL_IF(pid2 == -1); because we've already forked */
+ if (pid2 == -1) {
+ /*
+ * Couldn't fork, ensure child_ret is set and is a fail
+ */
+ ret = child_ret = 1;
+ } else {
+ if (pid2)
+ waitpid(pid2, &child_ret, 0);
+ else
+ exit(ret);
+ }
+
+ ret |= child_ret;
+
+ if (pid)
+ waitpid(pid, &child_ret, 0);
+ else
+ exit(ret);
+
+ FAIL_IF(ret || child_ret);
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(test_vmx_syscall, "vmx_syscall");
+
+}
diff --git a/tools/testing/selftests/powerpc/math/vsx_asm.S b/tools/testing/selftests/powerpc/math/vsx_asm.S
new file mode 100644
index 000000000..8f431f6ab
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/vsx_asm.S
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include "basic_asm.h"
+#include "vsx_asm.h"
+
+#long check_vsx(vector int *r3);
+#This function wraps storeing VSX regs to the end of an array and a
+#call to a comparison function in C which boils down to a memcmp()
+FUNC_START(check_vsx)
+ PUSH_BASIC_STACK(32)
+ std r3,STACK_FRAME_PARAM(0)(sp)
+ addi r3, r3, 16 * 12 #Second half of array
+ bl store_vsx
+ ld r3,STACK_FRAME_PARAM(0)(sp)
+ bl vsx_memcmp
+ POP_BASIC_STACK(32)
+ blr
+FUNC_END(check_vsx)
+
+# int preempt_vmx(vector int *varray, int *threads_starting,
+# int *running);
+# On starting will (atomically) decrement threads_starting as a signal
+# that the VMX have been loaded with varray. Will proceed to check the
+# validity of the VMX registers while running is not zero.
+FUNC_START(preempt_vsx)
+ PUSH_BASIC_STACK(512)
+ std r3,STACK_FRAME_PARAM(0)(sp) # vector int *varray
+ std r4,STACK_FRAME_PARAM(1)(sp) # int *threads_starting
+ std r5,STACK_FRAME_PARAM(2)(sp) # int *running
+
+ bl load_vsx
+ nop
+
+ sync
+ # Atomic DEC
+ ld r3,STACK_FRAME_PARAM(1)(sp)
+1: lwarx r4,0,r3
+ addi r4,r4,-1
+ stwcx. r4,0,r3
+ bne- 1b
+
+2: ld r3,STACK_FRAME_PARAM(0)(sp)
+ bl check_vsx
+ nop
+ cmpdi r3,0
+ bne 3f
+ ld r4,STACK_FRAME_PARAM(2)(sp)
+ ld r5,0(r4)
+ cmpwi r5,0
+ bne 2b
+
+3: POP_BASIC_STACK(512)
+ blr
+FUNC_END(preempt_vsx)
diff --git a/tools/testing/selftests/powerpc/math/vsx_preempt.c b/tools/testing/selftests/powerpc/math/vsx_preempt.c
new file mode 100644
index 000000000..6387f03a0
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/vsx_preempt.c
@@ -0,0 +1,147 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This test attempts to see if the VSX registers change across preemption.
+ * There is no way to be sure preemption happened so this test just
+ * uses many threads and a long wait. As such, a successful test
+ * doesn't mean much but a failure is bad.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdlib.h>
+#include <pthread.h>
+
+#include "utils.h"
+
+/* Time to wait for workers to get preempted (seconds) */
+#define PREEMPT_TIME 20
+/*
+ * Factor by which to multiply number of online CPUs for total number of
+ * worker threads
+ */
+#define THREAD_FACTOR 8
+
+/*
+ * Ensure there is twice the number of non-volatile VMX regs!
+ * check_vmx() is going to use the other half as space to put the live
+ * registers before calling vsx_memcmp()
+ */
+__thread vector int varray[24] = {
+ {1, 2, 3, 4 }, {5, 6, 7, 8 }, {9, 10,11,12},
+ {13,14,15,16}, {17,18,19,20}, {21,22,23,24},
+ {25,26,27,28}, {29,30,31,32}, {33,34,35,36},
+ {37,38,39,40}, {41,42,43,44}, {45,46,47,48}
+};
+
+int threads_starting;
+int running;
+
+extern long preempt_vsx(vector int *varray, int *threads_starting, int *running);
+
+long vsx_memcmp(vector int *a) {
+ vector int zero = {0, 0, 0, 0};
+ int i;
+
+ FAIL_IF(a != varray);
+
+ for(i = 0; i < 12; i++) {
+ if (memcmp(&a[i + 12], &zero, sizeof(vector int)) == 0) {
+ fprintf(stderr, "Detected zero from the VSX reg %d\n", i + 12);
+ return 2;
+ }
+ }
+
+ if (memcmp(a, &a[12], 12 * sizeof(vector int))) {
+ long *p = (long *)a;
+ fprintf(stderr, "VSX mismatch\n");
+ for (i = 0; i < 24; i=i+2)
+ fprintf(stderr, "%d: 0x%08lx%08lx | 0x%08lx%08lx\n",
+ i/2 + i%2 + 20, p[i], p[i + 1], p[i + 24], p[i + 25]);
+ return 1;
+ }
+ return 0;
+}
+
+void *preempt_vsx_c(void *p)
+{
+ int i, j;
+ long rc;
+ srand(pthread_self());
+ for (i = 0; i < 12; i++)
+ for (j = 0; j < 4; j++) {
+ varray[i][j] = rand();
+ /* Don't want zero because it hides kernel problems */
+ if (varray[i][j] == 0)
+ j--;
+ }
+ rc = preempt_vsx(varray, &threads_starting, &running);
+ if (rc == 2)
+ fprintf(stderr, "Caught zeros in VSX compares\n");
+ return (void *)rc;
+}
+
+int test_preempt_vsx(void)
+{
+ int i, rc, threads;
+ pthread_t *tids;
+
+ threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR;
+ tids = malloc(threads * sizeof(pthread_t));
+ FAIL_IF(!tids);
+
+ running = true;
+ threads_starting = threads;
+ for (i = 0; i < threads; i++) {
+ rc = pthread_create(&tids[i], NULL, preempt_vsx_c, NULL);
+ FAIL_IF(rc);
+ }
+
+ setbuf(stdout, NULL);
+ /* Not really nessesary but nice to wait for every thread to start */
+ printf("\tWaiting for %d workers to start...", threads_starting);
+ while(threads_starting)
+ asm volatile("": : :"memory");
+ printf("done\n");
+
+ printf("\tWaiting for %d seconds to let some workers get preempted...", PREEMPT_TIME);
+ sleep(PREEMPT_TIME);
+ printf("done\n");
+
+ printf("\tStopping workers...");
+ /*
+ * Working are checking this value every loop. In preempt_vsx 'cmpwi r5,0; bne 2b'.
+ * r5 will have loaded the value of running.
+ */
+ running = 0;
+ for (i = 0; i < threads; i++) {
+ void *rc_p;
+ pthread_join(tids[i], &rc_p);
+
+ /*
+ * Harness will say the fail was here, look at why preempt_vsx
+ * returned
+ */
+ if ((long) rc_p)
+ printf("oops\n");
+ FAIL_IF((long) rc_p);
+ }
+ printf("done\n");
+
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(test_preempt_vsx, "vsx_preempt");
+}
diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore
new file mode 100644
index 000000000..7d7c42ed6
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/.gitignore
@@ -0,0 +1,5 @@
+hugetlb_vs_thp_test
+subpage_prot
+tempfile
+prot_sao
+segv_errors \ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile
new file mode 100644
index 000000000..869628234
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/Makefile
@@ -0,0 +1,19 @@
+# SPDX-License-Identifier: GPL-2.0
+noarg:
+ $(MAKE) -C ../
+
+TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors
+TEST_GEN_PROGS_EXTENDED := tlbie_test
+TEST_GEN_FILES := tempfile
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): ../harness.c
+
+$(OUTPUT)/prot_sao: ../utils.c
+
+$(OUTPUT)/tempfile:
+ dd if=/dev/zero of=$@ bs=64k count=1
+
+$(OUTPUT)/tlbie_test: LDLIBS += -lpthread
diff --git a/tools/testing/selftests/powerpc/mm/hugetlb_vs_thp_test.c b/tools/testing/selftests/powerpc/mm/hugetlb_vs_thp_test.c
new file mode 100644
index 000000000..9932359ce
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/hugetlb_vs_thp_test.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include "utils.h"
+
+/* This must match the huge page & THP size */
+#define SIZE (16 * 1024 * 1024)
+
+static int test_body(void)
+{
+ void *addr;
+ char *p;
+
+ addr = (void *)0xa0000000;
+
+ p = mmap(addr, SIZE, PROT_READ | PROT_WRITE,
+ MAP_HUGETLB | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ if (p != MAP_FAILED) {
+ /*
+ * Typically the mmap will fail because no huge pages are
+ * allocated on the system. But if there are huge pages
+ * allocated the mmap will succeed. That's fine too, we just
+ * munmap here before continuing. munmap() length of
+ * MAP_HUGETLB memory must be hugepage aligned.
+ */
+ if (munmap(addr, SIZE)) {
+ perror("munmap");
+ return 1;
+ }
+ }
+
+ p = mmap(addr, SIZE, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ if (p == MAP_FAILED) {
+ printf("Mapping failed @ %p\n", addr);
+ perror("mmap");
+ return 1;
+ }
+
+ /*
+ * Either a user or kernel access is sufficient to trigger the bug.
+ * A kernel access is easier to spot & debug, as it will trigger the
+ * softlockup or RCU stall detectors, and when the system is kicked
+ * into xmon we get a backtrace in the kernel.
+ *
+ * A good option is:
+ * getcwd(p, SIZE);
+ *
+ * For the purposes of this testcase it's preferable to spin in
+ * userspace, so the harness can kill us if we get stuck. That way we
+ * see a test failure rather than a dead system.
+ */
+ *p = 0xf;
+
+ munmap(addr, SIZE);
+
+ return 0;
+}
+
+static int test_main(void)
+{
+ int i;
+
+ /* 10,000 because it's a "bunch", and completes reasonably quickly */
+ for (i = 0; i < 10000; i++)
+ if (test_body())
+ return 1;
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(test_main, "hugetlb_vs_thp");
+}
diff --git a/tools/testing/selftests/powerpc/mm/prot_sao.c b/tools/testing/selftests/powerpc/mm/prot_sao.c
new file mode 100644
index 000000000..611530d43
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/prot_sao.c
@@ -0,0 +1,42 @@
+/*
+ * Copyright 2016, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+
+#include <asm/cputable.h>
+
+#include "utils.h"
+
+#define SIZE (64 * 1024)
+
+int test_prot_sao(void)
+{
+ char *p;
+
+ /* 2.06 or later should support SAO */
+ SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
+
+ /*
+ * Ensure we can ask for PROT_SAO.
+ * We can't really verify that it does the right thing, but at least we
+ * confirm the kernel will accept it.
+ */
+ p = mmap(NULL, SIZE, PROT_READ | PROT_WRITE | PROT_SAO,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ FAIL_IF(p == MAP_FAILED);
+
+ /* Write to the mapping, to at least cause a fault */
+ memset(p, 0xaa, SIZE);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(test_prot_sao, "prot-sao");
+}
diff --git a/tools/testing/selftests/powerpc/mm/segv_errors.c b/tools/testing/selftests/powerpc/mm/segv_errors.c
new file mode 100644
index 000000000..06ae76ee3
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/segv_errors.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2017 John Sperbeck
+ *
+ * Test that an access to a mapped but inaccessible area causes a SEGV and
+ * reports si_code == SEGV_ACCERR.
+ */
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <signal.h>
+#include <sys/mman.h>
+#include <assert.h>
+#include <ucontext.h>
+
+#include "utils.h"
+
+static bool faulted;
+static int si_code;
+
+static void segv_handler(int n, siginfo_t *info, void *ctxt_v)
+{
+ ucontext_t *ctxt = (ucontext_t *)ctxt_v;
+ struct pt_regs *regs = ctxt->uc_mcontext.regs;
+
+ faulted = true;
+ si_code = info->si_code;
+ regs->nip += 4;
+}
+
+int test_segv_errors(void)
+{
+ struct sigaction act = {
+ .sa_sigaction = segv_handler,
+ .sa_flags = SA_SIGINFO,
+ };
+ char c, *p = NULL;
+
+ p = mmap(NULL, getpagesize(), 0, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+ FAIL_IF(p == MAP_FAILED);
+
+ FAIL_IF(sigaction(SIGSEGV, &act, NULL) != 0);
+
+ faulted = false;
+ si_code = 0;
+
+ /*
+ * We just need a compiler barrier, but mb() works and has the nice
+ * property of being easy to spot in the disassembly.
+ */
+ mb();
+ c = *p;
+ mb();
+
+ FAIL_IF(!faulted);
+ FAIL_IF(si_code != SEGV_ACCERR);
+
+ faulted = false;
+ si_code = 0;
+
+ mb();
+ *p = c;
+ mb();
+
+ FAIL_IF(!faulted);
+ FAIL_IF(si_code != SEGV_ACCERR);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(test_segv_errors, "segv_errors");
+}
diff --git a/tools/testing/selftests/powerpc/mm/subpage_prot.c b/tools/testing/selftests/powerpc/mm/subpage_prot.c
new file mode 100644
index 000000000..3ae77ba93
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/subpage_prot.c
@@ -0,0 +1,236 @@
+/*
+ * Copyright IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/ptrace.h>
+#include <sys/syscall.h>
+#include <ucontext.h>
+#include <unistd.h>
+
+#include "utils.h"
+
+char *file_name;
+
+int in_test;
+volatile int faulted;
+volatile void *dar;
+int errors;
+
+static void segv(int signum, siginfo_t *info, void *ctxt_v)
+{
+ ucontext_t *ctxt = (ucontext_t *)ctxt_v;
+ struct pt_regs *regs = ctxt->uc_mcontext.regs;
+
+ if (!in_test) {
+ fprintf(stderr, "Segfault outside of test !\n");
+ exit(1);
+ }
+
+ faulted = 1;
+ dar = (void *)regs->dar;
+ regs->nip += 4;
+}
+
+static inline void do_read(const volatile void *addr)
+{
+ int ret;
+
+ asm volatile("lwz %0,0(%1); twi 0,%0,0; isync;\n"
+ : "=r" (ret) : "r" (addr) : "memory");
+}
+
+static inline void do_write(const volatile void *addr)
+{
+ int val = 0x1234567;
+
+ asm volatile("stw %0,0(%1); sync; \n"
+ : : "r" (val), "r" (addr) : "memory");
+}
+
+static inline void check_faulted(void *addr, long page, long subpage, int write)
+{
+ int want_fault = (subpage == ((page + 3) % 16));
+
+ if (write)
+ want_fault |= (subpage == ((page + 1) % 16));
+
+ if (faulted != want_fault) {
+ printf("Failed at %p (p=%ld,sp=%ld,w=%d), want=%s, got=%s !\n",
+ addr, page, subpage, write,
+ want_fault ? "fault" : "pass",
+ faulted ? "fault" : "pass");
+ ++errors;
+ }
+
+ if (faulted) {
+ if (dar != addr) {
+ printf("Fault expected at %p and happened at %p !\n",
+ addr, dar);
+ }
+ faulted = 0;
+ asm volatile("sync" : : : "memory");
+ }
+}
+
+static int run_test(void *addr, unsigned long size)
+{
+ unsigned int *map;
+ long i, j, pages, err;
+
+ pages = size / 0x10000;
+ map = malloc(pages * 4);
+ assert(map);
+
+ /*
+ * for each page, mark subpage i % 16 read only and subpage
+ * (i + 3) % 16 inaccessible
+ */
+ for (i = 0; i < pages; i++) {
+ map[i] = (0x40000000 >> (((i + 1) * 2) % 32)) |
+ (0xc0000000 >> (((i + 3) * 2) % 32));
+ }
+
+ err = syscall(__NR_subpage_prot, addr, size, map);
+ if (err) {
+ perror("subpage_perm");
+ return 1;
+ }
+ free(map);
+
+ in_test = 1;
+ errors = 0;
+ for (i = 0; i < pages; i++) {
+ for (j = 0; j < 16; j++, addr += 0x1000) {
+ do_read(addr);
+ check_faulted(addr, i, j, 0);
+ do_write(addr);
+ check_faulted(addr, i, j, 1);
+ }
+ }
+
+ in_test = 0;
+ if (errors) {
+ printf("%d errors detected\n", errors);
+ return 1;
+ }
+
+ return 0;
+}
+
+static int syscall_available(void)
+{
+ int rc;
+
+ errno = 0;
+ rc = syscall(__NR_subpage_prot, 0, 0, 0);
+
+ return rc == 0 || (errno != ENOENT && errno != ENOSYS);
+}
+
+int test_anon(void)
+{
+ unsigned long align;
+ struct sigaction act = {
+ .sa_sigaction = segv,
+ .sa_flags = SA_SIGINFO
+ };
+ void *mallocblock;
+ unsigned long mallocsize;
+
+ SKIP_IF(!syscall_available());
+
+ if (getpagesize() != 0x10000) {
+ fprintf(stderr, "Kernel page size must be 64K!\n");
+ return 1;
+ }
+
+ sigaction(SIGSEGV, &act, NULL);
+
+ mallocsize = 4 * 16 * 1024 * 1024;
+
+ FAIL_IF(posix_memalign(&mallocblock, 64 * 1024, mallocsize));
+
+ align = (unsigned long)mallocblock;
+ if (align & 0xffff)
+ align = (align | 0xffff) + 1;
+
+ mallocblock = (void *)align;
+
+ printf("allocated malloc block of 0x%lx bytes at %p\n",
+ mallocsize, mallocblock);
+
+ printf("testing malloc block...\n");
+
+ return run_test(mallocblock, mallocsize);
+}
+
+int test_file(void)
+{
+ struct sigaction act = {
+ .sa_sigaction = segv,
+ .sa_flags = SA_SIGINFO
+ };
+ void *fileblock;
+ off_t filesize;
+ int fd;
+
+ SKIP_IF(!syscall_available());
+
+ fd = open(file_name, O_RDWR);
+ if (fd == -1) {
+ perror("failed to open file");
+ return 1;
+ }
+ sigaction(SIGSEGV, &act, NULL);
+
+ filesize = lseek(fd, 0, SEEK_END);
+ if (filesize & 0xffff)
+ filesize &= ~0xfffful;
+
+ fileblock = mmap(NULL, filesize, PROT_READ | PROT_WRITE,
+ MAP_SHARED, fd, 0);
+ if (fileblock == MAP_FAILED) {
+ perror("failed to map file");
+ return 1;
+ }
+ printf("allocated %s for 0x%lx bytes at %p\n",
+ file_name, filesize, fileblock);
+
+ printf("testing file map...\n");
+
+ return run_test(fileblock, filesize);
+}
+
+int main(int argc, char *argv[])
+{
+ int rc;
+
+ rc = test_harness(test_anon, "subpage_prot_anon");
+ if (rc)
+ return rc;
+
+ if (argc > 1)
+ file_name = argv[1];
+ else
+ file_name = "tempfile";
+
+ return test_harness(test_file, "subpage_prot_file");
+}
diff --git a/tools/testing/selftests/powerpc/mm/tlbie_test.c b/tools/testing/selftests/powerpc/mm/tlbie_test.c
new file mode 100644
index 000000000..f85a0938a
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/tlbie_test.c
@@ -0,0 +1,734 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2019, Nick Piggin, Gautham R. Shenoy, Aneesh Kumar K.V, IBM Corp.
+ */
+
+/*
+ *
+ * Test tlbie/mtpidr race. We have 4 threads doing flush/load/compare/store
+ * sequence in a loop. The same threads also rung a context switch task
+ * that does sched_yield() in loop.
+ *
+ * The snapshot thread mark the mmap area PROT_READ in between, make a copy
+ * and copy it back to the original area. This helps us to detect if any
+ * store continued to happen after we marked the memory PROT_READ.
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <linux/futex.h>
+#include <unistd.h>
+#include <asm/unistd.h>
+#include <string.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <sched.h>
+#include <time.h>
+#include <stdarg.h>
+#include <sched.h>
+#include <pthread.h>
+#include <signal.h>
+#include <sys/prctl.h>
+
+static inline void dcbf(volatile unsigned int *addr)
+{
+ __asm__ __volatile__ ("dcbf %y0; sync" : : "Z"(*(unsigned char *)addr) : "memory");
+}
+
+static void err_msg(char *msg)
+{
+
+ time_t now;
+ time(&now);
+ printf("=================================\n");
+ printf(" Error: %s\n", msg);
+ printf(" %s", ctime(&now));
+ printf("=================================\n");
+ exit(1);
+}
+
+static char *map1;
+static char *map2;
+static pid_t rim_process_pid;
+
+/*
+ * A "rim-sequence" is defined to be the sequence of the following
+ * operations performed on a memory word:
+ * 1) FLUSH the contents of that word.
+ * 2) LOAD the contents of that word.
+ * 3) COMPARE the contents of that word with the content that was
+ * previously stored at that word
+ * 4) STORE new content into that word.
+ *
+ * The threads in this test that perform the rim-sequence are termed
+ * as rim_threads.
+ */
+
+/*
+ * A "corruption" is defined to be the failed COMPARE operation in a
+ * rim-sequence.
+ *
+ * A rim_thread that detects a corruption informs about it to all the
+ * other rim_threads, and the mem_snapshot thread.
+ */
+static volatile unsigned int corruption_found;
+
+/*
+ * This defines the maximum number of rim_threads in this test.
+ *
+ * The THREAD_ID_BITS denote the number of bits required
+ * to represent the thread_ids [0..MAX_THREADS - 1].
+ * We are being a bit paranoid here and set it to 8 bits,
+ * though 6 bits suffice.
+ *
+ */
+#define MAX_THREADS 64
+#define THREAD_ID_BITS 8
+#define THREAD_ID_MASK ((1 << THREAD_ID_BITS) - 1)
+static unsigned int rim_thread_ids[MAX_THREADS];
+static pthread_t rim_threads[MAX_THREADS];
+
+
+/*
+ * Each rim_thread works on an exclusive "chunk" of size
+ * RIM_CHUNK_SIZE.
+ *
+ * The ith rim_thread works on the ith chunk.
+ *
+ * The ith chunk begins at
+ * map1 + (i * RIM_CHUNK_SIZE)
+ */
+#define RIM_CHUNK_SIZE 1024
+#define BITS_PER_BYTE 8
+#define WORD_SIZE (sizeof(unsigned int))
+#define WORD_BITS (WORD_SIZE * BITS_PER_BYTE)
+#define WORDS_PER_CHUNK (RIM_CHUNK_SIZE/WORD_SIZE)
+
+static inline char *compute_chunk_start_addr(unsigned int thread_id)
+{
+ char *chunk_start;
+
+ chunk_start = (char *)((unsigned long)map1 +
+ (thread_id * RIM_CHUNK_SIZE));
+
+ return chunk_start;
+}
+
+/*
+ * The "word-offset" of a word-aligned address inside a chunk, is
+ * defined to be the number of words that precede the address in that
+ * chunk.
+ *
+ * WORD_OFFSET_BITS denote the number of bits required to represent
+ * the word-offsets of all the word-aligned addresses of a chunk.
+ */
+#define WORD_OFFSET_BITS (__builtin_ctz(WORDS_PER_CHUNK))
+#define WORD_OFFSET_MASK ((1 << WORD_OFFSET_BITS) - 1)
+
+static inline unsigned int compute_word_offset(char *start, unsigned int *addr)
+{
+ unsigned int delta_bytes, ret;
+ delta_bytes = (unsigned long)addr - (unsigned long)start;
+
+ ret = delta_bytes/WORD_SIZE;
+
+ return ret;
+}
+
+/*
+ * A "sweep" is defined to be the sequential execution of the
+ * rim-sequence by a rim_thread on its chunk one word at a time,
+ * starting from the first word of its chunk and ending with the last
+ * word of its chunk.
+ *
+ * Each sweep of a rim_thread is uniquely identified by a sweep_id.
+ * SWEEP_ID_BITS denote the number of bits required to represent
+ * the sweep_ids of rim_threads.
+ *
+ * As to why SWEEP_ID_BITS are computed as a function of THREAD_ID_BITS,
+ * WORD_OFFSET_BITS, and WORD_BITS, see the "store-pattern" below.
+ */
+#define SWEEP_ID_BITS (WORD_BITS - (THREAD_ID_BITS + WORD_OFFSET_BITS))
+#define SWEEP_ID_MASK ((1 << SWEEP_ID_BITS) - 1)
+
+/*
+ * A "store-pattern" is the word-pattern that is stored into a word
+ * location in the 4)STORE step of the rim-sequence.
+ *
+ * In the store-pattern, we shall encode:
+ *
+ * - The thread-id of the rim_thread performing the store
+ * (The most significant THREAD_ID_BITS)
+ *
+ * - The word-offset of the address into which the store is being
+ * performed (The next WORD_OFFSET_BITS)
+ *
+ * - The sweep_id of the current sweep in which the store is
+ * being performed. (The lower SWEEP_ID_BITS)
+ *
+ * Store Pattern: 32 bits
+ * |------------------|--------------------|---------------------------------|
+ * | Thread id | Word offset | sweep_id |
+ * |------------------|--------------------|---------------------------------|
+ * THREAD_ID_BITS WORD_OFFSET_BITS SWEEP_ID_BITS
+ *
+ * In the store pattern, the (Thread-id + Word-offset) uniquely identify the
+ * address to which the store is being performed i.e,
+ * address == map1 +
+ * (Thread-id * RIM_CHUNK_SIZE) + (Word-offset * WORD_SIZE)
+ *
+ * And the sweep_id in the store pattern identifies the time when the
+ * store was performed by the rim_thread.
+ *
+ * We shall use this property in the 3)COMPARE step of the
+ * rim-sequence.
+ */
+#define SWEEP_ID_SHIFT 0
+#define WORD_OFFSET_SHIFT (SWEEP_ID_BITS)
+#define THREAD_ID_SHIFT (WORD_OFFSET_BITS + SWEEP_ID_BITS)
+
+/*
+ * Compute the store pattern for a given thread with id @tid, at
+ * location @addr in the sweep identified by @sweep_id
+ */
+static inline unsigned int compute_store_pattern(unsigned int tid,
+ unsigned int *addr,
+ unsigned int sweep_id)
+{
+ unsigned int ret = 0;
+ char *start = compute_chunk_start_addr(tid);
+ unsigned int word_offset = compute_word_offset(start, addr);
+
+ ret += (tid & THREAD_ID_MASK) << THREAD_ID_SHIFT;
+ ret += (word_offset & WORD_OFFSET_MASK) << WORD_OFFSET_SHIFT;
+ ret += (sweep_id & SWEEP_ID_MASK) << SWEEP_ID_SHIFT;
+ return ret;
+}
+
+/* Extract the thread-id from the given store-pattern */
+static inline unsigned int extract_tid(unsigned int pattern)
+{
+ unsigned int ret;
+
+ ret = (pattern >> THREAD_ID_SHIFT) & THREAD_ID_MASK;
+ return ret;
+}
+
+/* Extract the word-offset from the given store-pattern */
+static inline unsigned int extract_word_offset(unsigned int pattern)
+{
+ unsigned int ret;
+
+ ret = (pattern >> WORD_OFFSET_SHIFT) & WORD_OFFSET_MASK;
+
+ return ret;
+}
+
+/* Extract the sweep-id from the given store-pattern */
+static inline unsigned int extract_sweep_id(unsigned int pattern)
+
+{
+ unsigned int ret;
+
+ ret = (pattern >> SWEEP_ID_SHIFT) & SWEEP_ID_MASK;
+
+ return ret;
+}
+
+/************************************************************
+ * *
+ * Logging the output of the verification *
+ * *
+ ************************************************************/
+#define LOGDIR_NAME_SIZE 100
+static char logdir[LOGDIR_NAME_SIZE];
+
+static FILE *fp[MAX_THREADS];
+static const char logfilename[] ="Thread-%02d-Chunk";
+
+static inline void start_verification_log(unsigned int tid,
+ unsigned int *addr,
+ unsigned int cur_sweep_id,
+ unsigned int prev_sweep_id)
+{
+ FILE *f;
+ char logfile[30];
+ char path[LOGDIR_NAME_SIZE + 30];
+ char separator[2] = "/";
+ char *chunk_start = compute_chunk_start_addr(tid);
+ unsigned int size = RIM_CHUNK_SIZE;
+
+ sprintf(logfile, logfilename, tid);
+ strcpy(path, logdir);
+ strcat(path, separator);
+ strcat(path, logfile);
+ f = fopen(path, "w");
+
+ if (!f) {
+ err_msg("Unable to create logfile\n");
+ }
+
+ fp[tid] = f;
+
+ fprintf(f, "----------------------------------------------------------\n");
+ fprintf(f, "PID = %d\n", rim_process_pid);
+ fprintf(f, "Thread id = %02d\n", tid);
+ fprintf(f, "Chunk Start Addr = 0x%016lx\n", (unsigned long)chunk_start);
+ fprintf(f, "Chunk Size = %d\n", size);
+ fprintf(f, "Next Store Addr = 0x%016lx\n", (unsigned long)addr);
+ fprintf(f, "Current sweep-id = 0x%08x\n", cur_sweep_id);
+ fprintf(f, "Previous sweep-id = 0x%08x\n", prev_sweep_id);
+ fprintf(f, "----------------------------------------------------------\n");
+}
+
+static inline void log_anamoly(unsigned int tid, unsigned int *addr,
+ unsigned int expected, unsigned int observed)
+{
+ FILE *f = fp[tid];
+
+ fprintf(f, "Thread %02d: Addr 0x%lx: Expected 0x%x, Observed 0x%x\n",
+ tid, (unsigned long)addr, expected, observed);
+ fprintf(f, "Thread %02d: Expected Thread id = %02d\n", tid, extract_tid(expected));
+ fprintf(f, "Thread %02d: Observed Thread id = %02d\n", tid, extract_tid(observed));
+ fprintf(f, "Thread %02d: Expected Word offset = %03d\n", tid, extract_word_offset(expected));
+ fprintf(f, "Thread %02d: Observed Word offset = %03d\n", tid, extract_word_offset(observed));
+ fprintf(f, "Thread %02d: Expected sweep-id = 0x%x\n", tid, extract_sweep_id(expected));
+ fprintf(f, "Thread %02d: Observed sweep-id = 0x%x\n", tid, extract_sweep_id(observed));
+ fprintf(f, "----------------------------------------------------------\n");
+}
+
+static inline void end_verification_log(unsigned int tid, unsigned nr_anamolies)
+{
+ FILE *f = fp[tid];
+ char logfile[30];
+ char path[LOGDIR_NAME_SIZE + 30];
+ char separator[] = "/";
+
+ fclose(f);
+
+ if (nr_anamolies == 0) {
+ remove(path);
+ return;
+ }
+
+ sprintf(logfile, logfilename, tid);
+ strcpy(path, logdir);
+ strcat(path, separator);
+ strcat(path, logfile);
+
+ printf("Thread %02d chunk has %d corrupted words. For details check %s\n",
+ tid, nr_anamolies, path);
+}
+
+/*
+ * When a COMPARE step of a rim-sequence fails, the rim_thread informs
+ * everyone else via the shared_memory pointed to by
+ * corruption_found variable. On seeing this, every thread verifies the
+ * content of its chunk as follows.
+ *
+ * Suppose a thread identified with @tid was about to store (but not
+ * yet stored) to @next_store_addr in its current sweep identified
+ * @cur_sweep_id. Let @prev_sweep_id indicate the previous sweep_id.
+ *
+ * This implies that for all the addresses @addr < @next_store_addr,
+ * Thread @tid has already performed a store as part of its current
+ * sweep. Hence we expect the content of such @addr to be:
+ * |-------------------------------------------------|
+ * | tid | word_offset(addr) | cur_sweep_id |
+ * |-------------------------------------------------|
+ *
+ * Since Thread @tid is yet to perform stores on address
+ * @next_store_addr and above, we expect the content of such an
+ * address @addr to be:
+ * |-------------------------------------------------|
+ * | tid | word_offset(addr) | prev_sweep_id |
+ * |-------------------------------------------------|
+ *
+ * The verifier function @verify_chunk does this verification and logs
+ * any anamolies that it finds.
+ */
+static void verify_chunk(unsigned int tid, unsigned int *next_store_addr,
+ unsigned int cur_sweep_id,
+ unsigned int prev_sweep_id)
+{
+ unsigned int *iter_ptr;
+ unsigned int size = RIM_CHUNK_SIZE;
+ unsigned int expected;
+ unsigned int observed;
+ char *chunk_start = compute_chunk_start_addr(tid);
+
+ int nr_anamolies = 0;
+
+ start_verification_log(tid, next_store_addr,
+ cur_sweep_id, prev_sweep_id);
+
+ for (iter_ptr = (unsigned int *)chunk_start;
+ (unsigned long)iter_ptr < (unsigned long)chunk_start + size;
+ iter_ptr++) {
+ unsigned int expected_sweep_id;
+
+ if (iter_ptr < next_store_addr) {
+ expected_sweep_id = cur_sweep_id;
+ } else {
+ expected_sweep_id = prev_sweep_id;
+ }
+
+ expected = compute_store_pattern(tid, iter_ptr, expected_sweep_id);
+
+ dcbf((volatile unsigned int*)iter_ptr); //Flush before reading
+ observed = *iter_ptr;
+
+ if (observed != expected) {
+ nr_anamolies++;
+ log_anamoly(tid, iter_ptr, expected, observed);
+ }
+ }
+
+ end_verification_log(tid, nr_anamolies);
+}
+
+static void set_pthread_cpu(pthread_t th, int cpu)
+{
+ cpu_set_t run_cpu_mask;
+ struct sched_param param;
+
+ CPU_ZERO(&run_cpu_mask);
+ CPU_SET(cpu, &run_cpu_mask);
+ pthread_setaffinity_np(th, sizeof(cpu_set_t), &run_cpu_mask);
+
+ param.sched_priority = 1;
+ if (0 && sched_setscheduler(0, SCHED_FIFO, &param) == -1) {
+ /* haven't reproduced with this setting, it kills random preemption which may be a factor */
+ fprintf(stderr, "could not set SCHED_FIFO, run as root?\n");
+ }
+}
+
+static void set_mycpu(int cpu)
+{
+ cpu_set_t run_cpu_mask;
+ struct sched_param param;
+
+ CPU_ZERO(&run_cpu_mask);
+ CPU_SET(cpu, &run_cpu_mask);
+ sched_setaffinity(0, sizeof(cpu_set_t), &run_cpu_mask);
+
+ param.sched_priority = 1;
+ if (0 && sched_setscheduler(0, SCHED_FIFO, &param) == -1) {
+ fprintf(stderr, "could not set SCHED_FIFO, run as root?\n");
+ }
+}
+
+static volatile int segv_wait;
+
+static void segv_handler(int signo, siginfo_t *info, void *extra)
+{
+ while (segv_wait) {
+ sched_yield();
+ }
+
+}
+
+static void set_segv_handler(void)
+{
+ struct sigaction sa;
+
+ sa.sa_flags = SA_SIGINFO;
+ sa.sa_sigaction = segv_handler;
+
+ if (sigaction(SIGSEGV, &sa, NULL) == -1) {
+ perror("sigaction");
+ exit(EXIT_FAILURE);
+ }
+}
+
+int timeout = 0;
+/*
+ * This function is executed by every rim_thread.
+ *
+ * This function performs sweeps over the exclusive chunks of the
+ * rim_threads executing the rim-sequence one word at a time.
+ */
+static void *rim_fn(void *arg)
+{
+ unsigned int tid = *((unsigned int *)arg);
+
+ int size = RIM_CHUNK_SIZE;
+ char *chunk_start = compute_chunk_start_addr(tid);
+
+ unsigned int prev_sweep_id;
+ unsigned int cur_sweep_id = 0;
+
+ /* word access */
+ unsigned int pattern = cur_sweep_id;
+ unsigned int *pattern_ptr = &pattern;
+ unsigned int *w_ptr, read_data;
+
+ set_segv_handler();
+
+ /*
+ * Let us initialize the chunk:
+ *
+ * Each word-aligned address addr in the chunk,
+ * is initialized to :
+ * |-------------------------------------------------|
+ * | tid | word_offset(addr) | 0 |
+ * |-------------------------------------------------|
+ */
+ for (w_ptr = (unsigned int *)chunk_start;
+ (unsigned long)w_ptr < (unsigned long)(chunk_start) + size;
+ w_ptr++) {
+
+ *pattern_ptr = compute_store_pattern(tid, w_ptr, cur_sweep_id);
+ *w_ptr = *pattern_ptr;
+ }
+
+ while (!corruption_found && !timeout) {
+ prev_sweep_id = cur_sweep_id;
+ cur_sweep_id = cur_sweep_id + 1;
+
+ for (w_ptr = (unsigned int *)chunk_start;
+ (unsigned long)w_ptr < (unsigned long)(chunk_start) + size;
+ w_ptr++) {
+ unsigned int old_pattern;
+
+ /*
+ * Compute the pattern that we would have
+ * stored at this location in the previous
+ * sweep.
+ */
+ old_pattern = compute_store_pattern(tid, w_ptr, prev_sweep_id);
+
+ /*
+ * FLUSH:Ensure that we flush the contents of
+ * the cache before loading
+ */
+ dcbf((volatile unsigned int*)w_ptr); //Flush
+
+ /* LOAD: Read the value */
+ read_data = *w_ptr; //Load
+
+ /*
+ * COMPARE: Is it the same as what we had stored
+ * in the previous sweep ? It better be!
+ */
+ if (read_data != old_pattern) {
+ /* No it isn't! Tell everyone */
+ corruption_found = 1;
+ }
+
+ /*
+ * Before performing a store, let us check if
+ * any rim_thread has found a corruption.
+ */
+ if (corruption_found || timeout) {
+ /*
+ * Yes. Someone (including us!) has found
+ * a corruption :(
+ *
+ * Let us verify that our chunk is
+ * correct.
+ */
+ /* But first, let us allow the dust to settle down! */
+ verify_chunk(tid, w_ptr, cur_sweep_id, prev_sweep_id);
+
+ return 0;
+ }
+
+ /*
+ * Compute the new pattern that we are going
+ * to write to this location
+ */
+ *pattern_ptr = compute_store_pattern(tid, w_ptr, cur_sweep_id);
+
+ /*
+ * STORE: Now let us write this pattern into
+ * the location
+ */
+ *w_ptr = *pattern_ptr;
+ }
+ }
+
+ return NULL;
+}
+
+
+static unsigned long start_cpu = 0;
+static unsigned long nrthreads = 4;
+
+static pthread_t mem_snapshot_thread;
+
+static void *mem_snapshot_fn(void *arg)
+{
+ int page_size = getpagesize();
+ size_t size = page_size;
+ void *tmp = malloc(size);
+
+ while (!corruption_found && !timeout) {
+ /* Stop memory migration once corruption is found */
+ segv_wait = 1;
+
+ mprotect(map1, size, PROT_READ);
+
+ /*
+ * Load from the working alias (map1). Loading from map2
+ * also fails.
+ */
+ memcpy(tmp, map1, size);
+
+ /*
+ * Stores must go via map2 which has write permissions, but
+ * the corrupted data tends to be seen in the snapshot buffer,
+ * so corruption does not appear to be introduced at the
+ * copy-back via map2 alias here.
+ */
+ memcpy(map2, tmp, size);
+ /*
+ * Before releasing other threads, must ensure the copy
+ * back to
+ */
+ asm volatile("sync" ::: "memory");
+ mprotect(map1, size, PROT_READ|PROT_WRITE);
+ asm volatile("sync" ::: "memory");
+ segv_wait = 0;
+
+ usleep(1); /* This value makes a big difference */
+ }
+
+ return 0;
+}
+
+void alrm_sighandler(int sig)
+{
+ timeout = 1;
+}
+
+int main(int argc, char *argv[])
+{
+ int c;
+ int page_size = getpagesize();
+ time_t now;
+ int i, dir_error;
+ pthread_attr_t attr;
+ key_t shm_key = (key_t) getpid();
+ int shmid, run_time = 20 * 60;
+ struct sigaction sa_alrm;
+
+ snprintf(logdir, LOGDIR_NAME_SIZE,
+ "/tmp/logdir-%u", (unsigned int)getpid());
+ while ((c = getopt(argc, argv, "r:hn:l:t:")) != -1) {
+ switch(c) {
+ case 'r':
+ start_cpu = strtoul(optarg, NULL, 10);
+ break;
+ case 'h':
+ printf("%s [-r <start_cpu>] [-n <nrthreads>] [-l <logdir>] [-t <timeout>]\n", argv[0]);
+ exit(0);
+ break;
+ case 'n':
+ nrthreads = strtoul(optarg, NULL, 10);
+ break;
+ case 'l':
+ strncpy(logdir, optarg, LOGDIR_NAME_SIZE - 1);
+ break;
+ case 't':
+ run_time = strtoul(optarg, NULL, 10);
+ break;
+ default:
+ printf("invalid option\n");
+ exit(0);
+ break;
+ }
+ }
+
+ if (nrthreads > MAX_THREADS)
+ nrthreads = MAX_THREADS;
+
+ shmid = shmget(shm_key, page_size, IPC_CREAT|0666);
+ if (shmid < 0) {
+ err_msg("Failed shmget\n");
+ }
+
+ map1 = shmat(shmid, NULL, 0);
+ if (map1 == (void *) -1) {
+ err_msg("Failed shmat");
+ }
+
+ map2 = shmat(shmid, NULL, 0);
+ if (map2 == (void *) -1) {
+ err_msg("Failed shmat");
+ }
+
+ dir_error = mkdir(logdir, 0755);
+
+ if (dir_error) {
+ err_msg("Failed mkdir");
+ }
+
+ printf("start_cpu list:%lu\n", start_cpu);
+ printf("number of worker threads:%lu + 1 snapshot thread\n", nrthreads);
+ printf("Allocated address:0x%016lx + secondary map:0x%016lx\n", (unsigned long)map1, (unsigned long)map2);
+ printf("logdir at : %s\n", logdir);
+ printf("Timeout: %d seconds\n", run_time);
+
+ time(&now);
+ printf("=================================\n");
+ printf(" Starting Test\n");
+ printf(" %s", ctime(&now));
+ printf("=================================\n");
+
+ for (i = 0; i < nrthreads; i++) {
+ if (1 && !fork()) {
+ prctl(PR_SET_PDEATHSIG, SIGKILL);
+ set_mycpu(start_cpu + i);
+ for (;;)
+ sched_yield();
+ exit(0);
+ }
+ }
+
+
+ sa_alrm.sa_handler = &alrm_sighandler;
+ sigemptyset(&sa_alrm.sa_mask);
+ sa_alrm.sa_flags = 0;
+
+ if (sigaction(SIGALRM, &sa_alrm, 0) == -1) {
+ err_msg("Failed signal handler registration\n");
+ }
+
+ alarm(run_time);
+
+ pthread_attr_init(&attr);
+ for (i = 0; i < nrthreads; i++) {
+ rim_thread_ids[i] = i;
+ pthread_create(&rim_threads[i], &attr, rim_fn, &rim_thread_ids[i]);
+ set_pthread_cpu(rim_threads[i], start_cpu + i);
+ }
+
+ pthread_create(&mem_snapshot_thread, &attr, mem_snapshot_fn, map1);
+ set_pthread_cpu(mem_snapshot_thread, start_cpu + i);
+
+
+ pthread_join(mem_snapshot_thread, NULL);
+ for (i = 0; i < nrthreads; i++) {
+ pthread_join(rim_threads[i], NULL);
+ }
+
+ if (!timeout) {
+ time(&now);
+ printf("=================================\n");
+ printf(" Data Corruption Detected\n");
+ printf(" %s", ctime(&now));
+ printf(" See logfiles in %s\n", logdir);
+ printf("=================================\n");
+ return 1;
+ }
+ return 0;
+}
diff --git a/tools/testing/selftests/powerpc/pmu/.gitignore b/tools/testing/selftests/powerpc/pmu/.gitignore
new file mode 100644
index 000000000..e748f336e
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/.gitignore
@@ -0,0 +1,3 @@
+count_instructions
+l3_bank_test
+per_event_excludes
diff --git a/tools/testing/selftests/powerpc/pmu/Makefile b/tools/testing/selftests/powerpc/pmu/Makefile
new file mode 100644
index 000000000..19046db99
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/Makefile
@@ -0,0 +1,46 @@
+# SPDX-License-Identifier: GPL-2.0
+noarg:
+ $(MAKE) -C ../
+
+TEST_GEN_PROGS := count_instructions l3_bank_test per_event_excludes
+EXTRA_SOURCES := ../harness.c event.c lib.c ../utils.c
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+all: $(TEST_GEN_PROGS) ebb
+
+$(TEST_GEN_PROGS): $(EXTRA_SOURCES)
+
+# loop.S can only be built 64-bit
+$(OUTPUT)/count_instructions: loop.S count_instructions.c $(EXTRA_SOURCES)
+ $(CC) $(CFLAGS) -m64 -o $@ $^
+
+$(OUTPUT)/per_event_excludes: ../utils.c
+
+DEFAULT_RUN_TESTS := $(RUN_TESTS)
+override define RUN_TESTS
+ $(DEFAULT_RUN_TESTS)
+ TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests
+endef
+
+DEFAULT_EMIT_TESTS := $(EMIT_TESTS)
+override define EMIT_TESTS
+ $(DEFAULT_EMIT_TESTS)
+ TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests
+endef
+
+DEFAULT_INSTALL_RULE := $(INSTALL_RULE)
+override define INSTALL_RULE
+ $(DEFAULT_INSTALL_RULE)
+ TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install
+endef
+
+clean:
+ $(RM) $(TEST_GEN_PROGS) $(OUTPUT)/loop.o
+ TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean
+
+ebb:
+ TARGET=$@; BUILD_TARGET=$$OUTPUT/$$TARGET; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $$TARGET all
+
+.PHONY: all run_tests clean ebb
diff --git a/tools/testing/selftests/powerpc/pmu/count_instructions.c b/tools/testing/selftests/powerpc/pmu/count_instructions.c
new file mode 100644
index 000000000..4622117b2
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/count_instructions.c
@@ -0,0 +1,147 @@
+/*
+ * Copyright 2013, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdbool.h>
+#include <string.h>
+#include <sys/prctl.h>
+
+#include "event.h"
+#include "utils.h"
+#include "lib.h"
+
+extern void thirty_two_instruction_loop(u64 loops);
+
+static void setup_event(struct event *e, u64 config, char *name)
+{
+ event_init_opts(e, config, PERF_TYPE_HARDWARE, name);
+
+ e->attr.disabled = 1;
+ e->attr.exclude_kernel = 1;
+ e->attr.exclude_hv = 1;
+ e->attr.exclude_idle = 1;
+}
+
+static int do_count_loop(struct event *events, u64 instructions,
+ u64 overhead, bool report)
+{
+ s64 difference, expected;
+ double percentage;
+
+ prctl(PR_TASK_PERF_EVENTS_ENABLE);
+
+ /* Run for 1M instructions */
+ thirty_two_instruction_loop(instructions >> 5);
+
+ prctl(PR_TASK_PERF_EVENTS_DISABLE);
+
+ event_read(&events[0]);
+ event_read(&events[1]);
+
+ expected = instructions + overhead;
+ difference = events[0].result.value - expected;
+ percentage = (double)difference / events[0].result.value * 100;
+
+ if (report) {
+ event_report(&events[0]);
+ event_report(&events[1]);
+
+ printf("Looped for %llu instructions, overhead %llu\n", instructions, overhead);
+ printf("Expected %llu\n", expected);
+ printf("Actual %llu\n", events[0].result.value);
+ printf("Delta %lld, %f%%\n", difference, percentage);
+ }
+
+ event_reset(&events[0]);
+ event_reset(&events[1]);
+
+ if (difference < 0)
+ difference = -difference;
+
+ /* Tolerate a difference below 0.0001 % */
+ difference *= 10000 * 100;
+ if (difference / events[0].result.value)
+ return -1;
+
+ return 0;
+}
+
+/* Count how many instructions it takes to do a null loop */
+static u64 determine_overhead(struct event *events)
+{
+ u64 current, overhead;
+ int i;
+
+ do_count_loop(events, 0, 0, false);
+ overhead = events[0].result.value;
+
+ for (i = 0; i < 100; i++) {
+ do_count_loop(events, 0, 0, false);
+ current = events[0].result.value;
+ if (current < overhead) {
+ printf("Replacing overhead %llu with %llu\n", overhead, current);
+ overhead = current;
+ }
+ }
+
+ return overhead;
+}
+
+static int test_body(void)
+{
+ struct event events[2];
+ u64 overhead;
+
+ setup_event(&events[0], PERF_COUNT_HW_INSTRUCTIONS, "instructions");
+ setup_event(&events[1], PERF_COUNT_HW_CPU_CYCLES, "cycles");
+
+ if (event_open(&events[0])) {
+ perror("perf_event_open");
+ return -1;
+ }
+
+ if (event_open_with_group(&events[1], events[0].fd)) {
+ perror("perf_event_open");
+ return -1;
+ }
+
+ overhead = determine_overhead(events);
+ printf("Overhead of null loop: %llu instructions\n", overhead);
+
+ /* Run for 1Mi instructions */
+ FAIL_IF(do_count_loop(events, 1000000, overhead, true));
+
+ /* Run for 10Mi instructions */
+ FAIL_IF(do_count_loop(events, 10000000, overhead, true));
+
+ /* Run for 100Mi instructions */
+ FAIL_IF(do_count_loop(events, 100000000, overhead, true));
+
+ /* Run for 1Bi instructions */
+ FAIL_IF(do_count_loop(events, 1000000000, overhead, true));
+
+ /* Run for 16Bi instructions */
+ FAIL_IF(do_count_loop(events, 16000000000, overhead, true));
+
+ /* Run for 64Bi instructions */
+ FAIL_IF(do_count_loop(events, 64000000000, overhead, true));
+
+ event_close(&events[0]);
+ event_close(&events[1]);
+
+ return 0;
+}
+
+static int count_instructions(void)
+{
+ return eat_cpu(test_body);
+}
+
+int main(void)
+{
+ return test_harness(count_instructions, "count_instructions");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/.gitignore b/tools/testing/selftests/powerpc/pmu/ebb/.gitignore
new file mode 100644
index 000000000..42bddbed8
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/.gitignore
@@ -0,0 +1,22 @@
+reg_access_test
+event_attributes_test
+cycles_test
+cycles_with_freeze_test
+pmc56_overflow_test
+ebb_vs_cpu_event_test
+cpu_event_vs_ebb_test
+cpu_event_pinned_vs_ebb_test
+task_event_vs_ebb_test
+task_event_pinned_vs_ebb_test
+multi_ebb_procs_test
+multi_counter_test
+pmae_handling_test
+close_clears_pmcc_test
+instruction_count_test
+fork_cleanup_test
+ebb_on_child_test
+ebb_on_willing_child_test
+back_to_back_ebbs_test
+lost_exception_test
+no_handler_test
+cycles_with_mmcr2_test
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/Makefile b/tools/testing/selftests/powerpc/pmu/ebb/Makefile
new file mode 100644
index 000000000..bd5dfa509
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/Makefile
@@ -0,0 +1,28 @@
+# SPDX-License-Identifier: GPL-2.0
+noarg:
+ $(MAKE) -C ../../
+
+# The EBB handler is 64-bit code and everything links against it
+CFLAGS += -m64
+
+TEST_GEN_PROGS := reg_access_test event_attributes_test cycles_test \
+ cycles_with_freeze_test pmc56_overflow_test \
+ ebb_vs_cpu_event_test cpu_event_vs_ebb_test \
+ cpu_event_pinned_vs_ebb_test task_event_vs_ebb_test \
+ task_event_pinned_vs_ebb_test multi_ebb_procs_test \
+ multi_counter_test pmae_handling_test \
+ close_clears_pmcc_test instruction_count_test \
+ fork_cleanup_test ebb_on_child_test \
+ ebb_on_willing_child_test back_to_back_ebbs_test \
+ lost_exception_test no_handler_test \
+ cycles_with_mmcr2_test
+
+top_srcdir = ../../../../../..
+include ../../../lib.mk
+
+$(TEST_GEN_PROGS): ../../harness.c ../../utils.c ../event.c ../lib.c \
+ ebb.c ebb_handler.S trace.c busy_loop.S
+
+$(OUTPUT)/instruction_count_test: ../loop.S
+
+$(OUTPUT)/lost_exception_test: ../lib.c
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c b/tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c
new file mode 100644
index 000000000..031baa436
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c
@@ -0,0 +1,106 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "ebb.h"
+
+
+#define NUMBER_OF_EBBS 50
+
+/*
+ * Test that if we overflow the counter while in the EBB handler, we take
+ * another EBB on exiting from the handler.
+ *
+ * We do this by counting with a stupidly low sample period, causing us to
+ * overflow the PMU while we're still in the EBB handler, leading to another
+ * EBB.
+ *
+ * We get out of what would otherwise be an infinite loop by leaving the
+ * counter frozen once we've taken enough EBBs.
+ */
+
+static void ebb_callee(void)
+{
+ uint64_t siar, val;
+
+ val = mfspr(SPRN_BESCR);
+ if (!(val & BESCR_PMEO)) {
+ ebb_state.stats.spurious++;
+ goto out;
+ }
+
+ ebb_state.stats.ebb_count++;
+ trace_log_counter(ebb_state.trace, ebb_state.stats.ebb_count);
+
+ /* Resets the PMC */
+ count_pmc(1, sample_period);
+
+out:
+ if (ebb_state.stats.ebb_count == NUMBER_OF_EBBS)
+ /* Reset but leave counters frozen */
+ reset_ebb_with_clear_mask(MMCR0_PMAO);
+ else
+ /* Unfreezes */
+ reset_ebb();
+
+ /* Do some stuff to chew some cycles and pop the counter */
+ siar = mfspr(SPRN_SIAR);
+ trace_log_reg(ebb_state.trace, SPRN_SIAR, siar);
+
+ val = mfspr(SPRN_PMC1);
+ trace_log_reg(ebb_state.trace, SPRN_PMC1, val);
+
+ val = mfspr(SPRN_MMCR0);
+ trace_log_reg(ebb_state.trace, SPRN_MMCR0, val);
+}
+
+int back_to_back_ebbs(void)
+{
+ struct event event;
+
+ SKIP_IF(!ebb_is_supported());
+
+ event_init_named(&event, 0x1001e, "cycles");
+ event_leader_ebb_init(&event);
+
+ event.attr.exclude_kernel = 1;
+ event.attr.exclude_hv = 1;
+ event.attr.exclude_idle = 1;
+
+ FAIL_IF(event_open(&event));
+
+ setup_ebb_handler(ebb_callee);
+
+ FAIL_IF(ebb_event_enable(&event));
+
+ sample_period = 5;
+
+ ebb_freeze_pmcs();
+ mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+ ebb_global_enable();
+ ebb_unfreeze_pmcs();
+
+ while (ebb_state.stats.ebb_count < NUMBER_OF_EBBS)
+ FAIL_IF(core_busy_loop());
+
+ ebb_global_disable();
+ ebb_freeze_pmcs();
+
+ dump_ebb_state();
+
+ event_close(&event);
+
+ FAIL_IF(ebb_state.stats.ebb_count != NUMBER_OF_EBBS);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(back_to_back_ebbs, "back_to_back_ebbs");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/busy_loop.S b/tools/testing/selftests/powerpc/pmu/ebb/busy_loop.S
new file mode 100644
index 000000000..c7e4093f1
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/busy_loop.S
@@ -0,0 +1,271 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <ppc-asm.h>
+
+ .text
+
+FUNC_START(core_busy_loop)
+ stdu %r1, -168(%r1)
+ std r14, 160(%r1)
+ std r15, 152(%r1)
+ std r16, 144(%r1)
+ std r17, 136(%r1)
+ std r18, 128(%r1)
+ std r19, 120(%r1)
+ std r20, 112(%r1)
+ std r21, 104(%r1)
+ std r22, 96(%r1)
+ std r23, 88(%r1)
+ std r24, 80(%r1)
+ std r25, 72(%r1)
+ std r26, 64(%r1)
+ std r27, 56(%r1)
+ std r28, 48(%r1)
+ std r29, 40(%r1)
+ std r30, 32(%r1)
+ std r31, 24(%r1)
+
+ li r3, 0x3030
+ std r3, -96(%r1)
+ li r4, 0x4040
+ std r4, -104(%r1)
+ li r5, 0x5050
+ std r5, -112(%r1)
+ li r6, 0x6060
+ std r6, -120(%r1)
+ li r7, 0x7070
+ std r7, -128(%r1)
+ li r8, 0x0808
+ std r8, -136(%r1)
+ li r9, 0x0909
+ std r9, -144(%r1)
+ li r10, 0x1010
+ std r10, -152(%r1)
+ li r11, 0x1111
+ std r11, -160(%r1)
+ li r14, 0x1414
+ std r14, -168(%r1)
+ li r15, 0x1515
+ std r15, -176(%r1)
+ li r16, 0x1616
+ std r16, -184(%r1)
+ li r17, 0x1717
+ std r17, -192(%r1)
+ li r18, 0x1818
+ std r18, -200(%r1)
+ li r19, 0x1919
+ std r19, -208(%r1)
+ li r20, 0x2020
+ std r20, -216(%r1)
+ li r21, 0x2121
+ std r21, -224(%r1)
+ li r22, 0x2222
+ std r22, -232(%r1)
+ li r23, 0x2323
+ std r23, -240(%r1)
+ li r24, 0x2424
+ std r24, -248(%r1)
+ li r25, 0x2525
+ std r25, -256(%r1)
+ li r26, 0x2626
+ std r26, -264(%r1)
+ li r27, 0x2727
+ std r27, -272(%r1)
+ li r28, 0x2828
+ std r28, -280(%r1)
+ li r29, 0x2929
+ std r29, -288(%r1)
+ li r30, 0x3030
+ li r31, 0x3131
+
+ li r3, 0
+0: addi r3, r3, 1
+ cmpwi r3, 100
+ blt 0b
+
+ /* Return 1 (fail) unless we get through all the checks */
+ li r3, 1
+
+ /* Check none of our registers have been corrupted */
+ cmpwi r4, 0x4040
+ bne 1f
+ cmpwi r5, 0x5050
+ bne 1f
+ cmpwi r6, 0x6060
+ bne 1f
+ cmpwi r7, 0x7070
+ bne 1f
+ cmpwi r8, 0x0808
+ bne 1f
+ cmpwi r9, 0x0909
+ bne 1f
+ cmpwi r10, 0x1010
+ bne 1f
+ cmpwi r11, 0x1111
+ bne 1f
+ cmpwi r14, 0x1414
+ bne 1f
+ cmpwi r15, 0x1515
+ bne 1f
+ cmpwi r16, 0x1616
+ bne 1f
+ cmpwi r17, 0x1717
+ bne 1f
+ cmpwi r18, 0x1818
+ bne 1f
+ cmpwi r19, 0x1919
+ bne 1f
+ cmpwi r20, 0x2020
+ bne 1f
+ cmpwi r21, 0x2121
+ bne 1f
+ cmpwi r22, 0x2222
+ bne 1f
+ cmpwi r23, 0x2323
+ bne 1f
+ cmpwi r24, 0x2424
+ bne 1f
+ cmpwi r25, 0x2525
+ bne 1f
+ cmpwi r26, 0x2626
+ bne 1f
+ cmpwi r27, 0x2727
+ bne 1f
+ cmpwi r28, 0x2828
+ bne 1f
+ cmpwi r29, 0x2929
+ bne 1f
+ cmpwi r30, 0x3030
+ bne 1f
+ cmpwi r31, 0x3131
+ bne 1f
+
+ /* Load junk into all our registers before we reload them from the stack. */
+ li r3, 0xde
+ li r4, 0xad
+ li r5, 0xbe
+ li r6, 0xef
+ li r7, 0xde
+ li r8, 0xad
+ li r9, 0xbe
+ li r10, 0xef
+ li r11, 0xde
+ li r14, 0xad
+ li r15, 0xbe
+ li r16, 0xef
+ li r17, 0xde
+ li r18, 0xad
+ li r19, 0xbe
+ li r20, 0xef
+ li r21, 0xde
+ li r22, 0xad
+ li r23, 0xbe
+ li r24, 0xef
+ li r25, 0xde
+ li r26, 0xad
+ li r27, 0xbe
+ li r28, 0xef
+ li r29, 0xdd
+
+ ld r3, -96(%r1)
+ cmpwi r3, 0x3030
+ bne 1f
+ ld r4, -104(%r1)
+ cmpwi r4, 0x4040
+ bne 1f
+ ld r5, -112(%r1)
+ cmpwi r5, 0x5050
+ bne 1f
+ ld r6, -120(%r1)
+ cmpwi r6, 0x6060
+ bne 1f
+ ld r7, -128(%r1)
+ cmpwi r7, 0x7070
+ bne 1f
+ ld r8, -136(%r1)
+ cmpwi r8, 0x0808
+ bne 1f
+ ld r9, -144(%r1)
+ cmpwi r9, 0x0909
+ bne 1f
+ ld r10, -152(%r1)
+ cmpwi r10, 0x1010
+ bne 1f
+ ld r11, -160(%r1)
+ cmpwi r11, 0x1111
+ bne 1f
+ ld r14, -168(%r1)
+ cmpwi r14, 0x1414
+ bne 1f
+ ld r15, -176(%r1)
+ cmpwi r15, 0x1515
+ bne 1f
+ ld r16, -184(%r1)
+ cmpwi r16, 0x1616
+ bne 1f
+ ld r17, -192(%r1)
+ cmpwi r17, 0x1717
+ bne 1f
+ ld r18, -200(%r1)
+ cmpwi r18, 0x1818
+ bne 1f
+ ld r19, -208(%r1)
+ cmpwi r19, 0x1919
+ bne 1f
+ ld r20, -216(%r1)
+ cmpwi r20, 0x2020
+ bne 1f
+ ld r21, -224(%r1)
+ cmpwi r21, 0x2121
+ bne 1f
+ ld r22, -232(%r1)
+ cmpwi r22, 0x2222
+ bne 1f
+ ld r23, -240(%r1)
+ cmpwi r23, 0x2323
+ bne 1f
+ ld r24, -248(%r1)
+ cmpwi r24, 0x2424
+ bne 1f
+ ld r25, -256(%r1)
+ cmpwi r25, 0x2525
+ bne 1f
+ ld r26, -264(%r1)
+ cmpwi r26, 0x2626
+ bne 1f
+ ld r27, -272(%r1)
+ cmpwi r27, 0x2727
+ bne 1f
+ ld r28, -280(%r1)
+ cmpwi r28, 0x2828
+ bne 1f
+ ld r29, -288(%r1)
+ cmpwi r29, 0x2929
+ bne 1f
+
+ /* Load 0 (success) to return */
+ li r3, 0
+
+1: ld r14, 160(%r1)
+ ld r15, 152(%r1)
+ ld r16, 144(%r1)
+ ld r17, 136(%r1)
+ ld r18, 128(%r1)
+ ld r19, 120(%r1)
+ ld r20, 112(%r1)
+ ld r21, 104(%r1)
+ ld r22, 96(%r1)
+ ld r23, 88(%r1)
+ ld r24, 80(%r1)
+ ld r25, 72(%r1)
+ ld r26, 64(%r1)
+ ld r27, 56(%r1)
+ ld r28, 48(%r1)
+ ld r29, 40(%r1)
+ ld r30, 32(%r1)
+ ld r31, 24(%r1)
+ addi %r1, %r1, 168
+ blr
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/close_clears_pmcc_test.c b/tools/testing/selftests/powerpc/pmu/ebb/close_clears_pmcc_test.c
new file mode 100644
index 000000000..ac18cf617
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/close_clears_pmcc_test.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <setjmp.h>
+#include <signal.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test that closing the EBB event clears MMCR0_PMCC, preventing further access
+ * by userspace to the PMU hardware.
+ */
+
+int close_clears_pmcc(void)
+{
+ struct event event;
+
+ SKIP_IF(!ebb_is_supported());
+
+ event_init_named(&event, 0x1001e, "cycles");
+ event_leader_ebb_init(&event);
+
+ FAIL_IF(event_open(&event));
+
+ ebb_enable_pmc_counting(1);
+ setup_ebb_handler(standard_ebb_callee);
+ ebb_global_enable();
+ FAIL_IF(ebb_event_enable(&event));
+
+ mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+
+ while (ebb_state.stats.ebb_count < 1)
+ FAIL_IF(core_busy_loop());
+
+ ebb_global_disable();
+ event_close(&event);
+
+ FAIL_IF(ebb_state.stats.ebb_count == 0);
+
+ /* The real test is here, do we take a SIGILL when writing PMU regs now
+ * that we have closed the event. We expect that we will. */
+
+ FAIL_IF(catch_sigill(write_pmc1));
+
+ /* We should still be able to read EBB regs though */
+ mfspr(SPRN_EBBHR);
+ mfspr(SPRN_EBBRR);
+ mfspr(SPRN_BESCR);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(close_clears_pmcc, "close_clears_pmcc");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_pinned_vs_ebb_test.c b/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_pinned_vs_ebb_test.c
new file mode 100644
index 000000000..f0632e7fd
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_pinned_vs_ebb_test.c
@@ -0,0 +1,95 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "ebb.h"
+
+
+/*
+ * Tests a pinned cpu event vs an EBB - in that order. The pinned cpu event
+ * should remain and the EBB event should fail to enable.
+ */
+
+static int setup_cpu_event(struct event *event, int cpu)
+{
+ event_init_named(event, 0x400FA, "PM_RUN_INST_CMPL");
+
+ event->attr.pinned = 1;
+
+ event->attr.exclude_kernel = 1;
+ event->attr.exclude_hv = 1;
+ event->attr.exclude_idle = 1;
+
+ SKIP_IF(require_paranoia_below(1));
+ FAIL_IF(event_open_with_cpu(event, cpu));
+ FAIL_IF(event_enable(event));
+
+ return 0;
+}
+
+int cpu_event_pinned_vs_ebb(void)
+{
+ union pipe read_pipe, write_pipe;
+ struct event event;
+ int cpu, rc;
+ pid_t pid;
+
+ SKIP_IF(!ebb_is_supported());
+
+ cpu = pick_online_cpu();
+ FAIL_IF(cpu < 0);
+ FAIL_IF(bind_to_cpu(cpu));
+
+ FAIL_IF(pipe(read_pipe.fds) == -1);
+ FAIL_IF(pipe(write_pipe.fds) == -1);
+
+ pid = fork();
+ if (pid == 0) {
+ /* NB order of pipes looks reversed */
+ exit(ebb_child(write_pipe, read_pipe));
+ }
+
+ /* We setup the cpu event first */
+ rc = setup_cpu_event(&event, cpu);
+ if (rc) {
+ kill_child_and_wait(pid);
+ return rc;
+ }
+
+ /* Signal the child to install its EBB event and wait */
+ if (sync_with_child(read_pipe, write_pipe))
+ /* If it fails, wait for it to exit */
+ goto wait;
+
+ /* Signal the child to run */
+ FAIL_IF(sync_with_child(read_pipe, write_pipe));
+
+wait:
+ /* We expect it to fail to read the event */
+ FAIL_IF(wait_for_child(pid) != 2);
+
+ FAIL_IF(event_disable(&event));
+ FAIL_IF(event_read(&event));
+
+ event_report(&event);
+
+ /* The cpu event should have run */
+ FAIL_IF(event.result.value == 0);
+ FAIL_IF(event.result.enabled != event.result.running);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(cpu_event_pinned_vs_ebb, "cpu_event_pinned_vs_ebb");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_vs_ebb_test.c b/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_vs_ebb_test.c
new file mode 100644
index 000000000..33e56a234
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_vs_ebb_test.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "ebb.h"
+
+
+/*
+ * Tests a cpu event vs an EBB - in that order. The EBB should force the cpu
+ * event off the PMU.
+ */
+
+static int setup_cpu_event(struct event *event, int cpu)
+{
+ event_init_named(event, 0x400FA, "PM_RUN_INST_CMPL");
+
+ event->attr.exclude_kernel = 1;
+ event->attr.exclude_hv = 1;
+ event->attr.exclude_idle = 1;
+
+ SKIP_IF(require_paranoia_below(1));
+ FAIL_IF(event_open_with_cpu(event, cpu));
+ FAIL_IF(event_enable(event));
+
+ return 0;
+}
+
+int cpu_event_vs_ebb(void)
+{
+ union pipe read_pipe, write_pipe;
+ struct event event;
+ int cpu, rc;
+ pid_t pid;
+
+ SKIP_IF(!ebb_is_supported());
+
+ cpu = pick_online_cpu();
+ FAIL_IF(cpu < 0);
+ FAIL_IF(bind_to_cpu(cpu));
+
+ FAIL_IF(pipe(read_pipe.fds) == -1);
+ FAIL_IF(pipe(write_pipe.fds) == -1);
+
+ pid = fork();
+ if (pid == 0) {
+ /* NB order of pipes looks reversed */
+ exit(ebb_child(write_pipe, read_pipe));
+ }
+
+ /* We setup the cpu event first */
+ rc = setup_cpu_event(&event, cpu);
+ if (rc) {
+ kill_child_and_wait(pid);
+ return rc;
+ }
+
+ /* Signal the child to install its EBB event and wait */
+ if (sync_with_child(read_pipe, write_pipe))
+ /* If it fails, wait for it to exit */
+ goto wait;
+
+ /* Signal the child to run */
+ FAIL_IF(sync_with_child(read_pipe, write_pipe));
+
+wait:
+ /* We expect the child to succeed */
+ FAIL_IF(wait_for_child(pid));
+
+ FAIL_IF(event_disable(&event));
+ FAIL_IF(event_read(&event));
+
+ event_report(&event);
+
+ /* The cpu event may have run */
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(cpu_event_vs_ebb, "cpu_event_vs_ebb");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c b/tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c
new file mode 100644
index 000000000..361e0be9d
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c
@@ -0,0 +1,58 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "ebb.h"
+
+
+/*
+ * Basic test that counts user cycles and takes EBBs.
+ */
+int cycles(void)
+{
+ struct event event;
+
+ SKIP_IF(!ebb_is_supported());
+
+ event_init_named(&event, 0x1001e, "cycles");
+ event_leader_ebb_init(&event);
+
+ event.attr.exclude_kernel = 1;
+ event.attr.exclude_hv = 1;
+ event.attr.exclude_idle = 1;
+
+ FAIL_IF(event_open(&event));
+
+ ebb_enable_pmc_counting(1);
+ setup_ebb_handler(standard_ebb_callee);
+ ebb_global_enable();
+ FAIL_IF(ebb_event_enable(&event));
+
+ mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+
+ while (ebb_state.stats.ebb_count < 10) {
+ FAIL_IF(core_busy_loop());
+ FAIL_IF(ebb_check_mmcr0());
+ }
+
+ ebb_global_disable();
+ ebb_freeze_pmcs();
+
+ dump_ebb_state();
+
+ event_close(&event);
+
+ FAIL_IF(ebb_state.stats.ebb_count == 0);
+ FAIL_IF(!ebb_check_count(1, sample_period, 100));
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(cycles, "cycles");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c
new file mode 100644
index 000000000..fe7d0dc2a
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c
@@ -0,0 +1,117 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test of counting cycles while using MMCR0_FC (freeze counters) to only count
+ * parts of the code. This is complicated by the fact that FC is set by the
+ * hardware when the event overflows. We may take the EBB after we have set FC,
+ * so we have to be careful about whether we clear FC at the end of the EBB
+ * handler or not.
+ */
+
+static bool counters_frozen = false;
+static int ebbs_while_frozen = 0;
+
+static void ebb_callee(void)
+{
+ uint64_t mask, val;
+
+ mask = MMCR0_PMAO | MMCR0_FC;
+
+ val = mfspr(SPRN_BESCR);
+ if (!(val & BESCR_PMEO)) {
+ ebb_state.stats.spurious++;
+ goto out;
+ }
+
+ ebb_state.stats.ebb_count++;
+ trace_log_counter(ebb_state.trace, ebb_state.stats.ebb_count);
+
+ val = mfspr(SPRN_MMCR0);
+ trace_log_reg(ebb_state.trace, SPRN_MMCR0, val);
+
+ if (counters_frozen) {
+ trace_log_string(ebb_state.trace, "frozen");
+ ebbs_while_frozen++;
+ mask &= ~MMCR0_FC;
+ }
+
+ count_pmc(1, sample_period);
+out:
+ reset_ebb_with_clear_mask(mask);
+}
+
+int cycles_with_freeze(void)
+{
+ struct event event;
+ uint64_t val;
+ bool fc_cleared;
+
+ SKIP_IF(!ebb_is_supported());
+
+ event_init_named(&event, 0x1001e, "cycles");
+ event_leader_ebb_init(&event);
+
+ event.attr.exclude_kernel = 1;
+ event.attr.exclude_hv = 1;
+ event.attr.exclude_idle = 1;
+
+ FAIL_IF(event_open(&event));
+
+ setup_ebb_handler(ebb_callee);
+ ebb_global_enable();
+ FAIL_IF(ebb_event_enable(&event));
+
+ mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+
+ fc_cleared = false;
+
+ /* Make sure we loop until we take at least one EBB */
+ while ((ebb_state.stats.ebb_count < 20 && !fc_cleared) ||
+ ebb_state.stats.ebb_count < 1)
+ {
+ counters_frozen = false;
+ mb();
+ mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_FC);
+
+ FAIL_IF(core_busy_loop());
+
+ counters_frozen = true;
+ mb();
+ mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_FC);
+
+ val = mfspr(SPRN_MMCR0);
+ if (! (val & MMCR0_FC)) {
+ printf("Outside of loop, FC NOT set MMCR0 0x%lx\n", val);
+ fc_cleared = true;
+ }
+ }
+
+ ebb_global_disable();
+ ebb_freeze_pmcs();
+
+ dump_ebb_state();
+
+ printf("EBBs while frozen %d\n", ebbs_while_frozen);
+
+ event_close(&event);
+
+ FAIL_IF(ebb_state.stats.ebb_count == 0);
+ FAIL_IF(fc_cleared);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(cycles_with_freeze, "cycles_with_freeze");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c
new file mode 100644
index 000000000..b9b30f974
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test of counting cycles while manipulating the user accessible bits in MMCR2.
+ */
+
+/* We use two values because the first freezes PMC1 and so we would get no EBBs */
+#define MMCR2_EXPECTED_1 0x4020100804020000UL /* (FC1P|FC2P|FC3P|FC4P|FC5P|FC6P) */
+#define MMCR2_EXPECTED_2 0x0020100804020000UL /* ( FC2P|FC3P|FC4P|FC5P|FC6P) */
+
+
+int cycles_with_mmcr2(void)
+{
+ struct event event;
+ uint64_t val, expected[2], actual;
+ int i;
+ bool bad_mmcr2;
+
+ SKIP_IF(!ebb_is_supported());
+
+ event_init_named(&event, 0x1001e, "cycles");
+ event_leader_ebb_init(&event);
+
+ event.attr.exclude_kernel = 1;
+ event.attr.exclude_hv = 1;
+ event.attr.exclude_idle = 1;
+
+ FAIL_IF(event_open(&event));
+
+ ebb_enable_pmc_counting(1);
+ setup_ebb_handler(standard_ebb_callee);
+ ebb_global_enable();
+
+ FAIL_IF(ebb_event_enable(&event));
+
+ mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+
+ /* XXX Set of MMCR2 must be after enable */
+ expected[0] = MMCR2_EXPECTED_1;
+ expected[1] = MMCR2_EXPECTED_2;
+ i = 0;
+ bad_mmcr2 = false;
+
+ /* Make sure we loop until we take at least one EBB */
+ while ((ebb_state.stats.ebb_count < 20 && !bad_mmcr2) ||
+ ebb_state.stats.ebb_count < 1)
+ {
+ mtspr(SPRN_MMCR2, expected[i % 2]);
+
+ FAIL_IF(core_busy_loop());
+
+ val = mfspr(SPRN_MMCR2);
+ if (val != expected[i % 2]) {
+ bad_mmcr2 = true;
+ actual = val;
+ }
+
+ i++;
+ }
+
+ ebb_global_disable();
+ ebb_freeze_pmcs();
+
+ dump_ebb_state();
+
+ event_close(&event);
+
+ FAIL_IF(ebb_state.stats.ebb_count == 0);
+
+ if (bad_mmcr2)
+ printf("Bad MMCR2 value seen is 0x%lx\n", actual);
+
+ FAIL_IF(bad_mmcr2);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(cycles_with_mmcr2, "cycles_with_mmcr2");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb.c b/tools/testing/selftests/powerpc/pmu/ebb/ebb.c
new file mode 100644
index 000000000..2694ae161
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb.c
@@ -0,0 +1,485 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#define _GNU_SOURCE /* For CPU_ZERO etc. */
+
+#include <sched.h>
+#include <sys/wait.h>
+#include <setjmp.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "trace.h"
+#include "ebb.h"
+
+
+void (*ebb_user_func)(void);
+
+void ebb_hook(void)
+{
+ if (ebb_user_func)
+ ebb_user_func();
+}
+
+struct ebb_state ebb_state;
+
+u64 sample_period = 0x40000000ull;
+
+void reset_ebb_with_clear_mask(unsigned long mmcr0_clear_mask)
+{
+ u64 val;
+
+ /* 2) clear MMCR0[PMAO] - docs say BESCR[PMEO] should do this */
+ /* 3) set MMCR0[PMAE] - docs say BESCR[PME] should do this */
+ val = mfspr(SPRN_MMCR0);
+ mtspr(SPRN_MMCR0, (val & ~mmcr0_clear_mask) | MMCR0_PMAE);
+
+ /* 4) clear BESCR[PMEO] */
+ mtspr(SPRN_BESCRR, BESCR_PMEO);
+
+ /* 5) set BESCR[PME] */
+ mtspr(SPRN_BESCRS, BESCR_PME);
+
+ /* 6) rfebb 1 - done in our caller */
+}
+
+void reset_ebb(void)
+{
+ reset_ebb_with_clear_mask(MMCR0_PMAO | MMCR0_FC);
+}
+
+/* Called outside of the EBB handler to check MMCR0 is sane */
+int ebb_check_mmcr0(void)
+{
+ u64 val;
+
+ val = mfspr(SPRN_MMCR0);
+ if ((val & (MMCR0_FC | MMCR0_PMAO)) == MMCR0_FC) {
+ /* It's OK if we see FC & PMAO, but not FC by itself */
+ printf("Outside of loop, only FC set 0x%llx\n", val);
+ return 1;
+ }
+
+ return 0;
+}
+
+bool ebb_check_count(int pmc, u64 sample_period, int fudge)
+{
+ u64 count, upper, lower;
+
+ count = ebb_state.stats.pmc_count[PMC_INDEX(pmc)];
+
+ lower = ebb_state.stats.ebb_count * (sample_period - fudge);
+
+ if (count < lower) {
+ printf("PMC%d count (0x%llx) below lower limit 0x%llx (-0x%llx)\n",
+ pmc, count, lower, lower - count);
+ return false;
+ }
+
+ upper = ebb_state.stats.ebb_count * (sample_period + fudge);
+
+ if (count > upper) {
+ printf("PMC%d count (0x%llx) above upper limit 0x%llx (+0x%llx)\n",
+ pmc, count, upper, count - upper);
+ return false;
+ }
+
+ printf("PMC%d count (0x%llx) is between 0x%llx and 0x%llx delta +0x%llx/-0x%llx\n",
+ pmc, count, lower, upper, count - lower, upper - count);
+
+ return true;
+}
+
+void standard_ebb_callee(void)
+{
+ int found, i;
+ u64 val;
+
+ val = mfspr(SPRN_BESCR);
+ if (!(val & BESCR_PMEO)) {
+ ebb_state.stats.spurious++;
+ goto out;
+ }
+
+ ebb_state.stats.ebb_count++;
+ trace_log_counter(ebb_state.trace, ebb_state.stats.ebb_count);
+
+ val = mfspr(SPRN_MMCR0);
+ trace_log_reg(ebb_state.trace, SPRN_MMCR0, val);
+
+ found = 0;
+ for (i = 1; i <= 6; i++) {
+ if (ebb_state.pmc_enable[PMC_INDEX(i)])
+ found += count_pmc(i, sample_period);
+ }
+
+ if (!found)
+ ebb_state.stats.no_overflow++;
+
+out:
+ reset_ebb();
+}
+
+extern void ebb_handler(void);
+
+void setup_ebb_handler(void (*callee)(void))
+{
+ u64 entry;
+
+#if defined(_CALL_ELF) && _CALL_ELF == 2
+ entry = (u64)ebb_handler;
+#else
+ struct opd
+ {
+ u64 entry;
+ u64 toc;
+ } *opd;
+
+ opd = (struct opd *)ebb_handler;
+ entry = opd->entry;
+#endif
+ printf("EBB Handler is at %#llx\n", entry);
+
+ ebb_user_func = callee;
+
+ /* Ensure ebb_user_func is set before we set the handler */
+ mb();
+ mtspr(SPRN_EBBHR, entry);
+
+ /* Make sure the handler is set before we return */
+ mb();
+}
+
+void clear_ebb_stats(void)
+{
+ memset(&ebb_state.stats, 0, sizeof(ebb_state.stats));
+}
+
+void dump_summary_ebb_state(void)
+{
+ printf("ebb_state:\n" \
+ " ebb_count = %d\n" \
+ " spurious = %d\n" \
+ " negative = %d\n" \
+ " no_overflow = %d\n" \
+ " pmc[1] count = 0x%llx\n" \
+ " pmc[2] count = 0x%llx\n" \
+ " pmc[3] count = 0x%llx\n" \
+ " pmc[4] count = 0x%llx\n" \
+ " pmc[5] count = 0x%llx\n" \
+ " pmc[6] count = 0x%llx\n",
+ ebb_state.stats.ebb_count, ebb_state.stats.spurious,
+ ebb_state.stats.negative, ebb_state.stats.no_overflow,
+ ebb_state.stats.pmc_count[0], ebb_state.stats.pmc_count[1],
+ ebb_state.stats.pmc_count[2], ebb_state.stats.pmc_count[3],
+ ebb_state.stats.pmc_count[4], ebb_state.stats.pmc_count[5]);
+}
+
+static char *decode_mmcr0(u32 value)
+{
+ static char buf[16];
+
+ buf[0] = '\0';
+
+ if (value & (1 << 31))
+ strcat(buf, "FC ");
+ if (value & (1 << 26))
+ strcat(buf, "PMAE ");
+ if (value & (1 << 7))
+ strcat(buf, "PMAO ");
+
+ return buf;
+}
+
+static char *decode_bescr(u64 value)
+{
+ static char buf[16];
+
+ buf[0] = '\0';
+
+ if (value & (1ull << 63))
+ strcat(buf, "GE ");
+ if (value & (1ull << 32))
+ strcat(buf, "PMAE ");
+ if (value & 1)
+ strcat(buf, "PMAO ");
+
+ return buf;
+}
+
+void dump_ebb_hw_state(void)
+{
+ u64 bescr;
+ u32 mmcr0;
+
+ mmcr0 = mfspr(SPRN_MMCR0);
+ bescr = mfspr(SPRN_BESCR);
+
+ printf("HW state:\n" \
+ "MMCR0 0x%016x %s\n" \
+ "MMCR2 0x%016lx\n" \
+ "EBBHR 0x%016lx\n" \
+ "BESCR 0x%016llx %s\n" \
+ "PMC1 0x%016lx\n" \
+ "PMC2 0x%016lx\n" \
+ "PMC3 0x%016lx\n" \
+ "PMC4 0x%016lx\n" \
+ "PMC5 0x%016lx\n" \
+ "PMC6 0x%016lx\n" \
+ "SIAR 0x%016lx\n",
+ mmcr0, decode_mmcr0(mmcr0), mfspr(SPRN_MMCR2),
+ mfspr(SPRN_EBBHR), bescr, decode_bescr(bescr),
+ mfspr(SPRN_PMC1), mfspr(SPRN_PMC2), mfspr(SPRN_PMC3),
+ mfspr(SPRN_PMC4), mfspr(SPRN_PMC5), mfspr(SPRN_PMC6),
+ mfspr(SPRN_SIAR));
+}
+
+void dump_ebb_state(void)
+{
+ dump_summary_ebb_state();
+
+ dump_ebb_hw_state();
+
+ trace_buffer_print(ebb_state.trace);
+}
+
+int count_pmc(int pmc, uint32_t sample_period)
+{
+ uint32_t start_value;
+ u64 val;
+
+ /* 0) Read PMC */
+ start_value = pmc_sample_period(sample_period);
+
+ val = read_pmc(pmc);
+ if (val < start_value)
+ ebb_state.stats.negative++;
+ else
+ ebb_state.stats.pmc_count[PMC_INDEX(pmc)] += val - start_value;
+
+ trace_log_reg(ebb_state.trace, SPRN_PMC1 + pmc - 1, val);
+
+ /* 1) Reset PMC */
+ write_pmc(pmc, start_value);
+
+ /* Report if we overflowed */
+ return val >= COUNTER_OVERFLOW;
+}
+
+int ebb_event_enable(struct event *e)
+{
+ int rc;
+
+ /* Ensure any SPR writes are ordered vs us */
+ mb();
+
+ rc = ioctl(e->fd, PERF_EVENT_IOC_ENABLE);
+ if (rc)
+ return rc;
+
+ rc = event_read(e);
+
+ /* Ditto */
+ mb();
+
+ return rc;
+}
+
+void ebb_freeze_pmcs(void)
+{
+ mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_FC);
+ mb();
+}
+
+void ebb_unfreeze_pmcs(void)
+{
+ /* Unfreeze counters */
+ mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_FC);
+ mb();
+}
+
+void ebb_global_enable(void)
+{
+ /* Enable EBBs globally and PMU EBBs */
+ mtspr(SPRN_BESCR, 0x8000000100000000ull);
+ mb();
+}
+
+void ebb_global_disable(void)
+{
+ /* Disable EBBs & freeze counters, events are still scheduled */
+ mtspr(SPRN_BESCRR, BESCR_PME);
+ mb();
+}
+
+bool ebb_is_supported(void)
+{
+#ifdef PPC_FEATURE2_EBB
+ /* EBB requires at least POWER8 */
+ return have_hwcap2(PPC_FEATURE2_EBB);
+#else
+ return false;
+#endif
+}
+
+void event_ebb_init(struct event *e)
+{
+ e->attr.config |= (1ull << 63);
+}
+
+void event_bhrb_init(struct event *e, unsigned ifm)
+{
+ e->attr.config |= (1ull << 62) | ((u64)ifm << 60);
+}
+
+void event_leader_ebb_init(struct event *e)
+{
+ event_ebb_init(e);
+
+ e->attr.exclusive = 1;
+ e->attr.pinned = 1;
+}
+
+int ebb_child(union pipe read_pipe, union pipe write_pipe)
+{
+ struct event event;
+ uint64_t val;
+
+ FAIL_IF(wait_for_parent(read_pipe));
+
+ event_init_named(&event, 0x1001e, "cycles");
+ event_leader_ebb_init(&event);
+
+ event.attr.exclude_kernel = 1;
+ event.attr.exclude_hv = 1;
+ event.attr.exclude_idle = 1;
+
+ FAIL_IF(event_open(&event));
+
+ ebb_enable_pmc_counting(1);
+ setup_ebb_handler(standard_ebb_callee);
+ ebb_global_enable();
+
+ FAIL_IF(event_enable(&event));
+
+ if (event_read(&event)) {
+ /*
+ * Some tests expect to fail here, so don't report an error on
+ * this line, and return a distinguisable error code. Tell the
+ * parent an error happened.
+ */
+ notify_parent_of_error(write_pipe);
+ return 2;
+ }
+
+ mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+
+ FAIL_IF(notify_parent(write_pipe));
+ FAIL_IF(wait_for_parent(read_pipe));
+ FAIL_IF(notify_parent(write_pipe));
+
+ while (ebb_state.stats.ebb_count < 20) {
+ FAIL_IF(core_busy_loop());
+
+ /* To try and hit SIGILL case */
+ val = mfspr(SPRN_MMCRA);
+ val |= mfspr(SPRN_MMCR2);
+ val |= mfspr(SPRN_MMCR0);
+ }
+
+ ebb_global_disable();
+ ebb_freeze_pmcs();
+
+ dump_ebb_state();
+
+ event_close(&event);
+
+ FAIL_IF(ebb_state.stats.ebb_count == 0);
+
+ return 0;
+}
+
+static jmp_buf setjmp_env;
+
+static void sigill_handler(int signal)
+{
+ printf("Took sigill\n");
+ longjmp(setjmp_env, 1);
+}
+
+static struct sigaction sigill_action = {
+ .sa_handler = sigill_handler,
+};
+
+int catch_sigill(void (*func)(void))
+{
+ if (sigaction(SIGILL, &sigill_action, NULL)) {
+ perror("sigaction");
+ return 1;
+ }
+
+ if (setjmp(setjmp_env) == 0) {
+ func();
+ return 1;
+ }
+
+ return 0;
+}
+
+void write_pmc1(void)
+{
+ mtspr(SPRN_PMC1, 0);
+}
+
+void write_pmc(int pmc, u64 value)
+{
+ switch (pmc) {
+ case 1: mtspr(SPRN_PMC1, value); break;
+ case 2: mtspr(SPRN_PMC2, value); break;
+ case 3: mtspr(SPRN_PMC3, value); break;
+ case 4: mtspr(SPRN_PMC4, value); break;
+ case 5: mtspr(SPRN_PMC5, value); break;
+ case 6: mtspr(SPRN_PMC6, value); break;
+ }
+}
+
+u64 read_pmc(int pmc)
+{
+ switch (pmc) {
+ case 1: return mfspr(SPRN_PMC1);
+ case 2: return mfspr(SPRN_PMC2);
+ case 3: return mfspr(SPRN_PMC3);
+ case 4: return mfspr(SPRN_PMC4);
+ case 5: return mfspr(SPRN_PMC5);
+ case 6: return mfspr(SPRN_PMC6);
+ }
+
+ return 0;
+}
+
+static void term_handler(int signal)
+{
+ dump_summary_ebb_state();
+ dump_ebb_hw_state();
+ abort();
+}
+
+struct sigaction term_action = {
+ .sa_handler = term_handler,
+};
+
+static void __attribute__((constructor)) ebb_init(void)
+{
+ clear_ebb_stats();
+
+ if (sigaction(SIGTERM, &term_action, NULL))
+ perror("sigaction");
+
+ ebb_state.trace = trace_buffer_allocate(1 * 1024 * 1024);
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb.h b/tools/testing/selftests/powerpc/pmu/ebb/ebb.h
new file mode 100644
index 000000000..f87e761f8
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#ifndef _SELFTESTS_POWERPC_PMU_EBB_EBB_H
+#define _SELFTESTS_POWERPC_PMU_EBB_EBB_H
+
+#include "../event.h"
+#include "../lib.h"
+#include "trace.h"
+#include "reg.h"
+
+#define PMC_INDEX(pmc) ((pmc)-1)
+
+#define NUM_PMC_VALUES 128
+
+struct ebb_state
+{
+ struct {
+ u64 pmc_count[6];
+ volatile int ebb_count;
+ int spurious;
+ int negative;
+ int no_overflow;
+ } stats;
+
+ bool pmc_enable[6];
+ struct trace_buffer *trace;
+};
+
+extern struct ebb_state ebb_state;
+
+#define COUNTER_OVERFLOW 0x80000000ull
+
+static inline uint32_t pmc_sample_period(uint32_t value)
+{
+ return COUNTER_OVERFLOW - value;
+}
+
+static inline void ebb_enable_pmc_counting(int pmc)
+{
+ ebb_state.pmc_enable[PMC_INDEX(pmc)] = true;
+}
+
+bool ebb_check_count(int pmc, u64 sample_period, int fudge);
+void event_leader_ebb_init(struct event *e);
+void event_ebb_init(struct event *e);
+void event_bhrb_init(struct event *e, unsigned ifm);
+void setup_ebb_handler(void (*callee)(void));
+void standard_ebb_callee(void);
+int ebb_event_enable(struct event *e);
+void ebb_global_enable(void);
+void ebb_global_disable(void);
+bool ebb_is_supported(void);
+void ebb_freeze_pmcs(void);
+void ebb_unfreeze_pmcs(void);
+void event_ebb_init(struct event *e);
+void event_leader_ebb_init(struct event *e);
+int count_pmc(int pmc, uint32_t sample_period);
+void dump_ebb_state(void);
+void dump_summary_ebb_state(void);
+void dump_ebb_hw_state(void);
+void clear_ebb_stats(void);
+void write_pmc(int pmc, u64 value);
+u64 read_pmc(int pmc);
+void reset_ebb_with_clear_mask(unsigned long mmcr0_clear_mask);
+void reset_ebb(void);
+int ebb_check_mmcr0(void);
+
+extern u64 sample_period;
+
+int core_busy_loop(void);
+int ebb_child(union pipe read_pipe, union pipe write_pipe);
+int catch_sigill(void (*func)(void));
+void write_pmc1(void);
+
+#endif /* _SELFTESTS_POWERPC_PMU_EBB_EBB_H */
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb_handler.S b/tools/testing/selftests/powerpc/pmu/ebb/ebb_handler.S
new file mode 100644
index 000000000..14274ea20
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb_handler.S
@@ -0,0 +1,365 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <ppc-asm.h>
+#include "reg.h"
+
+
+/* ppc-asm.h defines most of the reg aliases, but not r1/r2. */
+#define r1 1
+#define r2 2
+
+#define RFEBB .long 0x4c000924
+
+/* Stack layout:
+ *
+ * ^
+ * User stack |
+ * Back chain ------+ <- r1 <-------+
+ * ... |
+ * Red zone / ABI Gap |
+ * ... |
+ * vr63 <+ |
+ * vr0 | |
+ * VSCR | |
+ * FSCR | |
+ * r31 | Save area |
+ * r0 | |
+ * XER | |
+ * CTR | |
+ * LR | |
+ * CCR <+ |
+ * ... <+ |
+ * LR | Caller frame |
+ * CCR | |
+ * Back chain <+ <- updated r1 --------+
+ *
+ */
+
+#if defined(_CALL_ELF) && _CALL_ELF == 2
+#define ABIGAP 512
+#else
+#define ABIGAP 288
+#endif
+
+#define NR_GPR 32
+#define NR_SPR 6
+#define NR_VSR 64
+
+#define SAVE_AREA ((NR_GPR + NR_SPR) * 8 + (NR_VSR * 16))
+#define CALLER_FRAME 112
+
+#define STACK_FRAME (ABIGAP + SAVE_AREA + CALLER_FRAME)
+
+#define CCR_SAVE (CALLER_FRAME)
+#define LR_SAVE (CCR_SAVE + 8)
+#define CTR_SAVE (LR_SAVE + 8)
+#define XER_SAVE (CTR_SAVE + 8)
+#define GPR_SAVE(n) (XER_SAVE + 8 + (8 * n))
+#define FSCR_SAVE (GPR_SAVE(31) + 8)
+#define VSCR_SAVE (FSCR_SAVE + 8)
+#define VSR_SAVE(n) (VSCR_SAVE + 8 + (16 * n))
+
+#define SAVE_GPR(n) std n,GPR_SAVE(n)(r1)
+#define REST_GPR(n) ld n,GPR_SAVE(n)(r1)
+#define TRASH_GPR(n) lis n,0xaaaa
+
+#define SAVE_VSR(n, b) li b, VSR_SAVE(n); stxvd2x n,b,r1
+#define LOAD_VSR(n, b) li b, VSR_SAVE(n); lxvd2x n,b,r1
+
+#define LOAD_REG_IMMEDIATE(reg,expr) \
+ lis reg,(expr)@highest; \
+ ori reg,reg,(expr)@higher; \
+ rldicr reg,reg,32,31; \
+ oris reg,reg,(expr)@h; \
+ ori reg,reg,(expr)@l;
+
+
+#if defined(_CALL_ELF) && _CALL_ELF == 2
+#define ENTRY_POINT(name) \
+ .type FUNC_NAME(name),@function; \
+ .globl FUNC_NAME(name); \
+ FUNC_NAME(name):
+
+#define RESTORE_TOC(name) \
+ /* Restore our TOC pointer using our entry point */ \
+ LOAD_REG_IMMEDIATE(r12, name) \
+0: addis r2,r12,(.TOC.-0b)@ha; \
+ addi r2,r2,(.TOC.-0b)@l;
+
+#else
+#define ENTRY_POINT(name) FUNC_START(name)
+#define RESTORE_TOC(name) \
+ /* Restore our TOC pointer via our opd entry */ \
+ LOAD_REG_IMMEDIATE(r2, name) \
+ ld r2,8(r2);
+#endif
+
+ .text
+
+ENTRY_POINT(ebb_handler)
+ stdu r1,-STACK_FRAME(r1)
+ SAVE_GPR(0)
+ mflr r0
+ std r0,LR_SAVE(r1)
+ mfcr r0
+ std r0,CCR_SAVE(r1)
+ mfctr r0
+ std r0,CTR_SAVE(r1)
+ mfxer r0
+ std r0,XER_SAVE(r1)
+ SAVE_GPR(2)
+ SAVE_GPR(3)
+ SAVE_GPR(4)
+ SAVE_GPR(5)
+ SAVE_GPR(6)
+ SAVE_GPR(7)
+ SAVE_GPR(8)
+ SAVE_GPR(9)
+ SAVE_GPR(10)
+ SAVE_GPR(11)
+ SAVE_GPR(12)
+ SAVE_GPR(13)
+ SAVE_GPR(14)
+ SAVE_GPR(15)
+ SAVE_GPR(16)
+ SAVE_GPR(17)
+ SAVE_GPR(18)
+ SAVE_GPR(19)
+ SAVE_GPR(20)
+ SAVE_GPR(21)
+ SAVE_GPR(22)
+ SAVE_GPR(23)
+ SAVE_GPR(24)
+ SAVE_GPR(25)
+ SAVE_GPR(26)
+ SAVE_GPR(27)
+ SAVE_GPR(28)
+ SAVE_GPR(29)
+ SAVE_GPR(30)
+ SAVE_GPR(31)
+ SAVE_VSR(0, r3)
+ mffs f0
+ stfd f0, FSCR_SAVE(r1)
+ mfvscr f0
+ stfd f0, VSCR_SAVE(r1)
+ SAVE_VSR(1, r3)
+ SAVE_VSR(2, r3)
+ SAVE_VSR(3, r3)
+ SAVE_VSR(4, r3)
+ SAVE_VSR(5, r3)
+ SAVE_VSR(6, r3)
+ SAVE_VSR(7, r3)
+ SAVE_VSR(8, r3)
+ SAVE_VSR(9, r3)
+ SAVE_VSR(10, r3)
+ SAVE_VSR(11, r3)
+ SAVE_VSR(12, r3)
+ SAVE_VSR(13, r3)
+ SAVE_VSR(14, r3)
+ SAVE_VSR(15, r3)
+ SAVE_VSR(16, r3)
+ SAVE_VSR(17, r3)
+ SAVE_VSR(18, r3)
+ SAVE_VSR(19, r3)
+ SAVE_VSR(20, r3)
+ SAVE_VSR(21, r3)
+ SAVE_VSR(22, r3)
+ SAVE_VSR(23, r3)
+ SAVE_VSR(24, r3)
+ SAVE_VSR(25, r3)
+ SAVE_VSR(26, r3)
+ SAVE_VSR(27, r3)
+ SAVE_VSR(28, r3)
+ SAVE_VSR(29, r3)
+ SAVE_VSR(30, r3)
+ SAVE_VSR(31, r3)
+ SAVE_VSR(32, r3)
+ SAVE_VSR(33, r3)
+ SAVE_VSR(34, r3)
+ SAVE_VSR(35, r3)
+ SAVE_VSR(36, r3)
+ SAVE_VSR(37, r3)
+ SAVE_VSR(38, r3)
+ SAVE_VSR(39, r3)
+ SAVE_VSR(40, r3)
+ SAVE_VSR(41, r3)
+ SAVE_VSR(42, r3)
+ SAVE_VSR(43, r3)
+ SAVE_VSR(44, r3)
+ SAVE_VSR(45, r3)
+ SAVE_VSR(46, r3)
+ SAVE_VSR(47, r3)
+ SAVE_VSR(48, r3)
+ SAVE_VSR(49, r3)
+ SAVE_VSR(50, r3)
+ SAVE_VSR(51, r3)
+ SAVE_VSR(52, r3)
+ SAVE_VSR(53, r3)
+ SAVE_VSR(54, r3)
+ SAVE_VSR(55, r3)
+ SAVE_VSR(56, r3)
+ SAVE_VSR(57, r3)
+ SAVE_VSR(58, r3)
+ SAVE_VSR(59, r3)
+ SAVE_VSR(60, r3)
+ SAVE_VSR(61, r3)
+ SAVE_VSR(62, r3)
+ SAVE_VSR(63, r3)
+
+ TRASH_GPR(2)
+ TRASH_GPR(3)
+ TRASH_GPR(4)
+ TRASH_GPR(5)
+ TRASH_GPR(6)
+ TRASH_GPR(7)
+ TRASH_GPR(8)
+ TRASH_GPR(9)
+ TRASH_GPR(10)
+ TRASH_GPR(11)
+ TRASH_GPR(12)
+ TRASH_GPR(14)
+ TRASH_GPR(15)
+ TRASH_GPR(16)
+ TRASH_GPR(17)
+ TRASH_GPR(18)
+ TRASH_GPR(19)
+ TRASH_GPR(20)
+ TRASH_GPR(21)
+ TRASH_GPR(22)
+ TRASH_GPR(23)
+ TRASH_GPR(24)
+ TRASH_GPR(25)
+ TRASH_GPR(26)
+ TRASH_GPR(27)
+ TRASH_GPR(28)
+ TRASH_GPR(29)
+ TRASH_GPR(30)
+ TRASH_GPR(31)
+
+ RESTORE_TOC(ebb_handler)
+
+ /*
+ * r13 is our TLS pointer. We leave whatever value was in there when the
+ * EBB fired. That seems to be OK because once set the TLS pointer is not
+ * changed - but presumably that could change in future.
+ */
+
+ bl ebb_hook
+ nop
+
+ /* r2 may be changed here but we don't care */
+
+ lfd f0, FSCR_SAVE(r1)
+ mtfsf 0xff,f0
+ lfd f0, VSCR_SAVE(r1)
+ mtvscr f0
+ LOAD_VSR(0, r3)
+ LOAD_VSR(1, r3)
+ LOAD_VSR(2, r3)
+ LOAD_VSR(3, r3)
+ LOAD_VSR(4, r3)
+ LOAD_VSR(5, r3)
+ LOAD_VSR(6, r3)
+ LOAD_VSR(7, r3)
+ LOAD_VSR(8, r3)
+ LOAD_VSR(9, r3)
+ LOAD_VSR(10, r3)
+ LOAD_VSR(11, r3)
+ LOAD_VSR(12, r3)
+ LOAD_VSR(13, r3)
+ LOAD_VSR(14, r3)
+ LOAD_VSR(15, r3)
+ LOAD_VSR(16, r3)
+ LOAD_VSR(17, r3)
+ LOAD_VSR(18, r3)
+ LOAD_VSR(19, r3)
+ LOAD_VSR(20, r3)
+ LOAD_VSR(21, r3)
+ LOAD_VSR(22, r3)
+ LOAD_VSR(23, r3)
+ LOAD_VSR(24, r3)
+ LOAD_VSR(25, r3)
+ LOAD_VSR(26, r3)
+ LOAD_VSR(27, r3)
+ LOAD_VSR(28, r3)
+ LOAD_VSR(29, r3)
+ LOAD_VSR(30, r3)
+ LOAD_VSR(31, r3)
+ LOAD_VSR(32, r3)
+ LOAD_VSR(33, r3)
+ LOAD_VSR(34, r3)
+ LOAD_VSR(35, r3)
+ LOAD_VSR(36, r3)
+ LOAD_VSR(37, r3)
+ LOAD_VSR(38, r3)
+ LOAD_VSR(39, r3)
+ LOAD_VSR(40, r3)
+ LOAD_VSR(41, r3)
+ LOAD_VSR(42, r3)
+ LOAD_VSR(43, r3)
+ LOAD_VSR(44, r3)
+ LOAD_VSR(45, r3)
+ LOAD_VSR(46, r3)
+ LOAD_VSR(47, r3)
+ LOAD_VSR(48, r3)
+ LOAD_VSR(49, r3)
+ LOAD_VSR(50, r3)
+ LOAD_VSR(51, r3)
+ LOAD_VSR(52, r3)
+ LOAD_VSR(53, r3)
+ LOAD_VSR(54, r3)
+ LOAD_VSR(55, r3)
+ LOAD_VSR(56, r3)
+ LOAD_VSR(57, r3)
+ LOAD_VSR(58, r3)
+ LOAD_VSR(59, r3)
+ LOAD_VSR(60, r3)
+ LOAD_VSR(61, r3)
+ LOAD_VSR(62, r3)
+ LOAD_VSR(63, r3)
+
+ ld r0,XER_SAVE(r1)
+ mtxer r0
+ ld r0,CTR_SAVE(r1)
+ mtctr r0
+ ld r0,LR_SAVE(r1)
+ mtlr r0
+ ld r0,CCR_SAVE(r1)
+ mtcr r0
+ REST_GPR(0)
+ REST_GPR(2)
+ REST_GPR(3)
+ REST_GPR(4)
+ REST_GPR(5)
+ REST_GPR(6)
+ REST_GPR(7)
+ REST_GPR(8)
+ REST_GPR(9)
+ REST_GPR(10)
+ REST_GPR(11)
+ REST_GPR(12)
+ REST_GPR(13)
+ REST_GPR(14)
+ REST_GPR(15)
+ REST_GPR(16)
+ REST_GPR(17)
+ REST_GPR(18)
+ REST_GPR(19)
+ REST_GPR(20)
+ REST_GPR(21)
+ REST_GPR(22)
+ REST_GPR(23)
+ REST_GPR(24)
+ REST_GPR(25)
+ REST_GPR(26)
+ REST_GPR(27)
+ REST_GPR(28)
+ REST_GPR(29)
+ REST_GPR(30)
+ REST_GPR(31)
+ addi r1,r1,STACK_FRAME
+ RFEBB
+FUNC_END(ebb_handler)
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_child_test.c b/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_child_test.c
new file mode 100644
index 000000000..1e7b7fe23
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_child_test.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "ebb.h"
+
+
+/*
+ * Tests we can setup an EBB on our child. Nothing interesting happens, because
+ * even though the event is enabled and running the child hasn't enabled the
+ * actual delivery of the EBBs.
+ */
+
+static int victim_child(union pipe read_pipe, union pipe write_pipe)
+{
+ int i;
+
+ FAIL_IF(wait_for_parent(read_pipe));
+ FAIL_IF(notify_parent(write_pipe));
+
+ /* Parent creates EBB event */
+
+ FAIL_IF(wait_for_parent(read_pipe));
+ FAIL_IF(notify_parent(write_pipe));
+
+ /* Check the EBB is enabled by writing PMC1 */
+ write_pmc1();
+
+ /* EBB event is enabled here */
+ for (i = 0; i < 1000000; i++) ;
+
+ return 0;
+}
+
+int ebb_on_child(void)
+{
+ union pipe read_pipe, write_pipe;
+ struct event event;
+ pid_t pid;
+
+ SKIP_IF(!ebb_is_supported());
+
+ FAIL_IF(pipe(read_pipe.fds) == -1);
+ FAIL_IF(pipe(write_pipe.fds) == -1);
+
+ pid = fork();
+ if (pid == 0) {
+ /* NB order of pipes looks reversed */
+ exit(victim_child(write_pipe, read_pipe));
+ }
+
+ FAIL_IF(sync_with_child(read_pipe, write_pipe));
+
+ /* Child is running now */
+
+ event_init_named(&event, 0x1001e, "cycles");
+ event_leader_ebb_init(&event);
+
+ event.attr.exclude_kernel = 1;
+ event.attr.exclude_hv = 1;
+ event.attr.exclude_idle = 1;
+
+ FAIL_IF(event_open_with_pid(&event, pid));
+ FAIL_IF(ebb_event_enable(&event));
+
+ FAIL_IF(sync_with_child(read_pipe, write_pipe));
+
+ /* Child should just exit happily */
+ FAIL_IF(wait_for_child(pid));
+
+ event_close(&event);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(ebb_on_child, "ebb_on_child");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c b/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c
new file mode 100644
index 000000000..174e4f4da
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c
@@ -0,0 +1,92 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "ebb.h"
+
+
+/*
+ * Tests we can setup an EBB on our child. The child expects this and enables
+ * EBBs, which are then delivered to the child, even though the event is
+ * created by the parent.
+ */
+
+static int victim_child(union pipe read_pipe, union pipe write_pipe)
+{
+ FAIL_IF(wait_for_parent(read_pipe));
+
+ /* Setup our EBB handler, before the EBB event is created */
+ ebb_enable_pmc_counting(1);
+ setup_ebb_handler(standard_ebb_callee);
+ ebb_global_enable();
+
+ FAIL_IF(notify_parent(write_pipe));
+
+ while (ebb_state.stats.ebb_count < 20) {
+ FAIL_IF(core_busy_loop());
+ }
+
+ ebb_global_disable();
+ ebb_freeze_pmcs();
+
+ dump_ebb_state();
+
+ FAIL_IF(ebb_state.stats.ebb_count == 0);
+
+ return 0;
+}
+
+/* Tests we can setup an EBB on our child - if it's expecting it */
+int ebb_on_willing_child(void)
+{
+ union pipe read_pipe, write_pipe;
+ struct event event;
+ pid_t pid;
+
+ SKIP_IF(!ebb_is_supported());
+
+ FAIL_IF(pipe(read_pipe.fds) == -1);
+ FAIL_IF(pipe(write_pipe.fds) == -1);
+
+ pid = fork();
+ if (pid == 0) {
+ /* NB order of pipes looks reversed */
+ exit(victim_child(write_pipe, read_pipe));
+ }
+
+ /* Signal the child to setup its EBB handler */
+ FAIL_IF(sync_with_child(read_pipe, write_pipe));
+
+ /* Child is running now */
+
+ event_init_named(&event, 0x1001e, "cycles");
+ event_leader_ebb_init(&event);
+
+ event.attr.exclude_kernel = 1;
+ event.attr.exclude_hv = 1;
+ event.attr.exclude_idle = 1;
+
+ FAIL_IF(event_open_with_pid(&event, pid));
+ FAIL_IF(ebb_event_enable(&event));
+
+ /* Child show now take EBBs and then exit */
+ FAIL_IF(wait_for_child(pid));
+
+ event_close(&event);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(ebb_on_willing_child, "ebb_on_willing_child");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb_vs_cpu_event_test.c b/tools/testing/selftests/powerpc/pmu/ebb/ebb_vs_cpu_event_test.c
new file mode 100644
index 000000000..af20a2b36
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb_vs_cpu_event_test.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "ebb.h"
+
+
+/*
+ * Tests an EBB vs a cpu event - in that order. The EBB should force the cpu
+ * event off the PMU.
+ */
+
+static int setup_cpu_event(struct event *event, int cpu)
+{
+ event_init_named(event, 0x400FA, "PM_RUN_INST_CMPL");
+
+ event->attr.exclude_kernel = 1;
+ event->attr.exclude_hv = 1;
+ event->attr.exclude_idle = 1;
+
+ SKIP_IF(require_paranoia_below(1));
+ FAIL_IF(event_open_with_cpu(event, cpu));
+ FAIL_IF(event_enable(event));
+
+ return 0;
+}
+
+int ebb_vs_cpu_event(void)
+{
+ union pipe read_pipe, write_pipe;
+ struct event event;
+ int cpu, rc;
+ pid_t pid;
+
+ SKIP_IF(!ebb_is_supported());
+
+ cpu = pick_online_cpu();
+ FAIL_IF(cpu < 0);
+ FAIL_IF(bind_to_cpu(cpu));
+
+ FAIL_IF(pipe(read_pipe.fds) == -1);
+ FAIL_IF(pipe(write_pipe.fds) == -1);
+
+ pid = fork();
+ if (pid == 0) {
+ /* NB order of pipes looks reversed */
+ exit(ebb_child(write_pipe, read_pipe));
+ }
+
+ /* Signal the child to install its EBB event and wait */
+ FAIL_IF(sync_with_child(read_pipe, write_pipe));
+
+ /* Now try to install our CPU event */
+ rc = setup_cpu_event(&event, cpu);
+ if (rc) {
+ kill_child_and_wait(pid);
+ return rc;
+ }
+
+ /* Signal the child to run */
+ FAIL_IF(sync_with_child(read_pipe, write_pipe));
+
+ /* .. and wait for it to complete */
+ FAIL_IF(wait_for_child(pid));
+ FAIL_IF(event_disable(&event));
+ FAIL_IF(event_read(&event));
+
+ event_report(&event);
+
+ /* The cpu event may have run, but we don't expect 100% */
+ FAIL_IF(event.result.enabled >= event.result.running);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(ebb_vs_cpu_event, "ebb_vs_cpu_event");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/event_attributes_test.c b/tools/testing/selftests/powerpc/pmu/ebb/event_attributes_test.c
new file mode 100644
index 000000000..7762ab26e
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/event_attributes_test.c
@@ -0,0 +1,133 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test various attributes of the EBB event are enforced.
+ */
+int event_attributes(void)
+{
+ struct event event, leader;
+
+ SKIP_IF(!ebb_is_supported());
+
+ event_init(&event, 0x1001e);
+ event_leader_ebb_init(&event);
+ /* Expected to succeed */
+ FAIL_IF(event_open(&event));
+ event_close(&event);
+
+
+ event_init(&event, 0x001e); /* CYCLES - no PMC specified */
+ event_leader_ebb_init(&event);
+ /* Expected to fail, no PMC specified */
+ FAIL_IF(event_open(&event) == 0);
+
+
+ event_init(&event, 0x2001e);
+ event_leader_ebb_init(&event);
+ event.attr.exclusive = 0;
+ /* Expected to fail, not exclusive */
+ FAIL_IF(event_open(&event) == 0);
+
+
+ event_init(&event, 0x3001e);
+ event_leader_ebb_init(&event);
+ event.attr.freq = 1;
+ /* Expected to fail, sets freq */
+ FAIL_IF(event_open(&event) == 0);
+
+
+ event_init(&event, 0x4001e);
+ event_leader_ebb_init(&event);
+ event.attr.sample_period = 1;
+ /* Expected to fail, sets sample_period */
+ FAIL_IF(event_open(&event) == 0);
+
+
+ event_init(&event, 0x1001e);
+ event_leader_ebb_init(&event);
+ event.attr.enable_on_exec = 1;
+ /* Expected to fail, sets enable_on_exec */
+ FAIL_IF(event_open(&event) == 0);
+
+
+ event_init(&event, 0x1001e);
+ event_leader_ebb_init(&event);
+ event.attr.inherit = 1;
+ /* Expected to fail, sets inherit */
+ FAIL_IF(event_open(&event) == 0);
+
+
+ event_init(&leader, 0x1001e);
+ event_leader_ebb_init(&leader);
+ FAIL_IF(event_open(&leader));
+
+ event_init(&event, 0x20002);
+ event_ebb_init(&event);
+
+ /* Expected to succeed */
+ FAIL_IF(event_open_with_group(&event, leader.fd));
+ event_close(&leader);
+ event_close(&event);
+
+
+ event_init(&leader, 0x1001e);
+ event_leader_ebb_init(&leader);
+ FAIL_IF(event_open(&leader));
+
+ event_init(&event, 0x20002);
+
+ /* Expected to fail, event doesn't request EBB, leader does */
+ FAIL_IF(event_open_with_group(&event, leader.fd) == 0);
+ event_close(&leader);
+
+
+ event_init(&leader, 0x1001e);
+ event_leader_ebb_init(&leader);
+ /* Clear the EBB flag */
+ leader.attr.config &= ~(1ull << 63);
+
+ FAIL_IF(event_open(&leader));
+
+ event_init(&event, 0x20002);
+ event_ebb_init(&event);
+
+ /* Expected to fail, leader doesn't request EBB */
+ FAIL_IF(event_open_with_group(&event, leader.fd) == 0);
+ event_close(&leader);
+
+
+ event_init(&leader, 0x1001e);
+ event_leader_ebb_init(&leader);
+ leader.attr.exclusive = 0;
+ /* Expected to fail, leader isn't exclusive */
+ FAIL_IF(event_open(&leader) == 0);
+
+
+ event_init(&leader, 0x1001e);
+ event_leader_ebb_init(&leader);
+ leader.attr.pinned = 0;
+ /* Expected to fail, leader isn't pinned */
+ FAIL_IF(event_open(&leader) == 0);
+
+ event_init(&event, 0x1001e);
+ event_leader_ebb_init(&event);
+ /* Expected to fail, not a task event */
+ SKIP_IF(require_paranoia_below(1));
+ FAIL_IF(event_open_with_cpu(&event, 0) == 0);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(event_attributes, "event_attributes");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/fixed_instruction_loop.S b/tools/testing/selftests/powerpc/pmu/ebb/fixed_instruction_loop.S
new file mode 100644
index 000000000..b866a0581
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/fixed_instruction_loop.S
@@ -0,0 +1,43 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <ppc-asm.h>
+
+ .text
+
+FUNC_START(thirty_two_instruction_loop)
+ cmpwi r3,0
+ beqlr
+ addi r4,r3,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1 # 28 addi's
+ subi r3,r3,1
+ b FUNC_NAME(thirty_two_instruction_loop)
+FUNC_END(thirty_two_instruction_loop)
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/fork_cleanup_test.c b/tools/testing/selftests/powerpc/pmu/ebb/fork_cleanup_test.c
new file mode 100644
index 000000000..167135bd9
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/fork_cleanup_test.c
@@ -0,0 +1,81 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <setjmp.h>
+#include <signal.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test that a fork clears the PMU state of the child. eg. BESCR/EBBHR/EBBRR
+ * are cleared, and MMCR0_PMCC is reset, preventing the child from accessing
+ * the PMU.
+ */
+
+static struct event event;
+
+static int child(void)
+{
+ /* Even though we have EBE=0 we can still see the EBB regs */
+ FAIL_IF(mfspr(SPRN_BESCR) != 0);
+ FAIL_IF(mfspr(SPRN_EBBHR) != 0);
+ FAIL_IF(mfspr(SPRN_EBBRR) != 0);
+
+ FAIL_IF(catch_sigill(write_pmc1));
+
+ /* We can still read from the event, though it is on our parent */
+ FAIL_IF(event_read(&event));
+
+ return 0;
+}
+
+/* Tests that fork clears EBB state */
+int fork_cleanup(void)
+{
+ pid_t pid;
+
+ SKIP_IF(!ebb_is_supported());
+
+ event_init_named(&event, 0x1001e, "cycles");
+ event_leader_ebb_init(&event);
+
+ FAIL_IF(event_open(&event));
+
+ ebb_enable_pmc_counting(1);
+ setup_ebb_handler(standard_ebb_callee);
+ ebb_global_enable();
+
+ FAIL_IF(ebb_event_enable(&event));
+
+ mtspr(SPRN_MMCR0, MMCR0_FC);
+ mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+
+ /* Don't need to actually take any EBBs */
+
+ pid = fork();
+ if (pid == 0)
+ exit(child());
+
+ /* Child does the actual testing */
+ FAIL_IF(wait_for_child(pid));
+
+ /* After fork */
+ event_close(&event);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(fork_cleanup, "fork_cleanup");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/instruction_count_test.c b/tools/testing/selftests/powerpc/pmu/ebb/instruction_count_test.c
new file mode 100644
index 000000000..35a3426e3
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/instruction_count_test.c
@@ -0,0 +1,167 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdbool.h>
+#include <string.h>
+#include <sys/prctl.h>
+
+#include "ebb.h"
+
+
+/*
+ * Run a calibrated instruction loop and count instructions executed using
+ * EBBs. Make sure the counts look right.
+ */
+
+extern void thirty_two_instruction_loop(uint64_t loops);
+
+static bool counters_frozen = true;
+
+static int do_count_loop(struct event *event, uint64_t instructions,
+ uint64_t overhead, bool report)
+{
+ int64_t difference, expected;
+ double percentage;
+
+ clear_ebb_stats();
+
+ counters_frozen = false;
+ mb();
+ mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_FC);
+
+ thirty_two_instruction_loop(instructions >> 5);
+
+ counters_frozen = true;
+ mb();
+ mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_FC);
+
+ count_pmc(4, sample_period);
+
+ event->result.value = ebb_state.stats.pmc_count[4-1];
+ expected = instructions + overhead;
+ difference = event->result.value - expected;
+ percentage = (double)difference / event->result.value * 100;
+
+ if (report) {
+ printf("Looped for %lu instructions, overhead %lu\n", instructions, overhead);
+ printf("Expected %lu\n", expected);
+ printf("Actual %llu\n", event->result.value);
+ printf("Delta %ld, %f%%\n", difference, percentage);
+ printf("Took %d EBBs\n", ebb_state.stats.ebb_count);
+ }
+
+ if (difference < 0)
+ difference = -difference;
+
+ /* Tolerate a difference of up to 0.0001 % */
+ difference *= 10000 * 100;
+ if (difference / event->result.value)
+ return -1;
+
+ return 0;
+}
+
+/* Count how many instructions it takes to do a null loop */
+static uint64_t determine_overhead(struct event *event)
+{
+ uint64_t current, overhead;
+ int i;
+
+ do_count_loop(event, 0, 0, false);
+ overhead = event->result.value;
+
+ for (i = 0; i < 100; i++) {
+ do_count_loop(event, 0, 0, false);
+ current = event->result.value;
+ if (current < overhead) {
+ printf("Replacing overhead %lu with %lu\n", overhead, current);
+ overhead = current;
+ }
+ }
+
+ return overhead;
+}
+
+static void pmc4_ebb_callee(void)
+{
+ uint64_t val;
+
+ val = mfspr(SPRN_BESCR);
+ if (!(val & BESCR_PMEO)) {
+ ebb_state.stats.spurious++;
+ goto out;
+ }
+
+ ebb_state.stats.ebb_count++;
+ count_pmc(4, sample_period);
+out:
+ if (counters_frozen)
+ reset_ebb_with_clear_mask(MMCR0_PMAO);
+ else
+ reset_ebb();
+}
+
+int instruction_count(void)
+{
+ struct event event;
+ uint64_t overhead;
+
+ SKIP_IF(!ebb_is_supported());
+
+ event_init_named(&event, 0x400FA, "PM_RUN_INST_CMPL");
+ event_leader_ebb_init(&event);
+ event.attr.exclude_kernel = 1;
+ event.attr.exclude_hv = 1;
+ event.attr.exclude_idle = 1;
+
+ FAIL_IF(event_open(&event));
+ FAIL_IF(ebb_event_enable(&event));
+
+ sample_period = COUNTER_OVERFLOW;
+
+ setup_ebb_handler(pmc4_ebb_callee);
+ mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_FC);
+ ebb_global_enable();
+
+ overhead = determine_overhead(&event);
+ printf("Overhead of null loop: %lu instructions\n", overhead);
+
+ /* Run for 1M instructions */
+ FAIL_IF(do_count_loop(&event, 0x100000, overhead, true));
+
+ /* Run for 10M instructions */
+ FAIL_IF(do_count_loop(&event, 0xa00000, overhead, true));
+
+ /* Run for 100M instructions */
+ FAIL_IF(do_count_loop(&event, 0x6400000, overhead, true));
+
+ /* Run for 1G instructions */
+ FAIL_IF(do_count_loop(&event, 0x40000000, overhead, true));
+
+ /* Run for 16G instructions */
+ FAIL_IF(do_count_loop(&event, 0x400000000, overhead, true));
+
+ /* Run for 64G instructions */
+ FAIL_IF(do_count_loop(&event, 0x1000000000, overhead, true));
+
+ /* Run for 128G instructions */
+ FAIL_IF(do_count_loop(&event, 0x2000000000, overhead, true));
+
+ ebb_global_disable();
+ event_close(&event);
+
+ printf("Finished OK\n");
+
+ return 0;
+}
+
+int main(void)
+{
+ test_harness_set_timeout(300);
+ return test_harness(instruction_count, "instruction_count");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c b/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c
new file mode 100644
index 000000000..dddb95938
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c
@@ -0,0 +1,102 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test that tries to trigger CPU_FTR_PMAO_BUG. Which is a hardware defect
+ * where an exception triggers but we context switch before it is delivered and
+ * lose the exception.
+ */
+
+static int test_body(void)
+{
+ int i, orig_period, max_period;
+ struct event event;
+
+ SKIP_IF(!ebb_is_supported());
+
+ /* We use PMC4 to make sure the kernel switches all counters correctly */
+ event_init_named(&event, 0x40002, "instructions");
+ event_leader_ebb_init(&event);
+
+ event.attr.exclude_kernel = 1;
+ event.attr.exclude_hv = 1;
+ event.attr.exclude_idle = 1;
+
+ FAIL_IF(event_open(&event));
+
+ ebb_enable_pmc_counting(4);
+ setup_ebb_handler(standard_ebb_callee);
+ ebb_global_enable();
+ FAIL_IF(ebb_event_enable(&event));
+
+ /*
+ * We want a low sample period, but we also want to get out of the EBB
+ * handler without tripping up again.
+ *
+ * This value picked after much experimentation.
+ */
+ orig_period = max_period = sample_period = 400;
+
+ mtspr(SPRN_PMC4, pmc_sample_period(sample_period));
+
+ while (ebb_state.stats.ebb_count < 1000000) {
+ /*
+ * We are trying to get the EBB exception to race exactly with
+ * us entering the kernel to do the syscall. We then need the
+ * kernel to decide our timeslice is up and context switch to
+ * the other thread. When we come back our EBB will have been
+ * lost and we'll spin in this while loop forever.
+ */
+
+ for (i = 0; i < 100000; i++)
+ sched_yield();
+
+ /* Change the sample period slightly to try and hit the race */
+ if (sample_period >= (orig_period + 200))
+ sample_period = orig_period;
+ else
+ sample_period++;
+
+ if (sample_period > max_period)
+ max_period = sample_period;
+ }
+
+ ebb_freeze_pmcs();
+ ebb_global_disable();
+
+ mtspr(SPRN_PMC4, 0xdead);
+
+ dump_summary_ebb_state();
+ dump_ebb_hw_state();
+
+ event_close(&event);
+
+ FAIL_IF(ebb_state.stats.ebb_count == 0);
+
+ /* We vary our sample period so we need extra fudge here */
+ FAIL_IF(!ebb_check_count(4, orig_period, 2 * (max_period - orig_period)));
+
+ return 0;
+}
+
+static int lost_exception(void)
+{
+ return eat_cpu(test_body);
+}
+
+int main(void)
+{
+ test_harness_set_timeout(300);
+ return test_harness(lost_exception, "lost_exception");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c b/tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c
new file mode 100644
index 000000000..035c02273
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c
@@ -0,0 +1,86 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/ioctl.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test counting multiple events using EBBs.
+ */
+int multi_counter(void)
+{
+ struct event events[6];
+ int i, group_fd;
+
+ SKIP_IF(!ebb_is_supported());
+
+ event_init_named(&events[0], 0x1001C, "PM_CMPLU_STALL_THRD");
+ event_init_named(&events[1], 0x2D016, "PM_CMPLU_STALL_FXU");
+ event_init_named(&events[2], 0x30006, "PM_CMPLU_STALL_OTHER_CMPL");
+ event_init_named(&events[3], 0x4000A, "PM_CMPLU_STALL");
+ event_init_named(&events[4], 0x600f4, "PM_RUN_CYC");
+ event_init_named(&events[5], 0x500fa, "PM_RUN_INST_CMPL");
+
+ event_leader_ebb_init(&events[0]);
+ for (i = 1; i < 6; i++)
+ event_ebb_init(&events[i]);
+
+ group_fd = -1;
+ for (i = 0; i < 6; i++) {
+ events[i].attr.exclude_kernel = 1;
+ events[i].attr.exclude_hv = 1;
+ events[i].attr.exclude_idle = 1;
+
+ FAIL_IF(event_open_with_group(&events[i], group_fd));
+ if (group_fd == -1)
+ group_fd = events[0].fd;
+ }
+
+ ebb_enable_pmc_counting(1);
+ ebb_enable_pmc_counting(2);
+ ebb_enable_pmc_counting(3);
+ ebb_enable_pmc_counting(4);
+ ebb_enable_pmc_counting(5);
+ ebb_enable_pmc_counting(6);
+ setup_ebb_handler(standard_ebb_callee);
+
+ FAIL_IF(ioctl(events[0].fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP));
+ FAIL_IF(event_read(&events[0]));
+
+ ebb_global_enable();
+
+ mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+ mtspr(SPRN_PMC2, pmc_sample_period(sample_period));
+ mtspr(SPRN_PMC3, pmc_sample_period(sample_period));
+ mtspr(SPRN_PMC4, pmc_sample_period(sample_period));
+ mtspr(SPRN_PMC5, pmc_sample_period(sample_period));
+ mtspr(SPRN_PMC6, pmc_sample_period(sample_period));
+
+ while (ebb_state.stats.ebb_count < 50) {
+ FAIL_IF(core_busy_loop());
+ FAIL_IF(ebb_check_mmcr0());
+ }
+
+ ebb_global_disable();
+ ebb_freeze_pmcs();
+
+ dump_ebb_state();
+
+ for (i = 0; i < 6; i++)
+ event_close(&events[i]);
+
+ FAIL_IF(ebb_state.stats.ebb_count == 0);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(multi_counter, "multi_counter");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c b/tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c
new file mode 100644
index 000000000..3e9d4ac96
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c
@@ -0,0 +1,109 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test running multiple EBB using processes at once on a single CPU. They
+ * should all run happily without interfering with each other.
+ */
+
+static bool child_should_exit;
+
+static void sigint_handler(int signal)
+{
+ child_should_exit = true;
+}
+
+struct sigaction sigint_action = {
+ .sa_handler = sigint_handler,
+};
+
+static int cycles_child(void)
+{
+ struct event event;
+
+ if (sigaction(SIGINT, &sigint_action, NULL)) {
+ perror("sigaction");
+ return 1;
+ }
+
+ event_init_named(&event, 0x1001e, "cycles");
+ event_leader_ebb_init(&event);
+
+ event.attr.exclude_kernel = 1;
+ event.attr.exclude_hv = 1;
+ event.attr.exclude_idle = 1;
+
+ FAIL_IF(event_open(&event));
+
+ ebb_enable_pmc_counting(1);
+ setup_ebb_handler(standard_ebb_callee);
+ ebb_global_enable();
+
+ FAIL_IF(ebb_event_enable(&event));
+
+ mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+
+ while (!child_should_exit) {
+ FAIL_IF(core_busy_loop());
+ FAIL_IF(ebb_check_mmcr0());
+ }
+
+ ebb_global_disable();
+ ebb_freeze_pmcs();
+
+ dump_summary_ebb_state();
+
+ event_close(&event);
+
+ FAIL_IF(ebb_state.stats.ebb_count == 0);
+
+ return 0;
+}
+
+#define NR_CHILDREN 4
+
+int multi_ebb_procs(void)
+{
+ pid_t pids[NR_CHILDREN];
+ int cpu, rc, i;
+
+ SKIP_IF(!ebb_is_supported());
+
+ cpu = pick_online_cpu();
+ FAIL_IF(cpu < 0);
+ FAIL_IF(bind_to_cpu(cpu));
+
+ for (i = 0; i < NR_CHILDREN; i++) {
+ pids[i] = fork();
+ if (pids[i] == 0)
+ exit(cycles_child());
+ }
+
+ /* Have them all run for "a while" */
+ sleep(10);
+
+ rc = 0;
+ for (i = 0; i < NR_CHILDREN; i++) {
+ /* Tell them to stop */
+ kill(pids[i], SIGINT);
+ /* And wait */
+ rc |= wait_for_child(pids[i]);
+ }
+
+ return rc;
+}
+
+int main(void)
+{
+ return test_harness(multi_ebb_procs, "multi_ebb_procs");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c b/tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c
new file mode 100644
index 000000000..87630d44f
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <setjmp.h>
+#include <signal.h>
+
+#include "ebb.h"
+
+
+/* Test that things work sanely if we have no handler */
+
+static int no_handler_test(void)
+{
+ struct event event;
+ u64 val;
+ int i;
+
+ SKIP_IF(!ebb_is_supported());
+
+ event_init_named(&event, 0x1001e, "cycles");
+ event_leader_ebb_init(&event);
+
+ event.attr.exclude_kernel = 1;
+ event.attr.exclude_hv = 1;
+ event.attr.exclude_idle = 1;
+
+ FAIL_IF(event_open(&event));
+ FAIL_IF(ebb_event_enable(&event));
+
+ val = mfspr(SPRN_EBBHR);
+ FAIL_IF(val != 0);
+
+ /* Make sure it overflows quickly */
+ sample_period = 1000;
+ mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+
+ /* Spin to make sure the event has time to overflow */
+ for (i = 0; i < 1000; i++)
+ mb();
+
+ dump_ebb_state();
+
+ /* We expect to see the PMU frozen & PMAO set */
+ val = mfspr(SPRN_MMCR0);
+ FAIL_IF(val != 0x0000000080000080);
+
+ event_close(&event);
+
+ /* The real test is that we never took an EBB at 0x0 */
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(no_handler_test,"no_handler_test");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c b/tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c
new file mode 100644
index 000000000..d90891fe9
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c
@@ -0,0 +1,106 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <sched.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test that the kernel properly handles PMAE across context switches.
+ *
+ * We test this by calling into the kernel inside our EBB handler, where PMAE
+ * is clear. A cpu eater companion thread is running on the same CPU as us to
+ * encourage the scheduler to switch us.
+ *
+ * The kernel must make sure that when it context switches us back in, it
+ * honours the fact that we had PMAE clear.
+ *
+ * Observed to hit the failing case on the first EBB with a broken kernel.
+ */
+
+static bool mmcr0_mismatch;
+static uint64_t before, after;
+
+static void syscall_ebb_callee(void)
+{
+ uint64_t val;
+
+ val = mfspr(SPRN_BESCR);
+ if (!(val & BESCR_PMEO)) {
+ ebb_state.stats.spurious++;
+ goto out;
+ }
+
+ ebb_state.stats.ebb_count++;
+ count_pmc(1, sample_period);
+
+ before = mfspr(SPRN_MMCR0);
+
+ /* Try and get ourselves scheduled, to force a PMU context switch */
+ sched_yield();
+
+ after = mfspr(SPRN_MMCR0);
+ if (before != after)
+ mmcr0_mismatch = true;
+
+out:
+ reset_ebb();
+}
+
+static int test_body(void)
+{
+ struct event event;
+
+ SKIP_IF(!ebb_is_supported());
+
+ event_init_named(&event, 0x1001e, "cycles");
+ event_leader_ebb_init(&event);
+
+ event.attr.exclude_kernel = 1;
+ event.attr.exclude_hv = 1;
+ event.attr.exclude_idle = 1;
+
+ FAIL_IF(event_open(&event));
+
+ setup_ebb_handler(syscall_ebb_callee);
+ ebb_global_enable();
+
+ FAIL_IF(ebb_event_enable(&event));
+
+ mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+
+ while (ebb_state.stats.ebb_count < 20 && !mmcr0_mismatch)
+ FAIL_IF(core_busy_loop());
+
+ ebb_global_disable();
+ ebb_freeze_pmcs();
+
+ dump_ebb_state();
+
+ if (mmcr0_mismatch)
+ printf("Saw MMCR0 before 0x%lx after 0x%lx\n", before, after);
+
+ event_close(&event);
+
+ FAIL_IF(ebb_state.stats.ebb_count == 0);
+ FAIL_IF(mmcr0_mismatch);
+
+ return 0;
+}
+
+int pmae_handling(void)
+{
+ return eat_cpu(test_body);
+}
+
+int main(void)
+{
+ return test_harness(pmae_handling, "pmae_handling");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c b/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c
new file mode 100644
index 000000000..8ca92b9ee
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c
@@ -0,0 +1,93 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test that PMC5 & 6 are frozen (ie. don't overflow) when they are not being
+ * used. Tests the MMCR0_FC56 logic in the kernel.
+ */
+
+static int pmc56_overflowed;
+
+static void ebb_callee(void)
+{
+ uint64_t val;
+
+ val = mfspr(SPRN_BESCR);
+ if (!(val & BESCR_PMEO)) {
+ ebb_state.stats.spurious++;
+ goto out;
+ }
+
+ ebb_state.stats.ebb_count++;
+ count_pmc(2, sample_period);
+
+ val = mfspr(SPRN_PMC5);
+ if (val >= COUNTER_OVERFLOW)
+ pmc56_overflowed++;
+
+ count_pmc(5, COUNTER_OVERFLOW);
+
+ val = mfspr(SPRN_PMC6);
+ if (val >= COUNTER_OVERFLOW)
+ pmc56_overflowed++;
+
+ count_pmc(6, COUNTER_OVERFLOW);
+
+out:
+ reset_ebb();
+}
+
+int pmc56_overflow(void)
+{
+ struct event event;
+
+ SKIP_IF(!ebb_is_supported());
+
+ /* Use PMC2 so we set PMCjCE, which enables PMC5/6 */
+ event_init(&event, 0x2001e);
+ event_leader_ebb_init(&event);
+
+ event.attr.exclude_kernel = 1;
+ event.attr.exclude_hv = 1;
+ event.attr.exclude_idle = 1;
+
+ FAIL_IF(event_open(&event));
+
+ setup_ebb_handler(ebb_callee);
+ ebb_global_enable();
+
+ FAIL_IF(ebb_event_enable(&event));
+
+ mtspr(SPRN_PMC2, pmc_sample_period(sample_period));
+ mtspr(SPRN_PMC5, 0);
+ mtspr(SPRN_PMC6, 0);
+
+ while (ebb_state.stats.ebb_count < 10)
+ FAIL_IF(core_busy_loop());
+
+ ebb_global_disable();
+ ebb_freeze_pmcs();
+
+ dump_ebb_state();
+
+ printf("PMC5/6 overflow %d\n", pmc56_overflowed);
+
+ event_close(&event);
+
+ FAIL_IF(ebb_state.stats.ebb_count == 0 || pmc56_overflowed != 0);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(pmc56_overflow, "pmc56_overflow");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/reg_access_test.c b/tools/testing/selftests/powerpc/pmu/ebb/reg_access_test.c
new file mode 100644
index 000000000..f923228bc
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/reg_access_test.c
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test basic access to the EBB regs, they should be user accessible with no
+ * kernel interaction required.
+ */
+int reg_access(void)
+{
+ uint64_t val, expected;
+
+ SKIP_IF(!ebb_is_supported());
+
+ expected = 0x8000000100000000ull;
+ mtspr(SPRN_BESCR, expected);
+ val = mfspr(SPRN_BESCR);
+
+ FAIL_IF(val != expected);
+
+ expected = 0x0000000001000000ull;
+ mtspr(SPRN_EBBHR, expected);
+ val = mfspr(SPRN_EBBHR);
+
+ FAIL_IF(val != expected);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(reg_access, "reg_access");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/task_event_pinned_vs_ebb_test.c b/tools/testing/selftests/powerpc/pmu/ebb/task_event_pinned_vs_ebb_test.c
new file mode 100644
index 000000000..1846f4e84
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/task_event_pinned_vs_ebb_test.c
@@ -0,0 +1,93 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "ebb.h"
+
+
+/*
+ * Tests a pinned per-task event vs an EBB - in that order. The pinned per-task
+ * event should prevent the EBB event from being enabled.
+ */
+
+static int setup_child_event(struct event *event, pid_t child_pid)
+{
+ event_init_named(event, 0x400FA, "PM_RUN_INST_CMPL");
+
+ event->attr.pinned = 1;
+
+ event->attr.exclude_kernel = 1;
+ event->attr.exclude_hv = 1;
+ event->attr.exclude_idle = 1;
+
+ FAIL_IF(event_open_with_pid(event, child_pid));
+ FAIL_IF(event_enable(event));
+
+ return 0;
+}
+
+int task_event_pinned_vs_ebb(void)
+{
+ union pipe read_pipe, write_pipe;
+ struct event event;
+ pid_t pid;
+ int rc;
+
+ SKIP_IF(!ebb_is_supported());
+
+ FAIL_IF(pipe(read_pipe.fds) == -1);
+ FAIL_IF(pipe(write_pipe.fds) == -1);
+
+ pid = fork();
+ if (pid == 0) {
+ /* NB order of pipes looks reversed */
+ exit(ebb_child(write_pipe, read_pipe));
+ }
+
+ /* We setup the task event first */
+ rc = setup_child_event(&event, pid);
+ if (rc) {
+ kill_child_and_wait(pid);
+ return rc;
+ }
+
+ /* Signal the child to install its EBB event and wait */
+ if (sync_with_child(read_pipe, write_pipe))
+ /* If it fails, wait for it to exit */
+ goto wait;
+
+ /* Signal the child to run */
+ FAIL_IF(sync_with_child(read_pipe, write_pipe));
+
+wait:
+ /* We expect it to fail to read the event */
+ FAIL_IF(wait_for_child(pid) != 2);
+ FAIL_IF(event_disable(&event));
+ FAIL_IF(event_read(&event));
+
+ event_report(&event);
+
+ FAIL_IF(event.result.value == 0);
+ /*
+ * For reasons I don't understand enabled is usually just slightly
+ * lower than running. Would be good to confirm why.
+ */
+ FAIL_IF(event.result.enabled == 0);
+ FAIL_IF(event.result.running == 0);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(task_event_pinned_vs_ebb, "task_event_pinned_vs_ebb");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/task_event_vs_ebb_test.c b/tools/testing/selftests/powerpc/pmu/ebb/task_event_vs_ebb_test.c
new file mode 100644
index 000000000..e3bc6e92a
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/task_event_vs_ebb_test.c
@@ -0,0 +1,85 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "ebb.h"
+
+
+/*
+ * Tests a per-task event vs an EBB - in that order. The EBB should push the
+ * per-task event off the PMU.
+ */
+
+static int setup_child_event(struct event *event, pid_t child_pid)
+{
+ event_init_named(event, 0x400FA, "PM_RUN_INST_CMPL");
+
+ event->attr.exclude_kernel = 1;
+ event->attr.exclude_hv = 1;
+ event->attr.exclude_idle = 1;
+
+ FAIL_IF(event_open_with_pid(event, child_pid));
+ FAIL_IF(event_enable(event));
+
+ return 0;
+}
+
+int task_event_vs_ebb(void)
+{
+ union pipe read_pipe, write_pipe;
+ struct event event;
+ pid_t pid;
+ int rc;
+
+ SKIP_IF(!ebb_is_supported());
+
+ FAIL_IF(pipe(read_pipe.fds) == -1);
+ FAIL_IF(pipe(write_pipe.fds) == -1);
+
+ pid = fork();
+ if (pid == 0) {
+ /* NB order of pipes looks reversed */
+ exit(ebb_child(write_pipe, read_pipe));
+ }
+
+ /* We setup the task event first */
+ rc = setup_child_event(&event, pid);
+ if (rc) {
+ kill_child_and_wait(pid);
+ return rc;
+ }
+
+ /* Signal the child to install its EBB event and wait */
+ if (sync_with_child(read_pipe, write_pipe))
+ /* If it fails, wait for it to exit */
+ goto wait;
+
+ /* Signal the child to run */
+ FAIL_IF(sync_with_child(read_pipe, write_pipe));
+
+wait:
+ /* The EBB event should push the task event off so the child should succeed */
+ FAIL_IF(wait_for_child(pid));
+ FAIL_IF(event_disable(&event));
+ FAIL_IF(event_read(&event));
+
+ event_report(&event);
+
+ /* The task event may have run, or not so we can't assert anything about it */
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(task_event_vs_ebb, "task_event_vs_ebb");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/trace.c b/tools/testing/selftests/powerpc/pmu/ebb/trace.c
new file mode 100644
index 000000000..251e66ab2
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/trace.c
@@ -0,0 +1,300 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+
+#include "trace.h"
+
+
+struct trace_buffer *trace_buffer_allocate(u64 size)
+{
+ struct trace_buffer *tb;
+
+ if (size < sizeof(*tb)) {
+ fprintf(stderr, "Error: trace buffer too small\n");
+ return NULL;
+ }
+
+ tb = mmap(NULL, size, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ if (tb == MAP_FAILED) {
+ perror("mmap");
+ return NULL;
+ }
+
+ tb->size = size;
+ tb->tail = tb->data;
+ tb->overflow = false;
+
+ return tb;
+}
+
+static bool trace_check_bounds(struct trace_buffer *tb, void *p)
+{
+ return p < ((void *)tb + tb->size);
+}
+
+static bool trace_check_alloc(struct trace_buffer *tb, void *p)
+{
+ /*
+ * If we ever overflowed don't allow any more input. This prevents us
+ * from dropping a large item and then later logging a small one. The
+ * buffer should just stop when overflow happened, not be patchy. If
+ * you're overflowing, make your buffer bigger.
+ */
+ if (tb->overflow)
+ return false;
+
+ if (!trace_check_bounds(tb, p)) {
+ tb->overflow = true;
+ return false;
+ }
+
+ return true;
+}
+
+static void *trace_alloc(struct trace_buffer *tb, int bytes)
+{
+ void *p, *newtail;
+
+ p = tb->tail;
+ newtail = tb->tail + bytes;
+ if (!trace_check_alloc(tb, newtail))
+ return NULL;
+
+ tb->tail = newtail;
+
+ return p;
+}
+
+static struct trace_entry *trace_alloc_entry(struct trace_buffer *tb, int payload_size)
+{
+ struct trace_entry *e;
+
+ e = trace_alloc(tb, sizeof(*e) + payload_size);
+ if (e)
+ e->length = payload_size;
+
+ return e;
+}
+
+int trace_log_reg(struct trace_buffer *tb, u64 reg, u64 value)
+{
+ struct trace_entry *e;
+ u64 *p;
+
+ e = trace_alloc_entry(tb, sizeof(reg) + sizeof(value));
+ if (!e)
+ return -ENOSPC;
+
+ e->type = TRACE_TYPE_REG;
+ p = (u64 *)e->data;
+ *p++ = reg;
+ *p++ = value;
+
+ return 0;
+}
+
+int trace_log_counter(struct trace_buffer *tb, u64 value)
+{
+ struct trace_entry *e;
+ u64 *p;
+
+ e = trace_alloc_entry(tb, sizeof(value));
+ if (!e)
+ return -ENOSPC;
+
+ e->type = TRACE_TYPE_COUNTER;
+ p = (u64 *)e->data;
+ *p++ = value;
+
+ return 0;
+}
+
+int trace_log_string(struct trace_buffer *tb, char *str)
+{
+ struct trace_entry *e;
+ char *p;
+ int len;
+
+ len = strlen(str);
+
+ /* We NULL terminate to make printing easier */
+ e = trace_alloc_entry(tb, len + 1);
+ if (!e)
+ return -ENOSPC;
+
+ e->type = TRACE_TYPE_STRING;
+ p = (char *)e->data;
+ memcpy(p, str, len);
+ p += len;
+ *p = '\0';
+
+ return 0;
+}
+
+int trace_log_indent(struct trace_buffer *tb)
+{
+ struct trace_entry *e;
+
+ e = trace_alloc_entry(tb, 0);
+ if (!e)
+ return -ENOSPC;
+
+ e->type = TRACE_TYPE_INDENT;
+
+ return 0;
+}
+
+int trace_log_outdent(struct trace_buffer *tb)
+{
+ struct trace_entry *e;
+
+ e = trace_alloc_entry(tb, 0);
+ if (!e)
+ return -ENOSPC;
+
+ e->type = TRACE_TYPE_OUTDENT;
+
+ return 0;
+}
+
+static void trace_print_header(int seq, int prefix)
+{
+ printf("%*s[%d]: ", prefix, "", seq);
+}
+
+static char *trace_decode_reg(int reg)
+{
+ switch (reg) {
+ case 769: return "SPRN_MMCR2"; break;
+ case 770: return "SPRN_MMCRA"; break;
+ case 779: return "SPRN_MMCR0"; break;
+ case 804: return "SPRN_EBBHR"; break;
+ case 805: return "SPRN_EBBRR"; break;
+ case 806: return "SPRN_BESCR"; break;
+ case 800: return "SPRN_BESCRS"; break;
+ case 801: return "SPRN_BESCRSU"; break;
+ case 802: return "SPRN_BESCRR"; break;
+ case 803: return "SPRN_BESCRRU"; break;
+ case 771: return "SPRN_PMC1"; break;
+ case 772: return "SPRN_PMC2"; break;
+ case 773: return "SPRN_PMC3"; break;
+ case 774: return "SPRN_PMC4"; break;
+ case 775: return "SPRN_PMC5"; break;
+ case 776: return "SPRN_PMC6"; break;
+ case 780: return "SPRN_SIAR"; break;
+ case 781: return "SPRN_SDAR"; break;
+ case 768: return "SPRN_SIER"; break;
+ }
+
+ return NULL;
+}
+
+static void trace_print_reg(struct trace_entry *e)
+{
+ u64 *p, *reg, *value;
+ char *name;
+
+ p = (u64 *)e->data;
+ reg = p++;
+ value = p;
+
+ name = trace_decode_reg(*reg);
+ if (name)
+ printf("register %-10s = 0x%016llx\n", name, *value);
+ else
+ printf("register %lld = 0x%016llx\n", *reg, *value);
+}
+
+static void trace_print_counter(struct trace_entry *e)
+{
+ u64 *value;
+
+ value = (u64 *)e->data;
+ printf("counter = %lld\n", *value);
+}
+
+static void trace_print_string(struct trace_entry *e)
+{
+ char *str;
+
+ str = (char *)e->data;
+ puts(str);
+}
+
+#define BASE_PREFIX 2
+#define PREFIX_DELTA 8
+
+static void trace_print_entry(struct trace_entry *e, int seq, int *prefix)
+{
+ switch (e->type) {
+ case TRACE_TYPE_REG:
+ trace_print_header(seq, *prefix);
+ trace_print_reg(e);
+ break;
+ case TRACE_TYPE_COUNTER:
+ trace_print_header(seq, *prefix);
+ trace_print_counter(e);
+ break;
+ case TRACE_TYPE_STRING:
+ trace_print_header(seq, *prefix);
+ trace_print_string(e);
+ break;
+ case TRACE_TYPE_INDENT:
+ trace_print_header(seq, *prefix);
+ puts("{");
+ *prefix += PREFIX_DELTA;
+ break;
+ case TRACE_TYPE_OUTDENT:
+ *prefix -= PREFIX_DELTA;
+ if (*prefix < BASE_PREFIX)
+ *prefix = BASE_PREFIX;
+ trace_print_header(seq, *prefix);
+ puts("}");
+ break;
+ default:
+ trace_print_header(seq, *prefix);
+ printf("entry @ %p type %d\n", e, e->type);
+ break;
+ }
+}
+
+void trace_buffer_print(struct trace_buffer *tb)
+{
+ struct trace_entry *e;
+ int i, prefix;
+ void *p;
+
+ printf("Trace buffer dump:\n");
+ printf(" address %p \n", tb);
+ printf(" tail %p\n", tb->tail);
+ printf(" size %llu\n", tb->size);
+ printf(" overflow %s\n", tb->overflow ? "TRUE" : "false");
+ printf(" Content:\n");
+
+ p = tb->data;
+
+ i = 0;
+ prefix = BASE_PREFIX;
+
+ while (trace_check_bounds(tb, p) && p < tb->tail) {
+ e = p;
+
+ trace_print_entry(e, i, &prefix);
+
+ i++;
+ p = (void *)e + sizeof(*e) + e->length;
+ }
+}
+
+void trace_print_location(struct trace_buffer *tb)
+{
+ printf("Trace buffer 0x%llx bytes @ %p\n", tb->size, tb);
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/trace.h b/tools/testing/selftests/powerpc/pmu/ebb/trace.h
new file mode 100644
index 000000000..926458e28
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/trace.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#ifndef _SELFTESTS_POWERPC_PMU_EBB_TRACE_H
+#define _SELFTESTS_POWERPC_PMU_EBB_TRACE_H
+
+#include "utils.h"
+
+#define TRACE_TYPE_REG 1
+#define TRACE_TYPE_COUNTER 2
+#define TRACE_TYPE_STRING 3
+#define TRACE_TYPE_INDENT 4
+#define TRACE_TYPE_OUTDENT 5
+
+struct trace_entry
+{
+ u8 type;
+ u8 length;
+ u8 data[0];
+};
+
+struct trace_buffer
+{
+ u64 size;
+ bool overflow;
+ void *tail;
+ u8 data[0];
+};
+
+struct trace_buffer *trace_buffer_allocate(u64 size);
+int trace_log_reg(struct trace_buffer *tb, u64 reg, u64 value);
+int trace_log_counter(struct trace_buffer *tb, u64 value);
+int trace_log_string(struct trace_buffer *tb, char *str);
+int trace_log_indent(struct trace_buffer *tb);
+int trace_log_outdent(struct trace_buffer *tb);
+void trace_buffer_print(struct trace_buffer *tb);
+void trace_print_location(struct trace_buffer *tb);
+
+#endif /* _SELFTESTS_POWERPC_PMU_EBB_TRACE_H */
diff --git a/tools/testing/selftests/powerpc/pmu/event.c b/tools/testing/selftests/powerpc/pmu/event.c
new file mode 100644
index 000000000..184b36807
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/event.c
@@ -0,0 +1,131 @@
+/*
+ * Copyright 2013, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <string.h>
+#include <stdio.h>
+#include <sys/ioctl.h>
+
+#include "event.h"
+
+
+int perf_event_open(struct perf_event_attr *attr, pid_t pid, int cpu,
+ int group_fd, unsigned long flags)
+{
+ return syscall(__NR_perf_event_open, attr, pid, cpu,
+ group_fd, flags);
+}
+
+void event_init_opts(struct event *e, u64 config, int type, char *name)
+{
+ memset(e, 0, sizeof(*e));
+
+ e->name = name;
+
+ e->attr.type = type;
+ e->attr.config = config;
+ e->attr.size = sizeof(e->attr);
+ /* This has to match the structure layout in the header */
+ e->attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | \
+ PERF_FORMAT_TOTAL_TIME_RUNNING;
+}
+
+void event_init_named(struct event *e, u64 config, char *name)
+{
+ event_init_opts(e, config, PERF_TYPE_RAW, name);
+}
+
+void event_init(struct event *e, u64 config)
+{
+ event_init_opts(e, config, PERF_TYPE_RAW, "event");
+}
+
+#define PERF_CURRENT_PID 0
+#define PERF_NO_PID -1
+#define PERF_NO_CPU -1
+#define PERF_NO_GROUP -1
+
+int event_open_with_options(struct event *e, pid_t pid, int cpu, int group_fd)
+{
+ e->fd = perf_event_open(&e->attr, pid, cpu, group_fd, 0);
+ if (e->fd == -1) {
+ perror("perf_event_open");
+ return -1;
+ }
+
+ return 0;
+}
+
+int event_open_with_group(struct event *e, int group_fd)
+{
+ return event_open_with_options(e, PERF_CURRENT_PID, PERF_NO_CPU, group_fd);
+}
+
+int event_open_with_pid(struct event *e, pid_t pid)
+{
+ return event_open_with_options(e, pid, PERF_NO_CPU, PERF_NO_GROUP);
+}
+
+int event_open_with_cpu(struct event *e, int cpu)
+{
+ return event_open_with_options(e, PERF_NO_PID, cpu, PERF_NO_GROUP);
+}
+
+int event_open(struct event *e)
+{
+ return event_open_with_options(e, PERF_CURRENT_PID, PERF_NO_CPU, PERF_NO_GROUP);
+}
+
+void event_close(struct event *e)
+{
+ close(e->fd);
+}
+
+int event_enable(struct event *e)
+{
+ return ioctl(e->fd, PERF_EVENT_IOC_ENABLE);
+}
+
+int event_disable(struct event *e)
+{
+ return ioctl(e->fd, PERF_EVENT_IOC_DISABLE);
+}
+
+int event_reset(struct event *e)
+{
+ return ioctl(e->fd, PERF_EVENT_IOC_RESET);
+}
+
+int event_read(struct event *e)
+{
+ int rc;
+
+ rc = read(e->fd, &e->result, sizeof(e->result));
+ if (rc != sizeof(e->result)) {
+ fprintf(stderr, "read error on event %p!\n", e);
+ return -1;
+ }
+
+ return 0;
+}
+
+void event_report_justified(struct event *e, int name_width, int result_width)
+{
+ printf("%*s: result %*llu ", name_width, e->name, result_width,
+ e->result.value);
+
+ if (e->result.running == e->result.enabled)
+ printf("running/enabled %llu\n", e->result.running);
+ else
+ printf("running %llu enabled %llu\n", e->result.running,
+ e->result.enabled);
+}
+
+void event_report(struct event *e)
+{
+ event_report_justified(e, 0, 0);
+}
diff --git a/tools/testing/selftests/powerpc/pmu/event.h b/tools/testing/selftests/powerpc/pmu/event.h
new file mode 100644
index 000000000..a0ea6b1ee
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/event.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright 2013, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#ifndef _SELFTESTS_POWERPC_PMU_EVENT_H
+#define _SELFTESTS_POWERPC_PMU_EVENT_H
+
+#include <unistd.h>
+#include <linux/perf_event.h>
+
+#include "utils.h"
+
+
+struct event {
+ struct perf_event_attr attr;
+ char *name;
+ int fd;
+ /* This must match the read_format we use */
+ struct {
+ u64 value;
+ u64 running;
+ u64 enabled;
+ } result;
+};
+
+void event_init(struct event *e, u64 config);
+void event_init_named(struct event *e, u64 config, char *name);
+void event_init_opts(struct event *e, u64 config, int type, char *name);
+int event_open_with_options(struct event *e, pid_t pid, int cpu, int group_fd);
+int event_open_with_group(struct event *e, int group_fd);
+int event_open_with_pid(struct event *e, pid_t pid);
+int event_open_with_cpu(struct event *e, int cpu);
+int event_open(struct event *e);
+void event_close(struct event *e);
+int event_enable(struct event *e);
+int event_disable(struct event *e);
+int event_reset(struct event *e);
+int event_read(struct event *e);
+void event_report_justified(struct event *e, int name_width, int result_width);
+void event_report(struct event *e);
+
+#endif /* _SELFTESTS_POWERPC_PMU_EVENT_H */
diff --git a/tools/testing/selftests/powerpc/pmu/l3_bank_test.c b/tools/testing/selftests/powerpc/pmu/l3_bank_test.c
new file mode 100644
index 000000000..77472f314
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/l3_bank_test.c
@@ -0,0 +1,48 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "event.h"
+#include "utils.h"
+
+#define MALLOC_SIZE (0x10000 * 10) /* Ought to be enough .. */
+
+/*
+ * Tests that the L3 bank handling is correct. We fixed it in commit e9aaac1.
+ */
+static int l3_bank_test(void)
+{
+ struct event event;
+ char *p;
+ int i;
+
+ p = malloc(MALLOC_SIZE);
+ FAIL_IF(!p);
+
+ event_init(&event, 0x84918F);
+
+ FAIL_IF(event_open(&event));
+
+ for (i = 0; i < MALLOC_SIZE; i += 0x10000)
+ p[i] = i;
+
+ event_read(&event);
+ event_report(&event);
+
+ FAIL_IF(event.result.running == 0);
+ FAIL_IF(event.result.enabled == 0);
+
+ event_close(&event);
+ free(p);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(l3_bank_test, "l3_bank_test");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/lib.c b/tools/testing/selftests/powerpc/pmu/lib.c
new file mode 100644
index 000000000..5bf5dd408
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/lib.c
@@ -0,0 +1,227 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#define _GNU_SOURCE /* For CPU_ZERO etc. */
+
+#include <errno.h>
+#include <sched.h>
+#include <setjmp.h>
+#include <stdlib.h>
+#include <sys/wait.h>
+
+#include "utils.h"
+#include "lib.h"
+
+
+int bind_to_cpu(int cpu)
+{
+ cpu_set_t mask;
+
+ printf("Binding to cpu %d\n", cpu);
+
+ CPU_ZERO(&mask);
+ CPU_SET(cpu, &mask);
+
+ return sched_setaffinity(0, sizeof(mask), &mask);
+}
+
+#define PARENT_TOKEN 0xAA
+#define CHILD_TOKEN 0x55
+
+int sync_with_child(union pipe read_pipe, union pipe write_pipe)
+{
+ char c = PARENT_TOKEN;
+
+ FAIL_IF(write(write_pipe.write_fd, &c, 1) != 1);
+ FAIL_IF(read(read_pipe.read_fd, &c, 1) != 1);
+ if (c != CHILD_TOKEN) /* sometimes expected */
+ return 1;
+
+ return 0;
+}
+
+int wait_for_parent(union pipe read_pipe)
+{
+ char c;
+
+ FAIL_IF(read(read_pipe.read_fd, &c, 1) != 1);
+ FAIL_IF(c != PARENT_TOKEN);
+
+ return 0;
+}
+
+int notify_parent(union pipe write_pipe)
+{
+ char c = CHILD_TOKEN;
+
+ FAIL_IF(write(write_pipe.write_fd, &c, 1) != 1);
+
+ return 0;
+}
+
+int notify_parent_of_error(union pipe write_pipe)
+{
+ char c = ~CHILD_TOKEN;
+
+ FAIL_IF(write(write_pipe.write_fd, &c, 1) != 1);
+
+ return 0;
+}
+
+int wait_for_child(pid_t child_pid)
+{
+ int rc;
+
+ if (waitpid(child_pid, &rc, 0) == -1) {
+ perror("waitpid");
+ return 1;
+ }
+
+ if (WIFEXITED(rc))
+ rc = WEXITSTATUS(rc);
+ else
+ rc = 1; /* Signal or other */
+
+ return rc;
+}
+
+int kill_child_and_wait(pid_t child_pid)
+{
+ kill(child_pid, SIGTERM);
+
+ return wait_for_child(child_pid);
+}
+
+static int eat_cpu_child(union pipe read_pipe, union pipe write_pipe)
+{
+ volatile int i = 0;
+
+ /*
+ * We are just here to eat cpu and die. So make sure we can be killed,
+ * and also don't do any custom SIGTERM handling.
+ */
+ signal(SIGTERM, SIG_DFL);
+
+ notify_parent(write_pipe);
+ wait_for_parent(read_pipe);
+
+ /* Soak up cpu forever */
+ while (1) i++;
+
+ return 0;
+}
+
+pid_t eat_cpu(int (test_function)(void))
+{
+ union pipe read_pipe, write_pipe;
+ int cpu, rc;
+ pid_t pid;
+
+ cpu = pick_online_cpu();
+ FAIL_IF(cpu < 0);
+ FAIL_IF(bind_to_cpu(cpu));
+
+ if (pipe(read_pipe.fds) == -1)
+ return -1;
+
+ if (pipe(write_pipe.fds) == -1)
+ return -1;
+
+ pid = fork();
+ if (pid == 0)
+ exit(eat_cpu_child(write_pipe, read_pipe));
+
+ if (sync_with_child(read_pipe, write_pipe)) {
+ rc = -1;
+ goto out;
+ }
+
+ printf("main test running as pid %d\n", getpid());
+
+ rc = test_function();
+out:
+ kill(pid, SIGKILL);
+
+ return rc;
+}
+
+struct addr_range libc, vdso;
+
+int parse_proc_maps(void)
+{
+ unsigned long start, end;
+ char execute, name[128];
+ FILE *f;
+ int rc;
+
+ f = fopen("/proc/self/maps", "r");
+ if (!f) {
+ perror("fopen");
+ return -1;
+ }
+
+ do {
+ /* This skips line with no executable which is what we want */
+ rc = fscanf(f, "%lx-%lx %*c%*c%c%*c %*x %*d:%*d %*d %127s\n",
+ &start, &end, &execute, name);
+ if (rc <= 0)
+ break;
+
+ if (execute != 'x')
+ continue;
+
+ if (strstr(name, "libc")) {
+ libc.first = start;
+ libc.last = end - 1;
+ } else if (strstr(name, "[vdso]")) {
+ vdso.first = start;
+ vdso.last = end - 1;
+ }
+ } while(1);
+
+ fclose(f);
+
+ return 0;
+}
+
+#define PARANOID_PATH "/proc/sys/kernel/perf_event_paranoid"
+
+bool require_paranoia_below(int level)
+{
+ long current;
+ char *end, buf[16];
+ FILE *f;
+ bool rc;
+
+ rc = false;
+
+ f = fopen(PARANOID_PATH, "r");
+ if (!f) {
+ perror("fopen");
+ goto out;
+ }
+
+ if (!fgets(buf, sizeof(buf), f)) {
+ printf("Couldn't read " PARANOID_PATH "?\n");
+ goto out_close;
+ }
+
+ current = strtol(buf, &end, 10);
+
+ if (end == buf) {
+ printf("Couldn't parse " PARANOID_PATH "?\n");
+ goto out_close;
+ }
+
+ if (current >= level)
+ goto out_close;
+
+ rc = true;
+out_close:
+ fclose(f);
+out:
+ return rc;
+}
+
diff --git a/tools/testing/selftests/powerpc/pmu/lib.h b/tools/testing/selftests/powerpc/pmu/lib.h
new file mode 100644
index 000000000..0213af4ff
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/lib.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#ifndef __SELFTESTS_POWERPC_PMU_LIB_H
+#define __SELFTESTS_POWERPC_PMU_LIB_H
+
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <unistd.h>
+
+union pipe {
+ struct {
+ int read_fd;
+ int write_fd;
+ };
+ int fds[2];
+};
+
+extern int bind_to_cpu(int cpu);
+extern int kill_child_and_wait(pid_t child_pid);
+extern int wait_for_child(pid_t child_pid);
+extern int sync_with_child(union pipe read_pipe, union pipe write_pipe);
+extern int wait_for_parent(union pipe read_pipe);
+extern int notify_parent(union pipe write_pipe);
+extern int notify_parent_of_error(union pipe write_pipe);
+extern pid_t eat_cpu(int (test_function)(void));
+extern bool require_paranoia_below(int level);
+
+struct addr_range {
+ uint64_t first, last;
+};
+
+extern struct addr_range libc, vdso;
+
+int parse_proc_maps(void);
+
+#endif /* __SELFTESTS_POWERPC_PMU_LIB_H */
diff --git a/tools/testing/selftests/powerpc/pmu/loop.S b/tools/testing/selftests/powerpc/pmu/loop.S
new file mode 100644
index 000000000..20c1f0876
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/loop.S
@@ -0,0 +1,43 @@
+/*
+ * Copyright 2013, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <ppc-asm.h>
+
+ .text
+
+FUNC_START(thirty_two_instruction_loop)
+ cmpdi r3,0
+ beqlr
+ addi r4,r3,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1 # 28 addi's
+ subi r3,r3,1
+ b FUNC_NAME(thirty_two_instruction_loop)
+FUNC_END(thirty_two_instruction_loop)
diff --git a/tools/testing/selftests/powerpc/pmu/per_event_excludes.c b/tools/testing/selftests/powerpc/pmu/per_event_excludes.c
new file mode 100644
index 000000000..fddbbc9ca
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/per_event_excludes.c
@@ -0,0 +1,114 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#define _GNU_SOURCE
+
+#include <elf.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <string.h>
+#include <sys/prctl.h>
+
+#include "event.h"
+#include "lib.h"
+#include "utils.h"
+
+/*
+ * Test that per-event excludes work.
+ */
+
+static int per_event_excludes(void)
+{
+ struct event *e, events[4];
+ char *platform;
+ int i;
+
+ platform = (char *)get_auxv_entry(AT_BASE_PLATFORM);
+ FAIL_IF(!platform);
+ SKIP_IF(strcmp(platform, "power8") != 0);
+
+ /*
+ * We need to create the events disabled, otherwise the running/enabled
+ * counts don't match up.
+ */
+ e = &events[0];
+ event_init_opts(e, PERF_COUNT_HW_INSTRUCTIONS,
+ PERF_TYPE_HARDWARE, "instructions");
+ e->attr.disabled = 1;
+
+ e = &events[1];
+ event_init_opts(e, PERF_COUNT_HW_INSTRUCTIONS,
+ PERF_TYPE_HARDWARE, "instructions(k)");
+ e->attr.disabled = 1;
+ e->attr.exclude_user = 1;
+ e->attr.exclude_hv = 1;
+
+ e = &events[2];
+ event_init_opts(e, PERF_COUNT_HW_INSTRUCTIONS,
+ PERF_TYPE_HARDWARE, "instructions(h)");
+ e->attr.disabled = 1;
+ e->attr.exclude_user = 1;
+ e->attr.exclude_kernel = 1;
+
+ e = &events[3];
+ event_init_opts(e, PERF_COUNT_HW_INSTRUCTIONS,
+ PERF_TYPE_HARDWARE, "instructions(u)");
+ e->attr.disabled = 1;
+ e->attr.exclude_hv = 1;
+ e->attr.exclude_kernel = 1;
+
+ FAIL_IF(event_open(&events[0]));
+
+ /*
+ * The open here will fail if we don't have per event exclude support,
+ * because the second event has an incompatible set of exclude settings
+ * and we're asking for the events to be in a group.
+ */
+ for (i = 1; i < 4; i++)
+ FAIL_IF(event_open_with_group(&events[i], events[0].fd));
+
+ /*
+ * Even though the above will fail without per-event excludes we keep
+ * testing in order to be thorough.
+ */
+ prctl(PR_TASK_PERF_EVENTS_ENABLE);
+
+ /* Spin for a while */
+ for (i = 0; i < INT_MAX; i++)
+ asm volatile("" : : : "memory");
+
+ prctl(PR_TASK_PERF_EVENTS_DISABLE);
+
+ for (i = 0; i < 4; i++) {
+ FAIL_IF(event_read(&events[i]));
+ event_report(&events[i]);
+ }
+
+ /*
+ * We should see that all events have enabled == running. That
+ * shows that they were all on the PMU at once.
+ */
+ for (i = 0; i < 4; i++)
+ FAIL_IF(events[i].result.running != events[i].result.enabled);
+
+ /*
+ * We can also check that the result for instructions is >= all the
+ * other counts. That's because it is counting all instructions while
+ * the others are counting a subset.
+ */
+ for (i = 1; i < 4; i++)
+ FAIL_IF(events[0].result.value < events[i].result.value);
+
+ for (i = 0; i < 4; i++)
+ event_close(&events[i]);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(per_event_excludes, "per_event_excludes");
+}
diff --git a/tools/testing/selftests/powerpc/primitives/.gitignore b/tools/testing/selftests/powerpc/primitives/.gitignore
new file mode 100644
index 000000000..4cc4e31be
--- /dev/null
+++ b/tools/testing/selftests/powerpc/primitives/.gitignore
@@ -0,0 +1 @@
+load_unaligned_zeropad
diff --git a/tools/testing/selftests/powerpc/primitives/Makefile b/tools/testing/selftests/powerpc/primitives/Makefile
new file mode 100644
index 000000000..ea2b7bd09
--- /dev/null
+++ b/tools/testing/selftests/powerpc/primitives/Makefile
@@ -0,0 +1,8 @@
+CFLAGS += -I$(CURDIR)
+
+TEST_GEN_PROGS := load_unaligned_zeropad
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): ../harness.c
diff --git a/tools/testing/selftests/powerpc/primitives/asm/asm-compat.h b/tools/testing/selftests/powerpc/primitives/asm/asm-compat.h
new file mode 120000
index 000000000..b14255e15
--- /dev/null
+++ b/tools/testing/selftests/powerpc/primitives/asm/asm-compat.h
@@ -0,0 +1 @@
+../.././../../../../arch/powerpc/include/asm/asm-compat.h \ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/primitives/asm/asm-const.h b/tools/testing/selftests/powerpc/primitives/asm/asm-const.h
new file mode 120000
index 000000000..18d8be13e
--- /dev/null
+++ b/tools/testing/selftests/powerpc/primitives/asm/asm-const.h
@@ -0,0 +1 @@
+../../../../../../arch/powerpc/include/asm/asm-const.h \ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/primitives/asm/feature-fixups.h b/tools/testing/selftests/powerpc/primitives/asm/feature-fixups.h
new file mode 120000
index 000000000..8dc6d4d46
--- /dev/null
+++ b/tools/testing/selftests/powerpc/primitives/asm/feature-fixups.h
@@ -0,0 +1 @@
+../../../../../../arch/powerpc/include/asm/feature-fixups.h \ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/primitives/asm/firmware.h b/tools/testing/selftests/powerpc/primitives/asm/firmware.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/testing/selftests/powerpc/primitives/asm/firmware.h
diff --git a/tools/testing/selftests/powerpc/primitives/asm/ppc-opcode.h b/tools/testing/selftests/powerpc/primitives/asm/ppc-opcode.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/testing/selftests/powerpc/primitives/asm/ppc-opcode.h
diff --git a/tools/testing/selftests/powerpc/primitives/asm/ppc_asm.h b/tools/testing/selftests/powerpc/primitives/asm/ppc_asm.h
new file mode 120000
index 000000000..66c819322
--- /dev/null
+++ b/tools/testing/selftests/powerpc/primitives/asm/ppc_asm.h
@@ -0,0 +1 @@
+../../../../../../arch/powerpc/include/asm/ppc_asm.h \ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/primitives/asm/processor.h b/tools/testing/selftests/powerpc/primitives/asm/processor.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/testing/selftests/powerpc/primitives/asm/processor.h
diff --git a/tools/testing/selftests/powerpc/primitives/linux/stringify.h b/tools/testing/selftests/powerpc/primitives/linux/stringify.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/testing/selftests/powerpc/primitives/linux/stringify.h
diff --git a/tools/testing/selftests/powerpc/primitives/load_unaligned_zeropad.c b/tools/testing/selftests/powerpc/primitives/load_unaligned_zeropad.c
new file mode 100644
index 000000000..ed3239bbf
--- /dev/null
+++ b/tools/testing/selftests/powerpc/primitives/load_unaligned_zeropad.c
@@ -0,0 +1,159 @@
+/*
+ * Userspace test harness for load_unaligned_zeropad. Creates two
+ * pages and uses mprotect to prevent access to the second page and
+ * a SEGV handler that walks the exception tables and runs the fixup
+ * routine.
+ *
+ * The results are compared against a normal load that is that is
+ * performed while access to the second page is enabled via mprotect.
+ *
+ * Copyright (C) 2014 Anton Blanchard <anton@au.ibm.com>, IBM
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <signal.h>
+#include <unistd.h>
+#include <sys/mman.h>
+
+#define FIXUP_SECTION ".ex_fixup"
+
+static inline unsigned long __fls(unsigned long x);
+
+#include "word-at-a-time.h"
+
+#include "utils.h"
+
+static inline unsigned long __fls(unsigned long x)
+{
+ int lz;
+
+ asm (PPC_CNTLZL "%0,%1" : "=r" (lz) : "r" (x));
+ return sizeof(unsigned long) - 1 - lz;
+}
+
+static int page_size;
+static char *mem_region;
+
+static int protect_region(void)
+{
+ if (mprotect(mem_region + page_size, page_size, PROT_NONE)) {
+ perror("mprotect");
+ return 1;
+ }
+
+ return 0;
+}
+
+static int unprotect_region(void)
+{
+ if (mprotect(mem_region + page_size, page_size, PROT_READ|PROT_WRITE)) {
+ perror("mprotect");
+ return 1;
+ }
+
+ return 0;
+}
+
+extern char __start___ex_table[];
+extern char __stop___ex_table[];
+
+#if defined(__powerpc64__)
+#define UCONTEXT_NIA(UC) (UC)->uc_mcontext.gp_regs[PT_NIP]
+#elif defined(__powerpc__)
+#define UCONTEXT_NIA(UC) (UC)->uc_mcontext.uc_regs->gregs[PT_NIP]
+#else
+#error implement UCONTEXT_NIA
+#endif
+
+struct extbl_entry {
+ int insn;
+ int fixup;
+};
+
+static void segv_handler(int signr, siginfo_t *info, void *ptr)
+{
+ ucontext_t *uc = (ucontext_t *)ptr;
+ unsigned long addr = (unsigned long)info->si_addr;
+ unsigned long *ip = &UCONTEXT_NIA(uc);
+ struct extbl_entry *entry = (struct extbl_entry *)__start___ex_table;
+
+ while (entry < (struct extbl_entry *)__stop___ex_table) {
+ unsigned long insn, fixup;
+
+ insn = (unsigned long)&entry->insn + entry->insn;
+ fixup = (unsigned long)&entry->fixup + entry->fixup;
+
+ if (insn == *ip) {
+ *ip = fixup;
+ return;
+ }
+ }
+
+ printf("No exception table match for NIA %lx ADDR %lx\n", *ip, addr);
+ abort();
+}
+
+static void setup_segv_handler(void)
+{
+ struct sigaction action;
+
+ memset(&action, 0, sizeof(action));
+ action.sa_sigaction = segv_handler;
+ action.sa_flags = SA_SIGINFO;
+ sigaction(SIGSEGV, &action, NULL);
+}
+
+static int do_one_test(char *p, int page_offset)
+{
+ unsigned long should;
+ unsigned long got;
+
+ FAIL_IF(unprotect_region());
+ should = *(unsigned long *)p;
+ FAIL_IF(protect_region());
+
+ got = load_unaligned_zeropad(p);
+
+ if (should != got) {
+ printf("offset %u load_unaligned_zeropad returned 0x%lx, should be 0x%lx\n", page_offset, got, should);
+ return 1;
+ }
+
+ return 0;
+}
+
+static int test_body(void)
+{
+ unsigned long i;
+
+ page_size = getpagesize();
+ mem_region = mmap(NULL, page_size * 2, PROT_READ|PROT_WRITE,
+ MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+
+ FAIL_IF(mem_region == MAP_FAILED);
+
+ for (i = 0; i < page_size; i++)
+ mem_region[i] = i;
+
+ memset(mem_region+page_size, 0, page_size);
+
+ setup_segv_handler();
+
+ for (i = 0; i < page_size; i++)
+ FAIL_IF(do_one_test(mem_region+i, i));
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(test_body, "load_unaligned_zeropad");
+}
diff --git a/tools/testing/selftests/powerpc/primitives/word-at-a-time.h b/tools/testing/selftests/powerpc/primitives/word-at-a-time.h
new file mode 120000
index 000000000..eb74401b5
--- /dev/null
+++ b/tools/testing/selftests/powerpc/primitives/word-at-a-time.h
@@ -0,0 +1 @@
+../../../../../arch/powerpc/include/asm/word-at-a-time.h \ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/ptrace/.gitignore b/tools/testing/selftests/powerpc/ptrace/.gitignore
new file mode 100644
index 000000000..07ec449a2
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/.gitignore
@@ -0,0 +1,12 @@
+ptrace-gpr
+ptrace-tm-gpr
+ptrace-tm-spd-gpr
+ptrace-tar
+ptrace-tm-tar
+ptrace-tm-spd-tar
+ptrace-vsx
+ptrace-tm-vsx
+ptrace-tm-spd-vsx
+ptrace-tm-spr
+ptrace-hwbreak
+perf-hwbreak
diff --git a/tools/testing/selftests/powerpc/ptrace/Makefile b/tools/testing/selftests/powerpc/ptrace/Makefile
new file mode 100644
index 000000000..9f9423430
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/Makefile
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0
+TEST_GEN_PROGS := ptrace-gpr ptrace-tm-gpr ptrace-tm-spd-gpr \
+ ptrace-tar ptrace-tm-tar ptrace-tm-spd-tar ptrace-vsx ptrace-tm-vsx \
+ ptrace-tm-spd-vsx ptrace-tm-spr ptrace-hwbreak ptrace-pkey core-pkey \
+ perf-hwbreak
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+CFLAGS += -m64 -I../../../../../usr/include -I../tm -mhtm -fno-pie
+
+$(OUTPUT)/ptrace-pkey $(OUTPUT)/core-pkey: child.h
+$(OUTPUT)/ptrace-pkey $(OUTPUT)/core-pkey: LDLIBS += -pthread
+
+$(TEST_GEN_PROGS): ../harness.c ../utils.c ../lib/reg.S ptrace.h
diff --git a/tools/testing/selftests/powerpc/ptrace/child.h b/tools/testing/selftests/powerpc/ptrace/child.h
new file mode 100644
index 000000000..d7275b7b3
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/child.h
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Helper functions to sync execution between parent and child processes.
+ *
+ * Copyright 2018, Thiago Jung Bauermann, IBM Corporation.
+ */
+#include <stdio.h>
+#include <stdbool.h>
+#include <semaphore.h>
+
+/*
+ * Information in a shared memory location for synchronization between child and
+ * parent.
+ */
+struct child_sync {
+ /* The parent waits on this semaphore. */
+ sem_t sem_parent;
+
+ /* If true, the child should give up as well. */
+ bool parent_gave_up;
+
+ /* The child waits on this semaphore. */
+ sem_t sem_child;
+
+ /* If true, the parent should give up as well. */
+ bool child_gave_up;
+};
+
+#define CHILD_FAIL_IF(x, sync) \
+ do { \
+ if (x) { \
+ fprintf(stderr, \
+ "[FAIL] Test FAILED on line %d\n", __LINE__); \
+ (sync)->child_gave_up = true; \
+ prod_parent(sync); \
+ return 1; \
+ } \
+ } while (0)
+
+#define PARENT_FAIL_IF(x, sync) \
+ do { \
+ if (x) { \
+ fprintf(stderr, \
+ "[FAIL] Test FAILED on line %d\n", __LINE__); \
+ (sync)->parent_gave_up = true; \
+ prod_child(sync); \
+ return 1; \
+ } \
+ } while (0)
+
+#define PARENT_SKIP_IF_UNSUPPORTED(x, sync) \
+ do { \
+ if ((x) == -1 && (errno == ENODEV || errno == EINVAL)) { \
+ (sync)->parent_gave_up = true; \
+ prod_child(sync); \
+ SKIP_IF(1); \
+ } \
+ } while (0)
+
+int init_child_sync(struct child_sync *sync)
+{
+ int ret;
+
+ ret = sem_init(&sync->sem_parent, 1, 0);
+ if (ret) {
+ perror("Semaphore initialization failed");
+ return 1;
+ }
+
+ ret = sem_init(&sync->sem_child, 1, 0);
+ if (ret) {
+ perror("Semaphore initialization failed");
+ return 1;
+ }
+
+ return 0;
+}
+
+void destroy_child_sync(struct child_sync *sync)
+{
+ sem_destroy(&sync->sem_parent);
+ sem_destroy(&sync->sem_child);
+}
+
+int wait_child(struct child_sync *sync)
+{
+ int ret;
+
+ /* Wait until the child prods us. */
+ ret = sem_wait(&sync->sem_parent);
+ if (ret) {
+ perror("Error waiting for child");
+ return 1;
+ }
+
+ return sync->child_gave_up;
+}
+
+int prod_child(struct child_sync *sync)
+{
+ int ret;
+
+ /* Unblock the child now. */
+ ret = sem_post(&sync->sem_child);
+ if (ret) {
+ perror("Error prodding child");
+ return 1;
+ }
+
+ return 0;
+}
+
+int wait_parent(struct child_sync *sync)
+{
+ int ret;
+
+ /* Wait until the parent prods us. */
+ ret = sem_wait(&sync->sem_child);
+ if (ret) {
+ perror("Error waiting for parent");
+ return 1;
+ }
+
+ return sync->parent_gave_up;
+}
+
+int prod_parent(struct child_sync *sync)
+{
+ int ret;
+
+ /* Unblock the parent now. */
+ ret = sem_post(&sync->sem_parent);
+ if (ret) {
+ perror("Error prodding parent");
+ return 1;
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/core-pkey.c b/tools/testing/selftests/powerpc/ptrace/core-pkey.c
new file mode 100644
index 000000000..d5c64fee0
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/core-pkey.c
@@ -0,0 +1,462 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Ptrace test for Memory Protection Key registers
+ *
+ * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
+ * Copyright (C) 2018 IBM Corporation.
+ */
+#include <limits.h>
+#include <linux/kernel.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include "ptrace.h"
+#include "child.h"
+
+#ifndef __NR_pkey_alloc
+#define __NR_pkey_alloc 384
+#endif
+
+#ifndef __NR_pkey_free
+#define __NR_pkey_free 385
+#endif
+
+#ifndef NT_PPC_PKEY
+#define NT_PPC_PKEY 0x110
+#endif
+
+#ifndef PKEY_DISABLE_EXECUTE
+#define PKEY_DISABLE_EXECUTE 0x4
+#endif
+
+#define AMR_BITS_PER_PKEY 2
+#define PKEY_REG_BITS (sizeof(u64) * 8)
+#define pkeyshift(pkey) (PKEY_REG_BITS - ((pkey + 1) * AMR_BITS_PER_PKEY))
+
+#define CORE_FILE_LIMIT (5 * 1024 * 1024) /* 5 MB should be enough */
+
+static const char core_pattern_file[] = "/proc/sys/kernel/core_pattern";
+
+static const char user_write[] = "[User Write (Running)]";
+static const char core_read_running[] = "[Core Read (Running)]";
+
+/* Information shared between the parent and the child. */
+struct shared_info {
+ struct child_sync child_sync;
+
+ /* AMR value the parent expects to read in the core file. */
+ unsigned long amr;
+
+ /* IAMR value the parent expects to read in the core file. */
+ unsigned long iamr;
+
+ /* UAMOR value the parent expects to read in the core file. */
+ unsigned long uamor;
+
+ /* When the child crashed. */
+ time_t core_time;
+};
+
+static int sys_pkey_alloc(unsigned long flags, unsigned long init_access_rights)
+{
+ return syscall(__NR_pkey_alloc, flags, init_access_rights);
+}
+
+static int sys_pkey_free(int pkey)
+{
+ return syscall(__NR_pkey_free, pkey);
+}
+
+static int increase_core_file_limit(void)
+{
+ struct rlimit rlim;
+ int ret;
+
+ ret = getrlimit(RLIMIT_CORE, &rlim);
+ FAIL_IF(ret);
+
+ if (rlim.rlim_cur != RLIM_INFINITY && rlim.rlim_cur < CORE_FILE_LIMIT) {
+ rlim.rlim_cur = CORE_FILE_LIMIT;
+
+ if (rlim.rlim_max != RLIM_INFINITY &&
+ rlim.rlim_max < CORE_FILE_LIMIT)
+ rlim.rlim_max = CORE_FILE_LIMIT;
+
+ ret = setrlimit(RLIMIT_CORE, &rlim);
+ FAIL_IF(ret);
+ }
+
+ ret = getrlimit(RLIMIT_FSIZE, &rlim);
+ FAIL_IF(ret);
+
+ if (rlim.rlim_cur != RLIM_INFINITY && rlim.rlim_cur < CORE_FILE_LIMIT) {
+ rlim.rlim_cur = CORE_FILE_LIMIT;
+
+ if (rlim.rlim_max != RLIM_INFINITY &&
+ rlim.rlim_max < CORE_FILE_LIMIT)
+ rlim.rlim_max = CORE_FILE_LIMIT;
+
+ ret = setrlimit(RLIMIT_FSIZE, &rlim);
+ FAIL_IF(ret);
+ }
+
+ return TEST_PASS;
+}
+
+static int child(struct shared_info *info)
+{
+ bool disable_execute = true;
+ int pkey1, pkey2, pkey3;
+ int *ptr, ret;
+
+ /* Wait until parent fills out the initial register values. */
+ ret = wait_parent(&info->child_sync);
+ if (ret)
+ return ret;
+
+ ret = increase_core_file_limit();
+ FAIL_IF(ret);
+
+ /* Get some pkeys so that we can change their bits in the AMR. */
+ pkey1 = sys_pkey_alloc(0, PKEY_DISABLE_EXECUTE);
+ if (pkey1 < 0) {
+ pkey1 = sys_pkey_alloc(0, 0);
+ FAIL_IF(pkey1 < 0);
+
+ disable_execute = false;
+ }
+
+ pkey2 = sys_pkey_alloc(0, 0);
+ FAIL_IF(pkey2 < 0);
+
+ pkey3 = sys_pkey_alloc(0, 0);
+ FAIL_IF(pkey3 < 0);
+
+ info->amr |= 3ul << pkeyshift(pkey1) | 2ul << pkeyshift(pkey2);
+
+ if (disable_execute)
+ info->iamr |= 1ul << pkeyshift(pkey1);
+ else
+ info->iamr &= ~(1ul << pkeyshift(pkey1));
+
+ info->iamr &= ~(1ul << pkeyshift(pkey2) | 1ul << pkeyshift(pkey3));
+
+ info->uamor |= 3ul << pkeyshift(pkey1) | 3ul << pkeyshift(pkey2);
+
+ printf("%-30s AMR: %016lx pkey1: %d pkey2: %d pkey3: %d\n",
+ user_write, info->amr, pkey1, pkey2, pkey3);
+
+ mtspr(SPRN_AMR, info->amr);
+
+ /*
+ * We won't use pkey3. This tests whether the kernel restores the UAMOR
+ * permissions after a key is freed.
+ */
+ sys_pkey_free(pkey3);
+
+ info->core_time = time(NULL);
+
+ /* Crash. */
+ ptr = 0;
+ *ptr = 1;
+
+ /* Shouldn't get here. */
+ FAIL_IF(true);
+
+ return TEST_FAIL;
+}
+
+/* Return file size if filename exists and pass sanity check, or zero if not. */
+static off_t try_core_file(const char *filename, struct shared_info *info,
+ pid_t pid)
+{
+ struct stat buf;
+ int ret;
+
+ ret = stat(filename, &buf);
+ if (ret == -1)
+ return TEST_FAIL;
+
+ /* Make sure we're not using a stale core file. */
+ return buf.st_mtime >= info->core_time ? buf.st_size : TEST_FAIL;
+}
+
+static Elf64_Nhdr *next_note(Elf64_Nhdr *nhdr)
+{
+ return (void *) nhdr + sizeof(*nhdr) +
+ __ALIGN_KERNEL(nhdr->n_namesz, 4) +
+ __ALIGN_KERNEL(nhdr->n_descsz, 4);
+}
+
+static int check_core_file(struct shared_info *info, Elf64_Ehdr *ehdr,
+ off_t core_size)
+{
+ unsigned long *regs;
+ Elf64_Phdr *phdr;
+ Elf64_Nhdr *nhdr;
+ size_t phdr_size;
+ void *p = ehdr, *note;
+ int ret;
+
+ ret = memcmp(ehdr->e_ident, ELFMAG, SELFMAG);
+ FAIL_IF(ret);
+
+ FAIL_IF(ehdr->e_type != ET_CORE);
+ FAIL_IF(ehdr->e_machine != EM_PPC64);
+ FAIL_IF(ehdr->e_phoff == 0 || ehdr->e_phnum == 0);
+
+ /*
+ * e_phnum is at most 65535 so calculating the size of the
+ * program header cannot overflow.
+ */
+ phdr_size = sizeof(*phdr) * ehdr->e_phnum;
+
+ /* Sanity check the program header table location. */
+ FAIL_IF(ehdr->e_phoff + phdr_size < ehdr->e_phoff);
+ FAIL_IF(ehdr->e_phoff + phdr_size > core_size);
+
+ /* Find the PT_NOTE segment. */
+ for (phdr = p + ehdr->e_phoff;
+ (void *) phdr < p + ehdr->e_phoff + phdr_size;
+ phdr += ehdr->e_phentsize)
+ if (phdr->p_type == PT_NOTE)
+ break;
+
+ FAIL_IF((void *) phdr >= p + ehdr->e_phoff + phdr_size);
+
+ /* Find the NT_PPC_PKEY note. */
+ for (nhdr = p + phdr->p_offset;
+ (void *) nhdr < p + phdr->p_offset + phdr->p_filesz;
+ nhdr = next_note(nhdr))
+ if (nhdr->n_type == NT_PPC_PKEY)
+ break;
+
+ FAIL_IF((void *) nhdr >= p + phdr->p_offset + phdr->p_filesz);
+ FAIL_IF(nhdr->n_descsz == 0);
+
+ p = nhdr;
+ note = p + sizeof(*nhdr) + __ALIGN_KERNEL(nhdr->n_namesz, 4);
+
+ regs = (unsigned long *) note;
+
+ printf("%-30s AMR: %016lx IAMR: %016lx UAMOR: %016lx\n",
+ core_read_running, regs[0], regs[1], regs[2]);
+
+ FAIL_IF(regs[0] != info->amr);
+ FAIL_IF(regs[1] != info->iamr);
+ FAIL_IF(regs[2] != info->uamor);
+
+ return TEST_PASS;
+}
+
+static int parent(struct shared_info *info, pid_t pid)
+{
+ char *filenames, *filename[3];
+ int fd, i, ret, status;
+ unsigned long regs[3];
+ off_t core_size;
+ void *core;
+
+ /*
+ * Get the initial values for AMR, IAMR and UAMOR and communicate them
+ * to the child.
+ */
+ ret = ptrace_read_regs(pid, NT_PPC_PKEY, regs, 3);
+ PARENT_SKIP_IF_UNSUPPORTED(ret, &info->child_sync);
+ PARENT_FAIL_IF(ret, &info->child_sync);
+
+ info->amr = regs[0];
+ info->iamr = regs[1];
+ info->uamor = regs[2];
+
+ /* Wake up child so that it can set itself up. */
+ ret = prod_child(&info->child_sync);
+ PARENT_FAIL_IF(ret, &info->child_sync);
+
+ ret = wait(&status);
+ if (ret != pid) {
+ printf("Child's exit status not captured\n");
+ return TEST_FAIL;
+ } else if (!WIFSIGNALED(status) || !WCOREDUMP(status)) {
+ printf("Child didn't dump core\n");
+ return TEST_FAIL;
+ }
+
+ /* Construct array of core file names to try. */
+
+ filename[0] = filenames = malloc(PATH_MAX);
+ if (!filenames) {
+ perror("Error allocating memory");
+ return TEST_FAIL;
+ }
+
+ ret = snprintf(filename[0], PATH_MAX, "core-pkey.%d", pid);
+ if (ret < 0 || ret >= PATH_MAX) {
+ ret = TEST_FAIL;
+ goto out;
+ }
+
+ filename[1] = filename[0] + ret + 1;
+ ret = snprintf(filename[1], PATH_MAX - ret - 1, "core.%d", pid);
+ if (ret < 0 || ret >= PATH_MAX - ret - 1) {
+ ret = TEST_FAIL;
+ goto out;
+ }
+ filename[2] = "core";
+
+ for (i = 0; i < 3; i++) {
+ core_size = try_core_file(filename[i], info, pid);
+ if (core_size != TEST_FAIL)
+ break;
+ }
+
+ if (i == 3) {
+ printf("Couldn't find core file\n");
+ ret = TEST_FAIL;
+ goto out;
+ }
+
+ fd = open(filename[i], O_RDONLY);
+ if (fd == -1) {
+ perror("Error opening core file");
+ ret = TEST_FAIL;
+ goto out;
+ }
+
+ core = mmap(NULL, core_size, PROT_READ, MAP_PRIVATE, fd, 0);
+ if (core == (void *) -1) {
+ perror("Error mmaping core file");
+ ret = TEST_FAIL;
+ goto out;
+ }
+
+ ret = check_core_file(info, core, core_size);
+
+ munmap(core, core_size);
+ close(fd);
+ unlink(filename[i]);
+
+ out:
+ free(filenames);
+
+ return ret;
+}
+
+static int write_core_pattern(const char *core_pattern)
+{
+ size_t len = strlen(core_pattern), ret;
+ FILE *f;
+
+ f = fopen(core_pattern_file, "w");
+ SKIP_IF_MSG(!f, "Try with root privileges");
+
+ ret = fwrite(core_pattern, 1, len, f);
+ fclose(f);
+ if (ret != len) {
+ perror("Error writing to core_pattern file");
+ return TEST_FAIL;
+ }
+
+ return TEST_PASS;
+}
+
+static int setup_core_pattern(char **core_pattern_, bool *changed_)
+{
+ FILE *f;
+ char *core_pattern;
+ int ret;
+
+ core_pattern = malloc(PATH_MAX);
+ if (!core_pattern) {
+ perror("Error allocating memory");
+ return TEST_FAIL;
+ }
+
+ f = fopen(core_pattern_file, "r");
+ if (!f) {
+ perror("Error opening core_pattern file");
+ ret = TEST_FAIL;
+ goto out;
+ }
+
+ ret = fread(core_pattern, 1, PATH_MAX, f);
+ fclose(f);
+ if (!ret) {
+ perror("Error reading core_pattern file");
+ ret = TEST_FAIL;
+ goto out;
+ }
+
+ /* Check whether we can predict the name of the core file. */
+ if (!strcmp(core_pattern, "core") || !strcmp(core_pattern, "core.%p"))
+ *changed_ = false;
+ else {
+ ret = write_core_pattern("core-pkey.%p");
+ if (ret)
+ goto out;
+
+ *changed_ = true;
+ }
+
+ *core_pattern_ = core_pattern;
+ ret = TEST_PASS;
+
+ out:
+ if (ret)
+ free(core_pattern);
+
+ return ret;
+}
+
+static int core_pkey(void)
+{
+ char *core_pattern;
+ bool changed_core_pattern;
+ struct shared_info *info;
+ int shm_id;
+ int ret;
+ pid_t pid;
+
+ ret = setup_core_pattern(&core_pattern, &changed_core_pattern);
+ if (ret)
+ return ret;
+
+ shm_id = shmget(IPC_PRIVATE, sizeof(*info), 0777 | IPC_CREAT);
+ info = shmat(shm_id, NULL, 0);
+
+ ret = init_child_sync(&info->child_sync);
+ if (ret)
+ return ret;
+
+ pid = fork();
+ if (pid < 0) {
+ perror("fork() failed");
+ ret = TEST_FAIL;
+ } else if (pid == 0)
+ ret = child(info);
+ else
+ ret = parent(info, pid);
+
+ shmdt(info);
+
+ if (pid) {
+ destroy_child_sync(&info->child_sync);
+ shmctl(shm_id, IPC_RMID, NULL);
+
+ if (changed_core_pattern)
+ write_core_pattern(core_pattern);
+ }
+
+ free(core_pattern);
+
+ return ret;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(core_pkey, "core_pkey");
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c
new file mode 100644
index 000000000..60df0b5e6
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c
@@ -0,0 +1,195 @@
+/*
+ * perf events self profiling example test case for hw breakpoints.
+ *
+ * This tests perf PERF_TYPE_BREAKPOINT parameters
+ * 1) tests all variants of the break on read/write flags
+ * 2) tests exclude_user == 0 and 1
+ * 3) test array matches (if DAWR is supported))
+ * 4) test different numbers of breakpoints matches
+ *
+ * Configure this breakpoint, then read and write the data a number of
+ * times. Then check the output count from perf is as expected.
+ *
+ * Based on:
+ * http://ozlabs.org/~anton/junkcode/perf_events_example1.c
+ *
+ * Copyright (C) 2018 Michael Neuling, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <unistd.h>
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <elf.h>
+#include <pthread.h>
+#include <sys/syscall.h>
+#include <linux/perf_event.h>
+#include <linux/hw_breakpoint.h>
+#include "utils.h"
+
+#define MAX_LOOPS 10000
+
+#define DAWR_LENGTH_MAX ((0x3f + 1) * 8)
+
+static inline int sys_perf_event_open(struct perf_event_attr *attr, pid_t pid,
+ int cpu, int group_fd,
+ unsigned long flags)
+{
+ attr->size = sizeof(*attr);
+ return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
+}
+
+static inline bool breakpoint_test(int len)
+{
+ struct perf_event_attr attr;
+ int fd;
+
+ /* setup counters */
+ memset(&attr, 0, sizeof(attr));
+ attr.disabled = 1;
+ attr.type = PERF_TYPE_BREAKPOINT;
+ attr.bp_type = HW_BREAKPOINT_R;
+ /* bp_addr can point anywhere but needs to be aligned */
+ attr.bp_addr = (__u64)(&attr) & 0xfffffffffffff800;
+ attr.bp_len = len;
+ fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
+ if (fd < 0)
+ return false;
+ close(fd);
+ return true;
+}
+
+static inline bool perf_breakpoint_supported(void)
+{
+ return breakpoint_test(4);
+}
+
+static inline bool dawr_supported(void)
+{
+ return breakpoint_test(DAWR_LENGTH_MAX);
+}
+
+static int runtestsingle(int readwriteflag, int exclude_user, int arraytest)
+{
+ int i,j;
+ struct perf_event_attr attr;
+ size_t res;
+ unsigned long long breaks, needed;
+ int readint;
+ int readintarraybig[2*DAWR_LENGTH_MAX/sizeof(int)];
+ int *readintalign;
+ volatile int *ptr;
+ int break_fd;
+ int loop_num = MAX_LOOPS - (rand() % 100); /* provide some variability */
+ volatile int *k;
+
+ /* align to 0x400 boundary as required by DAWR */
+ readintalign = (int *)(((unsigned long)readintarraybig + 0x7ff) &
+ 0xfffffffffffff800);
+
+ ptr = &readint;
+ if (arraytest)
+ ptr = &readintalign[0];
+
+ /* setup counters */
+ memset(&attr, 0, sizeof(attr));
+ attr.disabled = 1;
+ attr.type = PERF_TYPE_BREAKPOINT;
+ attr.bp_type = readwriteflag;
+ attr.bp_addr = (__u64)ptr;
+ attr.bp_len = sizeof(int);
+ if (arraytest)
+ attr.bp_len = DAWR_LENGTH_MAX;
+ attr.exclude_user = exclude_user;
+ break_fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
+ if (break_fd < 0) {
+ perror("sys_perf_event_open");
+ exit(1);
+ }
+
+ /* start counters */
+ ioctl(break_fd, PERF_EVENT_IOC_ENABLE);
+
+ /* Test a bunch of reads and writes */
+ k = &readint;
+ for (i = 0; i < loop_num; i++) {
+ if (arraytest)
+ k = &(readintalign[i % (DAWR_LENGTH_MAX/sizeof(int))]);
+
+ j = *k;
+ *k = j;
+ }
+
+ /* stop counters */
+ ioctl(break_fd, PERF_EVENT_IOC_DISABLE);
+
+ /* read and check counters */
+ res = read(break_fd, &breaks, sizeof(unsigned long long));
+ assert(res == sizeof(unsigned long long));
+ /* we read and write each loop, so subtract the ones we are counting */
+ needed = 0;
+ if (readwriteflag & HW_BREAKPOINT_R)
+ needed += loop_num;
+ if (readwriteflag & HW_BREAKPOINT_W)
+ needed += loop_num;
+ needed = needed * (1 - exclude_user);
+ printf("TESTED: addr:0x%lx brks:% 8lld loops:% 8i rw:%i !user:%i array:%i\n",
+ (unsigned long int)ptr, breaks, loop_num, readwriteflag, exclude_user, arraytest);
+ if (breaks != needed) {
+ printf("FAILED: 0x%lx brks:%lld needed:%lli %i %i %i\n\n",
+ (unsigned long int)ptr, breaks, needed, loop_num, readwriteflag, exclude_user);
+ return 1;
+ }
+ close(break_fd);
+
+ return 0;
+}
+
+static int runtest(void)
+{
+ int rwflag;
+ int exclude_user;
+ int ret;
+
+ /*
+ * perf defines rwflag as two bits read and write and at least
+ * one must be set. So range 1-3.
+ */
+ for (rwflag = 1 ; rwflag < 4; rwflag++) {
+ for (exclude_user = 0 ; exclude_user < 2; exclude_user++) {
+ ret = runtestsingle(rwflag, exclude_user, 0);
+ if (ret)
+ return ret;
+
+ /* if we have the dawr, we can do an array test */
+ if (!dawr_supported())
+ continue;
+ ret = runtestsingle(rwflag, exclude_user, 1);
+ if (ret)
+ return ret;
+ }
+ }
+ return 0;
+}
+
+
+static int perf_hwbreak(void)
+{
+ srand ( time(NULL) );
+
+ SKIP_IF(!perf_breakpoint_supported());
+
+ return runtest();
+}
+
+int main(int argc, char *argv[], char **envp)
+{
+ return test_harness(perf_hwbreak, "perf_hwbreak");
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.c
new file mode 100644
index 000000000..ca29fafee
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.c
@@ -0,0 +1,123 @@
+/*
+ * Ptrace test for GPR/FPR registers
+ *
+ * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include "ptrace.h"
+#include "ptrace-gpr.h"
+#include "reg.h"
+
+/* Tracer and Tracee Shared Data */
+int shm_id;
+int *cptr, *pptr;
+
+float a = FPR_1;
+float b = FPR_2;
+float c = FPR_3;
+
+void gpr(void)
+{
+ unsigned long gpr_buf[18];
+ float fpr_buf[32];
+
+ cptr = (int *)shmat(shm_id, NULL, 0);
+
+ asm __volatile__(
+ ASM_LOAD_GPR_IMMED(gpr_1)
+ ASM_LOAD_FPR_SINGLE_PRECISION(flt_1)
+ :
+ : [gpr_1]"i"(GPR_1), [flt_1] "b" (&a)
+ : "memory", "r6", "r7", "r8", "r9", "r10",
+ "r11", "r12", "r13", "r14", "r15", "r16", "r17",
+ "r18", "r19", "r20", "r21", "r22", "r23", "r24",
+ "r25", "r26", "r27", "r28", "r29", "r30", "r31"
+ );
+
+ cptr[1] = 1;
+
+ while (!cptr[0])
+ asm volatile("" : : : "memory");
+
+ shmdt((void *)cptr);
+ store_gpr(gpr_buf);
+ store_fpr_single_precision(fpr_buf);
+
+ if (validate_gpr(gpr_buf, GPR_3))
+ exit(1);
+
+ if (validate_fpr_float(fpr_buf, c))
+ exit(1);
+
+ exit(0);
+}
+
+int trace_gpr(pid_t child)
+{
+ unsigned long gpr[18];
+ unsigned long fpr[32];
+
+ FAIL_IF(start_trace(child));
+ FAIL_IF(show_gpr(child, gpr));
+ FAIL_IF(validate_gpr(gpr, GPR_1));
+ FAIL_IF(show_fpr(child, fpr));
+ FAIL_IF(validate_fpr(fpr, FPR_1_REP));
+ FAIL_IF(write_gpr(child, GPR_3));
+ FAIL_IF(write_fpr(child, FPR_3_REP));
+ FAIL_IF(stop_trace(child));
+
+ return TEST_PASS;
+}
+
+int ptrace_gpr(void)
+{
+ pid_t pid;
+ int ret, status;
+
+ shm_id = shmget(IPC_PRIVATE, sizeof(int) * 2, 0777|IPC_CREAT);
+ pid = fork();
+ if (pid < 0) {
+ perror("fork() failed");
+ return TEST_FAIL;
+ }
+ if (pid == 0)
+ gpr();
+
+ if (pid) {
+ pptr = (int *)shmat(shm_id, NULL, 0);
+ while (!pptr[1])
+ asm volatile("" : : : "memory");
+
+ ret = trace_gpr(pid);
+ if (ret) {
+ kill(pid, SIGTERM);
+ shmdt((void *)pptr);
+ shmctl(shm_id, IPC_RMID, NULL);
+ return TEST_FAIL;
+ }
+
+ pptr[0] = 1;
+ shmdt((void *)pptr);
+
+ ret = wait(&status);
+ shmctl(shm_id, IPC_RMID, NULL);
+ if (ret != pid) {
+ printf("Child's exit status not captured\n");
+ return TEST_FAIL;
+ }
+
+ return (WIFEXITED(status) && WEXITSTATUS(status)) ? TEST_FAIL :
+ TEST_PASS;
+ }
+
+ return TEST_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(ptrace_gpr, "ptrace_gpr");
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.h b/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.h
new file mode 100644
index 000000000..e30fef638
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#define GPR_1 1
+#define GPR_2 2
+#define GPR_3 3
+#define GPR_4 4
+
+#define FPR_1 0.001
+#define FPR_2 0.002
+#define FPR_3 0.003
+#define FPR_4 0.004
+
+#define FPR_1_REP 0x3f50624de0000000
+#define FPR_2_REP 0x3f60624de0000000
+#define FPR_3_REP 0x3f689374c0000000
+#define FPR_4_REP 0x3f70624de0000000
+
+/* Buffer must have 18 elements */
+int validate_gpr(unsigned long *gpr, unsigned long val)
+{
+ int i, found = 1;
+
+ for (i = 0; i < 18; i++) {
+ if (gpr[i] != val) {
+ printf("GPR[%d]: %lx Expected: %lx\n",
+ i+14, gpr[i], val);
+ found = 0;
+ }
+ }
+
+ if (!found)
+ return TEST_FAIL;
+ return TEST_PASS;
+}
+
+/* Buffer must have 32 elements */
+int validate_fpr(unsigned long *fpr, unsigned long val)
+{
+ int i, found = 1;
+
+ for (i = 0; i < 32; i++) {
+ if (fpr[i] != val) {
+ printf("FPR[%d]: %lx Expected: %lx\n", i, fpr[i], val);
+ found = 0;
+ }
+ }
+
+ if (!found)
+ return TEST_FAIL;
+ return TEST_PASS;
+}
+
+/* Buffer must have 32 elements */
+int validate_fpr_float(float *fpr, float val)
+{
+ int i, found = 1;
+
+ for (i = 0; i < 32; i++) {
+ if (fpr[i] != val) {
+ printf("FPR[%d]: %f Expected: %f\n", i, fpr[i], val);
+ found = 0;
+ }
+ }
+
+ if (!found)
+ return TEST_FAIL;
+ return TEST_PASS;
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
new file mode 100644
index 000000000..3066d310f
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
@@ -0,0 +1,342 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Ptrace test for hw breakpoints
+ *
+ * Based on tools/testing/selftests/breakpoints/breakpoint_test.c
+ *
+ * This test forks and the parent then traces the child doing various
+ * types of ptrace enabled breakpoints
+ *
+ * Copyright (C) 2018 Michael Neuling, IBM Corporation.
+ */
+
+#include <sys/ptrace.h>
+#include <unistd.h>
+#include <stddef.h>
+#include <sys/user.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include "ptrace.h"
+
+/* Breakpoint access modes */
+enum {
+ BP_X = 1,
+ BP_RW = 2,
+ BP_W = 4,
+};
+
+static pid_t child_pid;
+static struct ppc_debug_info dbginfo;
+
+static void get_dbginfo(void)
+{
+ int ret;
+
+ ret = ptrace(PPC_PTRACE_GETHWDBGINFO, child_pid, NULL, &dbginfo);
+ if (ret) {
+ perror("Can't get breakpoint info\n");
+ exit(-1);
+ }
+}
+
+static bool hwbreak_present(void)
+{
+ return (dbginfo.num_data_bps != 0);
+}
+
+static bool dawr_present(void)
+{
+ return !!(dbginfo.features & PPC_DEBUG_FEATURE_DATA_BP_DAWR);
+}
+
+static void set_breakpoint_addr(void *addr)
+{
+ int ret;
+
+ ret = ptrace(PTRACE_SET_DEBUGREG, child_pid, 0, addr);
+ if (ret) {
+ perror("Can't set breakpoint addr\n");
+ exit(-1);
+ }
+}
+
+static int set_hwbreakpoint_addr(void *addr, int range)
+{
+ int ret;
+
+ struct ppc_hw_breakpoint info;
+
+ info.version = 1;
+ info.trigger_type = PPC_BREAKPOINT_TRIGGER_RW;
+ info.addr_mode = PPC_BREAKPOINT_MODE_EXACT;
+ if (range > 0)
+ info.addr_mode = PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE;
+ info.condition_mode = PPC_BREAKPOINT_CONDITION_NONE;
+ info.addr = (__u64)addr;
+ info.addr2 = (__u64)addr + range;
+ info.condition_value = 0;
+
+ ret = ptrace(PPC_PTRACE_SETHWDEBUG, child_pid, 0, &info);
+ if (ret < 0) {
+ perror("Can't set breakpoint\n");
+ exit(-1);
+ }
+ return ret;
+}
+
+static int del_hwbreakpoint_addr(int watchpoint_handle)
+{
+ int ret;
+
+ ret = ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, watchpoint_handle);
+ if (ret < 0) {
+ perror("Can't delete hw breakpoint\n");
+ exit(-1);
+ }
+ return ret;
+}
+
+#define DAWR_LENGTH_MAX 512
+
+/* Dummy variables to test read/write accesses */
+static unsigned long long
+ dummy_array[DAWR_LENGTH_MAX / sizeof(unsigned long long)]
+ __attribute__((aligned(512)));
+static unsigned long long *dummy_var = dummy_array;
+
+static void write_var(int len)
+{
+ long long *plval;
+ char *pcval;
+ short *psval;
+ int *pival;
+
+ switch (len) {
+ case 1:
+ pcval = (char *)dummy_var;
+ *pcval = 0xff;
+ break;
+ case 2:
+ psval = (short *)dummy_var;
+ *psval = 0xffff;
+ break;
+ case 4:
+ pival = (int *)dummy_var;
+ *pival = 0xffffffff;
+ break;
+ case 8:
+ plval = (long long *)dummy_var;
+ *plval = 0xffffffffffffffffLL;
+ break;
+ }
+}
+
+static void read_var(int len)
+{
+ char cval __attribute__((unused));
+ short sval __attribute__((unused));
+ int ival __attribute__((unused));
+ long long lval __attribute__((unused));
+
+ switch (len) {
+ case 1:
+ cval = *(char *)dummy_var;
+ break;
+ case 2:
+ sval = *(short *)dummy_var;
+ break;
+ case 4:
+ ival = *(int *)dummy_var;
+ break;
+ case 8:
+ lval = *(long long *)dummy_var;
+ break;
+ }
+}
+
+/*
+ * Do the r/w accesses to trigger the breakpoints. And run
+ * the usual traps.
+ */
+static void trigger_tests(void)
+{
+ int len, ret;
+
+ ret = ptrace(PTRACE_TRACEME, 0, NULL, 0);
+ if (ret) {
+ perror("Can't be traced?\n");
+ return;
+ }
+
+ /* Wake up father so that it sets up the first test */
+ kill(getpid(), SIGUSR1);
+
+ /* Test write watchpoints */
+ for (len = 1; len <= sizeof(long); len <<= 1)
+ write_var(len);
+
+ /* Test read/write watchpoints (on read accesses) */
+ for (len = 1; len <= sizeof(long); len <<= 1)
+ read_var(len);
+
+ /* Test when breakpoint is unset */
+
+ /* Test write watchpoints */
+ for (len = 1; len <= sizeof(long); len <<= 1)
+ write_var(len);
+
+ /* Test read/write watchpoints (on read accesses) */
+ for (len = 1; len <= sizeof(long); len <<= 1)
+ read_var(len);
+}
+
+static void check_success(const char *msg)
+{
+ const char *msg2;
+ int status;
+
+ /* Wait for the child to SIGTRAP */
+ wait(&status);
+
+ msg2 = "Failed";
+
+ if (WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) {
+ msg2 = "Child process hit the breakpoint";
+ }
+
+ printf("%s Result: [%s]\n", msg, msg2);
+}
+
+static void launch_watchpoints(char *buf, int mode, int len,
+ struct ppc_debug_info *dbginfo, bool dawr)
+{
+ const char *mode_str;
+ unsigned long data = (unsigned long)(dummy_var);
+ int wh, range;
+
+ data &= ~0x7UL;
+
+ if (mode == BP_W) {
+ data |= (1UL << 1);
+ mode_str = "write";
+ } else {
+ data |= (1UL << 0);
+ data |= (1UL << 1);
+ mode_str = "read";
+ }
+
+ /* Set DABR_TRANSLATION bit */
+ data |= (1UL << 2);
+
+ /* use PTRACE_SET_DEBUGREG breakpoints */
+ set_breakpoint_addr((void *)data);
+ ptrace(PTRACE_CONT, child_pid, NULL, 0);
+ sprintf(buf, "Test %s watchpoint with len: %d ", mode_str, len);
+ check_success(buf);
+ /* Unregister hw brkpoint */
+ set_breakpoint_addr(NULL);
+
+ data = (data & ~7); /* remove dabr control bits */
+
+ /* use PPC_PTRACE_SETHWDEBUG breakpoint */
+ if (!(dbginfo->features & PPC_DEBUG_FEATURE_DATA_BP_RANGE))
+ return; /* not supported */
+ wh = set_hwbreakpoint_addr((void *)data, 0);
+ ptrace(PTRACE_CONT, child_pid, NULL, 0);
+ sprintf(buf, "Test %s watchpoint with len: %d ", mode_str, len);
+ check_success(buf);
+ /* Unregister hw brkpoint */
+ del_hwbreakpoint_addr(wh);
+
+ /* try a wider range */
+ range = 8;
+ if (dawr)
+ range = 512 - ((int)data & (DAWR_LENGTH_MAX - 1));
+ wh = set_hwbreakpoint_addr((void *)data, range);
+ ptrace(PTRACE_CONT, child_pid, NULL, 0);
+ sprintf(buf, "Test %s watchpoint with len: %d ", mode_str, len);
+ check_success(buf);
+ /* Unregister hw brkpoint */
+ del_hwbreakpoint_addr(wh);
+}
+
+/* Set the breakpoints and check the child successfully trigger them */
+static int launch_tests(bool dawr)
+{
+ char buf[1024];
+ int len, i, status;
+
+ struct ppc_debug_info dbginfo;
+
+ i = ptrace(PPC_PTRACE_GETHWDBGINFO, child_pid, NULL, &dbginfo);
+ if (i) {
+ perror("Can't set breakpoint info\n");
+ exit(-1);
+ }
+ if (!(dbginfo.features & PPC_DEBUG_FEATURE_DATA_BP_RANGE))
+ printf("WARNING: Kernel doesn't support PPC_PTRACE_SETHWDEBUG\n");
+
+ /* Write watchpoint */
+ for (len = 1; len <= sizeof(long); len <<= 1)
+ launch_watchpoints(buf, BP_W, len, &dbginfo, dawr);
+
+ /* Read-Write watchpoint */
+ for (len = 1; len <= sizeof(long); len <<= 1)
+ launch_watchpoints(buf, BP_RW, len, &dbginfo, dawr);
+
+ ptrace(PTRACE_CONT, child_pid, NULL, 0);
+
+ /*
+ * Now we have unregistered the breakpoint, access by child
+ * should not cause SIGTRAP.
+ */
+
+ wait(&status);
+
+ if (WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) {
+ printf("FAIL: Child process hit the breakpoint, which is not expected\n");
+ ptrace(PTRACE_CONT, child_pid, NULL, 0);
+ return TEST_FAIL;
+ }
+
+ if (WIFEXITED(status))
+ printf("Child exited normally\n");
+
+ return TEST_PASS;
+}
+
+static int ptrace_hwbreak(void)
+{
+ pid_t pid;
+ int ret;
+ bool dawr;
+
+ pid = fork();
+ if (!pid) {
+ trigger_tests();
+ return 0;
+ }
+
+ wait(NULL);
+
+ child_pid = pid;
+
+ get_dbginfo();
+ SKIP_IF(!hwbreak_present());
+ dawr = dawr_present();
+
+ ret = launch_tests(dawr);
+
+ wait(NULL);
+
+ return ret;
+}
+
+int main(int argc, char **argv, char **envp)
+{
+ return test_harness(ptrace_hwbreak, "ptrace-hwbreak");
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c
new file mode 100644
index 000000000..3694613f4
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c
@@ -0,0 +1,330 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Ptrace test for Memory Protection Key registers
+ *
+ * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
+ * Copyright (C) 2018 IBM Corporation.
+ */
+#include "ptrace.h"
+#include "child.h"
+
+#ifndef __NR_pkey_alloc
+#define __NR_pkey_alloc 384
+#endif
+
+#ifndef __NR_pkey_free
+#define __NR_pkey_free 385
+#endif
+
+#ifndef NT_PPC_PKEY
+#define NT_PPC_PKEY 0x110
+#endif
+
+#ifndef PKEY_DISABLE_EXECUTE
+#define PKEY_DISABLE_EXECUTE 0x4
+#endif
+
+#define AMR_BITS_PER_PKEY 2
+#define PKEY_REG_BITS (sizeof(u64) * 8)
+#define pkeyshift(pkey) (PKEY_REG_BITS - ((pkey + 1) * AMR_BITS_PER_PKEY))
+
+static const char user_read[] = "[User Read (Running)]";
+static const char user_write[] = "[User Write (Running)]";
+static const char ptrace_read_running[] = "[Ptrace Read (Running)]";
+static const char ptrace_write_running[] = "[Ptrace Write (Running)]";
+
+/* Information shared between the parent and the child. */
+struct shared_info {
+ struct child_sync child_sync;
+
+ /* AMR value the parent expects to read from the child. */
+ unsigned long amr1;
+
+ /* AMR value the parent is expected to write to the child. */
+ unsigned long amr2;
+
+ /* AMR value that ptrace should refuse to write to the child. */
+ unsigned long invalid_amr;
+
+ /* IAMR value the parent expects to read from the child. */
+ unsigned long expected_iamr;
+
+ /* UAMOR value the parent expects to read from the child. */
+ unsigned long expected_uamor;
+
+ /*
+ * IAMR and UAMOR values that ptrace should refuse to write to the child
+ * (even though they're valid ones) because userspace doesn't have
+ * access to those registers.
+ */
+ unsigned long invalid_iamr;
+ unsigned long invalid_uamor;
+};
+
+static int sys_pkey_alloc(unsigned long flags, unsigned long init_access_rights)
+{
+ return syscall(__NR_pkey_alloc, flags, init_access_rights);
+}
+
+static int child(struct shared_info *info)
+{
+ unsigned long reg;
+ bool disable_execute = true;
+ int pkey1, pkey2, pkey3;
+ int ret;
+
+ /* Wait until parent fills out the initial register values. */
+ ret = wait_parent(&info->child_sync);
+ if (ret)
+ return ret;
+
+ /* Get some pkeys so that we can change their bits in the AMR. */
+ pkey1 = sys_pkey_alloc(0, PKEY_DISABLE_EXECUTE);
+ if (pkey1 < 0) {
+ pkey1 = sys_pkey_alloc(0, 0);
+ CHILD_FAIL_IF(pkey1 < 0, &info->child_sync);
+
+ disable_execute = false;
+ }
+
+ pkey2 = sys_pkey_alloc(0, 0);
+ CHILD_FAIL_IF(pkey2 < 0, &info->child_sync);
+
+ pkey3 = sys_pkey_alloc(0, 0);
+ CHILD_FAIL_IF(pkey3 < 0, &info->child_sync);
+
+ info->amr1 |= 3ul << pkeyshift(pkey1);
+ info->amr2 |= 3ul << pkeyshift(pkey2);
+ /*
+ * invalid amr value where we try to force write
+ * things which are deined by a uamor setting.
+ */
+ info->invalid_amr = info->amr2 | (~0x0UL & ~info->expected_uamor);
+
+ /*
+ * if PKEY_DISABLE_EXECUTE succeeded we should update the expected_iamr
+ */
+ if (disable_execute)
+ info->expected_iamr |= 1ul << pkeyshift(pkey1);
+ else
+ info->expected_iamr &= ~(1ul << pkeyshift(pkey1));
+
+ /*
+ * We allocated pkey2 and pkey 3 above. Clear the IAMR bits.
+ */
+ info->expected_iamr &= ~(1ul << pkeyshift(pkey2));
+ info->expected_iamr &= ~(1ul << pkeyshift(pkey3));
+
+ /*
+ * Create an IAMR value different from expected value.
+ * Kernel will reject an IAMR and UAMOR change.
+ */
+ info->invalid_iamr = info->expected_iamr | (1ul << pkeyshift(pkey1) | 1ul << pkeyshift(pkey2));
+ info->invalid_uamor = info->expected_uamor & ~(0x3ul << pkeyshift(pkey1));
+
+ printf("%-30s AMR: %016lx pkey1: %d pkey2: %d pkey3: %d\n",
+ user_write, info->amr1, pkey1, pkey2, pkey3);
+
+ mtspr(SPRN_AMR, info->amr1);
+
+ /* Wait for parent to read our AMR value and write a new one. */
+ ret = prod_parent(&info->child_sync);
+ CHILD_FAIL_IF(ret, &info->child_sync);
+
+ ret = wait_parent(&info->child_sync);
+ if (ret)
+ return ret;
+
+ reg = mfspr(SPRN_AMR);
+
+ printf("%-30s AMR: %016lx\n", user_read, reg);
+
+ CHILD_FAIL_IF(reg != info->amr2, &info->child_sync);
+
+ /*
+ * Wait for parent to try to write an invalid AMR value.
+ */
+ ret = prod_parent(&info->child_sync);
+ CHILD_FAIL_IF(ret, &info->child_sync);
+
+ ret = wait_parent(&info->child_sync);
+ if (ret)
+ return ret;
+
+ reg = mfspr(SPRN_AMR);
+
+ printf("%-30s AMR: %016lx\n", user_read, reg);
+
+ CHILD_FAIL_IF(reg != info->amr2, &info->child_sync);
+
+ /*
+ * Wait for parent to try to write an IAMR and a UAMOR value. We can't
+ * verify them, but we can verify that the AMR didn't change.
+ */
+ ret = prod_parent(&info->child_sync);
+ CHILD_FAIL_IF(ret, &info->child_sync);
+
+ ret = wait_parent(&info->child_sync);
+ if (ret)
+ return ret;
+
+ reg = mfspr(SPRN_AMR);
+
+ printf("%-30s AMR: %016lx\n", user_read, reg);
+
+ CHILD_FAIL_IF(reg != info->amr2, &info->child_sync);
+
+ /* Now let parent now that we are finished. */
+
+ ret = prod_parent(&info->child_sync);
+ CHILD_FAIL_IF(ret, &info->child_sync);
+
+ return TEST_PASS;
+}
+
+static int parent(struct shared_info *info, pid_t pid)
+{
+ unsigned long regs[3];
+ int ret, status;
+
+ /*
+ * Get the initial values for AMR, IAMR and UAMOR and communicate them
+ * to the child.
+ */
+ ret = ptrace_read_regs(pid, NT_PPC_PKEY, regs, 3);
+ PARENT_SKIP_IF_UNSUPPORTED(ret, &info->child_sync);
+ PARENT_FAIL_IF(ret, &info->child_sync);
+
+ info->amr1 = info->amr2 = regs[0];
+ info->expected_iamr = regs[1];
+ info->expected_uamor = regs[2];
+
+ /* Wake up child so that it can set itself up. */
+ ret = prod_child(&info->child_sync);
+ PARENT_FAIL_IF(ret, &info->child_sync);
+
+ ret = wait_child(&info->child_sync);
+ if (ret)
+ return ret;
+
+ /* Verify that we can read the pkey registers from the child. */
+ ret = ptrace_read_regs(pid, NT_PPC_PKEY, regs, 3);
+ PARENT_FAIL_IF(ret, &info->child_sync);
+
+ printf("%-30s AMR: %016lx IAMR: %016lx UAMOR: %016lx\n",
+ ptrace_read_running, regs[0], regs[1], regs[2]);
+
+ PARENT_FAIL_IF(regs[0] != info->amr1, &info->child_sync);
+ PARENT_FAIL_IF(regs[1] != info->expected_iamr, &info->child_sync);
+ PARENT_FAIL_IF(regs[2] != info->expected_uamor, &info->child_sync);
+
+ /* Write valid AMR value in child. */
+ ret = ptrace_write_regs(pid, NT_PPC_PKEY, &info->amr2, 1);
+ PARENT_FAIL_IF(ret, &info->child_sync);
+
+ printf("%-30s AMR: %016lx\n", ptrace_write_running, info->amr2);
+
+ /* Wake up child so that it can verify it changed. */
+ ret = prod_child(&info->child_sync);
+ PARENT_FAIL_IF(ret, &info->child_sync);
+
+ ret = wait_child(&info->child_sync);
+ if (ret)
+ return ret;
+
+ /* Write invalid AMR value in child. */
+ ret = ptrace_write_regs(pid, NT_PPC_PKEY, &info->invalid_amr, 1);
+ PARENT_FAIL_IF(ret, &info->child_sync);
+
+ printf("%-30s AMR: %016lx\n", ptrace_write_running, info->invalid_amr);
+
+ /* Wake up child so that it can verify it didn't change. */
+ ret = prod_child(&info->child_sync);
+ PARENT_FAIL_IF(ret, &info->child_sync);
+
+ ret = wait_child(&info->child_sync);
+ if (ret)
+ return ret;
+
+ /* Try to write to IAMR. */
+ regs[0] = info->amr1;
+ regs[1] = info->invalid_iamr;
+ ret = ptrace_write_regs(pid, NT_PPC_PKEY, regs, 2);
+ PARENT_FAIL_IF(!ret, &info->child_sync);
+
+ printf("%-30s AMR: %016lx IAMR: %016lx\n",
+ ptrace_write_running, regs[0], regs[1]);
+
+ /* Try to write to IAMR and UAMOR. */
+ regs[2] = info->invalid_uamor;
+ ret = ptrace_write_regs(pid, NT_PPC_PKEY, regs, 3);
+ PARENT_FAIL_IF(!ret, &info->child_sync);
+
+ printf("%-30s AMR: %016lx IAMR: %016lx UAMOR: %016lx\n",
+ ptrace_write_running, regs[0], regs[1], regs[2]);
+
+ /* Verify that all registers still have their expected values. */
+ ret = ptrace_read_regs(pid, NT_PPC_PKEY, regs, 3);
+ PARENT_FAIL_IF(ret, &info->child_sync);
+
+ printf("%-30s AMR: %016lx IAMR: %016lx UAMOR: %016lx\n",
+ ptrace_read_running, regs[0], regs[1], regs[2]);
+
+ PARENT_FAIL_IF(regs[0] != info->amr2, &info->child_sync);
+ PARENT_FAIL_IF(regs[1] != info->expected_iamr, &info->child_sync);
+ PARENT_FAIL_IF(regs[2] != info->expected_uamor, &info->child_sync);
+
+ /* Wake up child so that it can verify AMR didn't change and wrap up. */
+ ret = prod_child(&info->child_sync);
+ PARENT_FAIL_IF(ret, &info->child_sync);
+
+ ret = wait(&status);
+ if (ret != pid) {
+ printf("Child's exit status not captured\n");
+ ret = TEST_PASS;
+ } else if (!WIFEXITED(status)) {
+ printf("Child exited abnormally\n");
+ ret = TEST_FAIL;
+ } else
+ ret = WEXITSTATUS(status) ? TEST_FAIL : TEST_PASS;
+
+ return ret;
+}
+
+static int ptrace_pkey(void)
+{
+ struct shared_info *info;
+ int shm_id;
+ int ret;
+ pid_t pid;
+
+ shm_id = shmget(IPC_PRIVATE, sizeof(*info), 0777 | IPC_CREAT);
+ info = shmat(shm_id, NULL, 0);
+
+ ret = init_child_sync(&info->child_sync);
+ if (ret)
+ return ret;
+
+ pid = fork();
+ if (pid < 0) {
+ perror("fork() failed");
+ ret = TEST_FAIL;
+ } else if (pid == 0)
+ ret = child(info);
+ else
+ ret = parent(info, pid);
+
+ shmdt(info);
+
+ if (pid) {
+ destroy_child_sync(&info->child_sync);
+ shmctl(shm_id, IPC_RMID, NULL);
+ }
+
+ return ret;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(ptrace_pkey, "ptrace_pkey");
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c
new file mode 100644
index 000000000..f9b5069db
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c
@@ -0,0 +1,135 @@
+/*
+ * Ptrace test for TAR, PPR, DSCR registers
+ *
+ * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include "ptrace.h"
+#include "ptrace-tar.h"
+
+/* Tracer and Tracee Shared Data */
+int shm_id;
+int *cptr;
+int *pptr;
+
+void tar(void)
+{
+ unsigned long reg[3];
+ int ret;
+
+ cptr = (int *)shmat(shm_id, NULL, 0);
+ printf("%-30s TAR: %u PPR: %lx DSCR: %u\n",
+ user_write, TAR_1, PPR_1, DSCR_1);
+
+ mtspr(SPRN_TAR, TAR_1);
+ mtspr(SPRN_PPR, PPR_1);
+ mtspr(SPRN_DSCR, DSCR_1);
+
+ cptr[2] = 1;
+
+ /* Wait on parent */
+ while (!cptr[0])
+ asm volatile("" : : : "memory");
+
+ reg[0] = mfspr(SPRN_TAR);
+ reg[1] = mfspr(SPRN_PPR);
+ reg[2] = mfspr(SPRN_DSCR);
+
+ printf("%-30s TAR: %lu PPR: %lx DSCR: %lu\n",
+ user_read, reg[0], reg[1], reg[2]);
+
+ /* Unblock the parent now */
+ cptr[1] = 1;
+ shmdt((int *)cptr);
+
+ ret = validate_tar_registers(reg, TAR_2, PPR_2, DSCR_2);
+ if (ret)
+ exit(1);
+ exit(0);
+}
+
+int trace_tar(pid_t child)
+{
+ unsigned long reg[3];
+
+ FAIL_IF(start_trace(child));
+ FAIL_IF(show_tar_registers(child, reg));
+ printf("%-30s TAR: %lu PPR: %lx DSCR: %lu\n",
+ ptrace_read_running, reg[0], reg[1], reg[2]);
+
+ FAIL_IF(validate_tar_registers(reg, TAR_1, PPR_1, DSCR_1));
+ FAIL_IF(stop_trace(child));
+ return TEST_PASS;
+}
+
+int trace_tar_write(pid_t child)
+{
+ FAIL_IF(start_trace(child));
+ FAIL_IF(write_tar_registers(child, TAR_2, PPR_2, DSCR_2));
+ printf("%-30s TAR: %u PPR: %lx DSCR: %u\n",
+ ptrace_write_running, TAR_2, PPR_2, DSCR_2);
+
+ FAIL_IF(stop_trace(child));
+ return TEST_PASS;
+}
+
+int ptrace_tar(void)
+{
+ pid_t pid;
+ int ret, status;
+
+ shm_id = shmget(IPC_PRIVATE, sizeof(int) * 3, 0777|IPC_CREAT);
+ pid = fork();
+ if (pid < 0) {
+ perror("fork() failed");
+ return TEST_FAIL;
+ }
+
+ if (pid == 0)
+ tar();
+
+ if (pid) {
+ pptr = (int *)shmat(shm_id, NULL, 0);
+ pptr[0] = 0;
+ pptr[1] = 0;
+
+ while (!pptr[2])
+ asm volatile("" : : : "memory");
+ ret = trace_tar(pid);
+ if (ret)
+ return ret;
+
+ ret = trace_tar_write(pid);
+ if (ret)
+ return ret;
+
+ /* Unblock the child now */
+ pptr[0] = 1;
+
+ /* Wait on child */
+ while (!pptr[1])
+ asm volatile("" : : : "memory");
+
+ shmdt((int *)pptr);
+
+ ret = wait(&status);
+ shmctl(shm_id, IPC_RMID, NULL);
+ if (ret != pid) {
+ printf("Child's exit status not captured\n");
+ return TEST_PASS;
+ }
+
+ return (WIFEXITED(status) && WEXITSTATUS(status)) ? TEST_FAIL :
+ TEST_PASS;
+ }
+ return TEST_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(ptrace_tar, "ptrace_tar");
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tar.h b/tools/testing/selftests/powerpc/ptrace/ptrace-tar.h
new file mode 100644
index 000000000..aed0aac71
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tar.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#define TAR_1 10
+#define TAR_2 20
+#define TAR_3 30
+#define TAR_4 40
+#define TAR_5 50
+
+#define DSCR_1 100
+#define DSCR_2 200
+#define DSCR_3 300
+#define DSCR_4 400
+#define DSCR_5 500
+
+#define PPR_1 0x4000000000000 /* or 31,31,31*/
+#define PPR_2 0x8000000000000 /* or 1,1,1 */
+#define PPR_3 0xc000000000000 /* or 6,6,6 */
+#define PPR_4 0x10000000000000 /* or 2,2,2 */
+
+char *user_read = "[User Read (Running)]";
+char *user_write = "[User Write (Running)]";
+char *ptrace_read_running = "[Ptrace Read (Running)]";
+char *ptrace_write_running = "[Ptrace Write (Running)]";
+char *ptrace_read_ckpt = "[Ptrace Read (Checkpointed)]";
+char *ptrace_write_ckpt = "[Ptrace Write (Checkpointed)]";
+
+int validate_tar_registers(unsigned long *reg, unsigned long tar,
+ unsigned long ppr, unsigned long dscr)
+{
+ int match = 1;
+
+ if (reg[0] != tar)
+ match = 0;
+
+ if (reg[1] != ppr)
+ match = 0;
+
+ if (reg[2] != dscr)
+ match = 0;
+
+ if (!match)
+ return TEST_FAIL;
+ return TEST_PASS;
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c
new file mode 100644
index 000000000..a08a91594
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c
@@ -0,0 +1,158 @@
+/*
+ * Ptrace test for GPR/FPR registers in TM context
+ *
+ * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include "ptrace.h"
+#include "ptrace-gpr.h"
+#include "tm.h"
+
+/* Tracer and Tracee Shared Data */
+int shm_id;
+unsigned long *cptr, *pptr;
+
+float a = FPR_1;
+float b = FPR_2;
+float c = FPR_3;
+
+void tm_gpr(void)
+{
+ unsigned long gpr_buf[18];
+ unsigned long result, texasr;
+ float fpr_buf[32];
+
+ printf("Starting the child\n");
+ cptr = (unsigned long *)shmat(shm_id, NULL, 0);
+
+trans:
+ cptr[1] = 0;
+ asm __volatile__(
+ ASM_LOAD_GPR_IMMED(gpr_1)
+ ASM_LOAD_FPR_SINGLE_PRECISION(flt_1)
+ "1: ;"
+ "tbegin.;"
+ "beq 2f;"
+ ASM_LOAD_GPR_IMMED(gpr_2)
+ ASM_LOAD_FPR_SINGLE_PRECISION(flt_2)
+ "tsuspend.;"
+ "li 7, 1;"
+ "stw 7, 0(%[cptr1]);"
+ "tresume.;"
+ "b .;"
+
+ "tend.;"
+ "li 0, 0;"
+ "ori %[res], 0, 0;"
+ "b 3f;"
+
+ /* Transaction abort handler */
+ "2: ;"
+ "li 0, 1;"
+ "ori %[res], 0, 0;"
+ "mfspr %[texasr], %[sprn_texasr];"
+
+ "3: ;"
+ : [res] "=r" (result), [texasr] "=r" (texasr)
+ : [gpr_1]"i"(GPR_1), [gpr_2]"i"(GPR_2),
+ [sprn_texasr] "i" (SPRN_TEXASR), [flt_1] "b" (&a),
+ [flt_2] "b" (&b), [cptr1] "b" (&cptr[1])
+ : "memory", "r7", "r8", "r9", "r10",
+ "r11", "r12", "r13", "r14", "r15", "r16",
+ "r17", "r18", "r19", "r20", "r21", "r22",
+ "r23", "r24", "r25", "r26", "r27", "r28",
+ "r29", "r30", "r31"
+ );
+
+ if (result) {
+ if (!cptr[0])
+ goto trans;
+
+ shmdt((void *)cptr);
+ store_gpr(gpr_buf);
+ store_fpr_single_precision(fpr_buf);
+
+ if (validate_gpr(gpr_buf, GPR_3))
+ exit(1);
+
+ if (validate_fpr_float(fpr_buf, c))
+ exit(1);
+
+ exit(0);
+ }
+ shmdt((void *)cptr);
+ exit(1);
+}
+
+int trace_tm_gpr(pid_t child)
+{
+ unsigned long gpr[18];
+ unsigned long fpr[32];
+
+ FAIL_IF(start_trace(child));
+ FAIL_IF(show_gpr(child, gpr));
+ FAIL_IF(validate_gpr(gpr, GPR_2));
+ FAIL_IF(show_fpr(child, fpr));
+ FAIL_IF(validate_fpr(fpr, FPR_2_REP));
+ FAIL_IF(show_ckpt_fpr(child, fpr));
+ FAIL_IF(validate_fpr(fpr, FPR_1_REP));
+ FAIL_IF(show_ckpt_gpr(child, gpr));
+ FAIL_IF(validate_gpr(gpr, GPR_1));
+ FAIL_IF(write_ckpt_gpr(child, GPR_3));
+ FAIL_IF(write_ckpt_fpr(child, FPR_3_REP));
+
+ pptr[0] = 1;
+ FAIL_IF(stop_trace(child));
+
+ return TEST_PASS;
+}
+
+int ptrace_tm_gpr(void)
+{
+ pid_t pid;
+ int ret, status;
+
+ SKIP_IF(!have_htm());
+ shm_id = shmget(IPC_PRIVATE, sizeof(int) * 2, 0777|IPC_CREAT);
+ pid = fork();
+ if (pid < 0) {
+ perror("fork() failed");
+ return TEST_FAIL;
+ }
+ if (pid == 0)
+ tm_gpr();
+
+ if (pid) {
+ pptr = (unsigned long *)shmat(shm_id, NULL, 0);
+
+ while (!pptr[1])
+ asm volatile("" : : : "memory");
+ ret = trace_tm_gpr(pid);
+ if (ret) {
+ kill(pid, SIGTERM);
+ return TEST_FAIL;
+ }
+
+ shmdt((void *)pptr);
+
+ ret = wait(&status);
+ shmctl(shm_id, IPC_RMID, NULL);
+ if (ret != pid) {
+ printf("Child's exit status not captured\n");
+ return TEST_FAIL;
+ }
+
+ return (WIFEXITED(status) && WEXITSTATUS(status)) ? TEST_FAIL :
+ TEST_PASS;
+ }
+ return TEST_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(ptrace_tm_gpr, "ptrace_tm_gpr");
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c
new file mode 100644
index 000000000..dbdffa2e2
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c
@@ -0,0 +1,169 @@
+/*
+ * Ptrace test for GPR/FPR registers in TM Suspend context
+ *
+ * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include "ptrace.h"
+#include "ptrace-gpr.h"
+#include "tm.h"
+
+/* Tracer and Tracee Shared Data */
+int shm_id;
+int *cptr, *pptr;
+
+float a = FPR_1;
+float b = FPR_2;
+float c = FPR_3;
+float d = FPR_4;
+
+__attribute__((used)) void wait_parent(void)
+{
+ cptr[2] = 1;
+ while (!cptr[1])
+ asm volatile("" : : : "memory");
+}
+
+void tm_spd_gpr(void)
+{
+ unsigned long gpr_buf[18];
+ unsigned long result, texasr;
+ float fpr_buf[32];
+
+ cptr = (int *)shmat(shm_id, NULL, 0);
+
+trans:
+ cptr[2] = 0;
+ asm __volatile__(
+ ASM_LOAD_GPR_IMMED(gpr_1)
+ ASM_LOAD_FPR_SINGLE_PRECISION(flt_1)
+
+ "1: ;"
+ "tbegin.;"
+ "beq 2f;"
+
+ ASM_LOAD_GPR_IMMED(gpr_2)
+ "tsuspend.;"
+ ASM_LOAD_GPR_IMMED(gpr_4)
+ ASM_LOAD_FPR_SINGLE_PRECISION(flt_4)
+
+ "bl wait_parent;"
+ "tresume.;"
+ "tend.;"
+ "li 0, 0;"
+ "ori %[res], 0, 0;"
+ "b 3f;"
+
+ /* Transaction abort handler */
+ "2: ;"
+ "li 0, 1;"
+ "ori %[res], 0, 0;"
+ "mfspr %[texasr], %[sprn_texasr];"
+
+ "3: ;"
+ : [res] "=r" (result), [texasr] "=r" (texasr)
+ : [gpr_1]"i"(GPR_1), [gpr_2]"i"(GPR_2), [gpr_4]"i"(GPR_4),
+ [sprn_texasr] "i" (SPRN_TEXASR), [flt_1] "b" (&a),
+ [flt_4] "b" (&d)
+ : "memory", "r5", "r6", "r7",
+ "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
+ "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
+ "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31"
+ );
+
+ if (result) {
+ if (!cptr[0])
+ goto trans;
+
+ shmdt((void *)cptr);
+ store_gpr(gpr_buf);
+ store_fpr_single_precision(fpr_buf);
+
+ if (validate_gpr(gpr_buf, GPR_3))
+ exit(1);
+
+ if (validate_fpr_float(fpr_buf, c))
+ exit(1);
+ exit(0);
+ }
+ shmdt((void *)cptr);
+ exit(1);
+}
+
+int trace_tm_spd_gpr(pid_t child)
+{
+ unsigned long gpr[18];
+ unsigned long fpr[32];
+
+ FAIL_IF(start_trace(child));
+ FAIL_IF(show_gpr(child, gpr));
+ FAIL_IF(validate_gpr(gpr, GPR_4));
+ FAIL_IF(show_fpr(child, fpr));
+ FAIL_IF(validate_fpr(fpr, FPR_4_REP));
+ FAIL_IF(show_ckpt_fpr(child, fpr));
+ FAIL_IF(validate_fpr(fpr, FPR_1_REP));
+ FAIL_IF(show_ckpt_gpr(child, gpr));
+ FAIL_IF(validate_gpr(gpr, GPR_1));
+ FAIL_IF(write_ckpt_gpr(child, GPR_3));
+ FAIL_IF(write_ckpt_fpr(child, FPR_3_REP));
+
+ pptr[0] = 1;
+ pptr[1] = 1;
+ FAIL_IF(stop_trace(child));
+ return TEST_PASS;
+}
+
+int ptrace_tm_spd_gpr(void)
+{
+ pid_t pid;
+ int ret, status;
+
+ SKIP_IF(!have_htm());
+ shm_id = shmget(IPC_PRIVATE, sizeof(int) * 3, 0777|IPC_CREAT);
+ pid = fork();
+ if (pid < 0) {
+ perror("fork() failed");
+ return TEST_FAIL;
+ }
+
+ if (pid == 0)
+ tm_spd_gpr();
+
+ if (pid) {
+ pptr = (int *)shmat(shm_id, NULL, 0);
+ pptr[0] = 0;
+ pptr[1] = 0;
+
+ while (!pptr[2])
+ asm volatile("" : : : "memory");
+ ret = trace_tm_spd_gpr(pid);
+ if (ret) {
+ kill(pid, SIGTERM);
+ shmdt((void *)pptr);
+ shmctl(shm_id, IPC_RMID, NULL);
+ return TEST_FAIL;
+ }
+
+ shmdt((void *)pptr);
+
+ ret = wait(&status);
+ shmctl(shm_id, IPC_RMID, NULL);
+ if (ret != pid) {
+ printf("Child's exit status not captured\n");
+ return TEST_FAIL;
+ }
+
+ return (WIFEXITED(status) && WEXITSTATUS(status)) ? TEST_FAIL :
+ TEST_PASS;
+ }
+ return TEST_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(ptrace_tm_spd_gpr, "ptrace_tm_spd_gpr");
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c
new file mode 100644
index 000000000..f47174746
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c
@@ -0,0 +1,174 @@
+/*
+ * Ptrace test for TAR, PPR, DSCR registers in the TM Suspend context
+ *
+ * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include "ptrace.h"
+#include "tm.h"
+#include "ptrace-tar.h"
+
+int shm_id;
+int *cptr, *pptr;
+
+__attribute__((used)) void wait_parent(void)
+{
+ cptr[2] = 1;
+ while (!cptr[1])
+ asm volatile("" : : : "memory");
+}
+
+void tm_spd_tar(void)
+{
+ unsigned long result, texasr;
+ unsigned long regs[3];
+ int ret;
+
+ cptr = (int *)shmat(shm_id, NULL, 0);
+
+trans:
+ cptr[2] = 0;
+ asm __volatile__(
+ "li 4, %[tar_1];"
+ "mtspr %[sprn_tar], 4;" /* TAR_1 */
+ "li 4, %[dscr_1];"
+ "mtspr %[sprn_dscr], 4;" /* DSCR_1 */
+ "or 31,31,31;" /* PPR_1*/
+
+ "1: ;"
+ "tbegin.;"
+ "beq 2f;"
+
+ "li 4, %[tar_2];"
+ "mtspr %[sprn_tar], 4;" /* TAR_2 */
+ "li 4, %[dscr_2];"
+ "mtspr %[sprn_dscr], 4;" /* DSCR_2 */
+ "or 1,1,1;" /* PPR_2 */
+
+ "tsuspend.;"
+ "li 4, %[tar_3];"
+ "mtspr %[sprn_tar], 4;" /* TAR_3 */
+ "li 4, %[dscr_3];"
+ "mtspr %[sprn_dscr], 4;" /* DSCR_3 */
+ "or 6,6,6;" /* PPR_3 */
+ "bl wait_parent;"
+ "tresume.;"
+
+ "tend.;"
+ "li 0, 0;"
+ "ori %[res], 0, 0;"
+ "b 3f;"
+
+ /* Transaction abort handler */
+ "2: ;"
+ "li 0, 1;"
+ "ori %[res], 0, 0;"
+ "mfspr %[texasr], %[sprn_texasr];"
+
+ "3: ;"
+
+ : [res] "=r" (result), [texasr] "=r" (texasr)
+ : [sprn_dscr]"i"(SPRN_DSCR),
+ [sprn_tar]"i"(SPRN_TAR), [sprn_ppr]"i"(SPRN_PPR),
+ [sprn_texasr]"i"(SPRN_TEXASR), [tar_1]"i"(TAR_1),
+ [dscr_1]"i"(DSCR_1), [tar_2]"i"(TAR_2), [dscr_2]"i"(DSCR_2),
+ [tar_3]"i"(TAR_3), [dscr_3]"i"(DSCR_3)
+ : "memory", "r0", "r1", "r3", "r4", "r5", "r6"
+ );
+
+ /* TM failed, analyse */
+ if (result) {
+ if (!cptr[0])
+ goto trans;
+
+ regs[0] = mfspr(SPRN_TAR);
+ regs[1] = mfspr(SPRN_PPR);
+ regs[2] = mfspr(SPRN_DSCR);
+
+ shmdt(&cptr);
+ printf("%-30s TAR: %lu PPR: %lx DSCR: %lu\n",
+ user_read, regs[0], regs[1], regs[2]);
+
+ ret = validate_tar_registers(regs, TAR_4, PPR_4, DSCR_4);
+ if (ret)
+ exit(1);
+ exit(0);
+ }
+ shmdt(&cptr);
+ exit(1);
+}
+
+int trace_tm_spd_tar(pid_t child)
+{
+ unsigned long regs[3];
+
+ FAIL_IF(start_trace(child));
+ FAIL_IF(show_tar_registers(child, regs));
+ printf("%-30s TAR: %lu PPR: %lx DSCR: %lu\n",
+ ptrace_read_running, regs[0], regs[1], regs[2]);
+
+ FAIL_IF(validate_tar_registers(regs, TAR_3, PPR_3, DSCR_3));
+ FAIL_IF(show_tm_checkpointed_state(child, regs));
+ printf("%-30s TAR: %lu PPR: %lx DSCR: %lu\n",
+ ptrace_read_ckpt, regs[0], regs[1], regs[2]);
+
+ FAIL_IF(validate_tar_registers(regs, TAR_1, PPR_1, DSCR_1));
+ FAIL_IF(write_ckpt_tar_registers(child, TAR_4, PPR_4, DSCR_4));
+ printf("%-30s TAR: %u PPR: %lx DSCR: %u\n",
+ ptrace_write_ckpt, TAR_4, PPR_4, DSCR_4);
+
+ pptr[0] = 1;
+ pptr[1] = 1;
+ FAIL_IF(stop_trace(child));
+ return TEST_PASS;
+}
+
+int ptrace_tm_spd_tar(void)
+{
+ pid_t pid;
+ int ret, status;
+
+ SKIP_IF(!have_htm());
+ shm_id = shmget(IPC_PRIVATE, sizeof(int) * 3, 0777|IPC_CREAT);
+ pid = fork();
+ if (pid == 0)
+ tm_spd_tar();
+
+ pptr = (int *)shmat(shm_id, NULL, 0);
+ pptr[0] = 0;
+ pptr[1] = 0;
+
+ if (pid) {
+ while (!pptr[2])
+ asm volatile("" : : : "memory");
+ ret = trace_tm_spd_tar(pid);
+ if (ret) {
+ kill(pid, SIGTERM);
+ shmdt(&pptr);
+ shmctl(shm_id, IPC_RMID, NULL);
+ return TEST_FAIL;
+ }
+
+ shmdt(&pptr);
+
+ ret = wait(&status);
+ shmctl(shm_id, IPC_RMID, NULL);
+ if (ret != pid) {
+ printf("Child's exit status not captured\n");
+ return TEST_FAIL;
+ }
+
+ return (WIFEXITED(status) && WEXITSTATUS(status)) ? TEST_FAIL :
+ TEST_PASS;
+ }
+ return TEST_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(ptrace_tm_spd_tar, "ptrace_tm_spd_tar");
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c
new file mode 100644
index 000000000..18a685bf6
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c
@@ -0,0 +1,184 @@
+/*
+ * Ptrace test for VMX/VSX registers in the TM Suspend context
+ *
+ * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include "ptrace.h"
+#include "tm.h"
+#include "ptrace-vsx.h"
+
+int shm_id;
+int *cptr, *pptr;
+
+unsigned long fp_load[VEC_MAX];
+unsigned long fp_load_new[VEC_MAX];
+unsigned long fp_store[VEC_MAX];
+unsigned long fp_load_ckpt[VEC_MAX];
+unsigned long fp_load_ckpt_new[VEC_MAX];
+
+__attribute__((used)) void load_vsx(void)
+{
+ loadvsx(fp_load, 0);
+}
+
+__attribute__((used)) void load_vsx_new(void)
+{
+ loadvsx(fp_load_new, 0);
+}
+
+__attribute__((used)) void load_vsx_ckpt(void)
+{
+ loadvsx(fp_load_ckpt, 0);
+}
+
+__attribute__((used)) void wait_parent(void)
+{
+ cptr[2] = 1;
+ while (!cptr[1])
+ asm volatile("" : : : "memory");
+}
+
+void tm_spd_vsx(void)
+{
+ unsigned long result, texasr;
+ int ret;
+
+ cptr = (int *)shmat(shm_id, NULL, 0);
+
+trans:
+ cptr[2] = 0;
+ asm __volatile__(
+ "bl load_vsx_ckpt;"
+
+ "1: ;"
+ "tbegin.;"
+ "beq 2f;"
+
+ "bl load_vsx_new;"
+ "tsuspend.;"
+ "bl load_vsx;"
+ "bl wait_parent;"
+ "tresume.;"
+
+ "tend.;"
+ "li 0, 0;"
+ "ori %[res], 0, 0;"
+ "b 3f;"
+
+ "2: ;"
+ "li 0, 1;"
+ "ori %[res], 0, 0;"
+ "mfspr %[texasr], %[sprn_texasr];"
+
+ "3: ;"
+ : [res] "=r" (result), [texasr] "=r" (texasr)
+ : [sprn_texasr] "i" (SPRN_TEXASR)
+ : "memory", "r0", "r1", "r3", "r4",
+ "r7", "r8", "r9", "r10", "r11"
+ );
+
+ if (result) {
+ if (!cptr[0])
+ goto trans;
+ shmdt((void *)cptr);
+
+ storevsx(fp_store, 0);
+ ret = compare_vsx_vmx(fp_store, fp_load_ckpt_new);
+ if (ret)
+ exit(1);
+ exit(0);
+ }
+ shmdt((void *)cptr);
+ exit(1);
+}
+
+int trace_tm_spd_vsx(pid_t child)
+{
+ unsigned long vsx[VSX_MAX];
+ unsigned long vmx[VMX_MAX + 2][2];
+
+ FAIL_IF(start_trace(child));
+ FAIL_IF(show_vsx(child, vsx));
+ FAIL_IF(validate_vsx(vsx, fp_load));
+ FAIL_IF(show_vmx(child, vmx));
+ FAIL_IF(validate_vmx(vmx, fp_load));
+ FAIL_IF(show_vsx_ckpt(child, vsx));
+ FAIL_IF(validate_vsx(vsx, fp_load_ckpt));
+ FAIL_IF(show_vmx_ckpt(child, vmx));
+ FAIL_IF(validate_vmx(vmx, fp_load_ckpt));
+
+ memset(vsx, 0, sizeof(vsx));
+ memset(vmx, 0, sizeof(vmx));
+
+ load_vsx_vmx(fp_load_ckpt_new, vsx, vmx);
+
+ FAIL_IF(write_vsx_ckpt(child, vsx));
+ FAIL_IF(write_vmx_ckpt(child, vmx));
+
+ pptr[0] = 1;
+ pptr[1] = 1;
+ FAIL_IF(stop_trace(child));
+
+ return TEST_PASS;
+}
+
+int ptrace_tm_spd_vsx(void)
+{
+ pid_t pid;
+ int ret, status, i;
+
+ SKIP_IF(!have_htm());
+ shm_id = shmget(IPC_PRIVATE, sizeof(int) * 3, 0777|IPC_CREAT);
+
+ for (i = 0; i < 128; i++) {
+ fp_load[i] = 1 + rand();
+ fp_load_new[i] = 1 + 2 * rand();
+ fp_load_ckpt[i] = 1 + 3 * rand();
+ fp_load_ckpt_new[i] = 1 + 4 * rand();
+ }
+
+ pid = fork();
+ if (pid < 0) {
+ perror("fork() failed");
+ return TEST_FAIL;
+ }
+
+ if (pid == 0)
+ tm_spd_vsx();
+
+ if (pid) {
+ pptr = (int *)shmat(shm_id, NULL, 0);
+ while (!pptr[2])
+ asm volatile("" : : : "memory");
+
+ ret = trace_tm_spd_vsx(pid);
+ if (ret) {
+ kill(pid, SIGKILL);
+ shmdt((void *)pptr);
+ shmctl(shm_id, IPC_RMID, NULL);
+ return TEST_FAIL;
+ }
+
+ shmdt((void *)pptr);
+ ret = wait(&status);
+ shmctl(shm_id, IPC_RMID, NULL);
+ if (ret != pid) {
+ printf("Child's exit status not captured\n");
+ return TEST_FAIL;
+ }
+
+ return (WIFEXITED(status) && WEXITSTATUS(status)) ? TEST_FAIL :
+ TEST_PASS;
+ }
+ return TEST_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(ptrace_tm_spd_vsx, "ptrace_tm_spd_vsx");
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c
new file mode 100644
index 000000000..ba0499925
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c
@@ -0,0 +1,167 @@
+/*
+ * Ptrace test TM SPR registers
+ *
+ * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include "ptrace.h"
+#include "tm.h"
+
+/* Tracee and tracer shared data */
+struct shared {
+ int flag;
+ struct tm_spr_regs regs;
+};
+unsigned long tfhar;
+
+int shm_id;
+struct shared *cptr, *pptr;
+
+int shm_id1;
+int *cptr1, *pptr1;
+
+#define TM_KVM_SCHED 0xe0000001ac000001
+int validate_tm_spr(struct tm_spr_regs *regs)
+{
+ FAIL_IF(regs->tm_tfhar != tfhar);
+ FAIL_IF((regs->tm_texasr == TM_KVM_SCHED) && (regs->tm_tfiar != 0));
+
+ return TEST_PASS;
+}
+
+void tm_spr(void)
+{
+ unsigned long result, texasr;
+ int ret;
+
+ cptr = (struct shared *)shmat(shm_id, NULL, 0);
+ cptr1 = (int *)shmat(shm_id1, NULL, 0);
+
+trans:
+ cptr1[0] = 0;
+ asm __volatile__(
+ "1: ;"
+ /* TM failover handler should follow "tbegin.;" */
+ "mflr 31;"
+ "bl 4f;" /* $ = TFHAR - 12 */
+ "4: ;"
+ "mflr %[tfhar];"
+ "mtlr 31;"
+
+ "tbegin.;"
+ "beq 2f;"
+
+ "tsuspend.;"
+ "li 8, 1;"
+ "sth 8, 0(%[cptr1]);"
+ "tresume.;"
+ "b .;"
+
+ "tend.;"
+ "li 0, 0;"
+ "ori %[res], 0, 0;"
+ "b 3f;"
+
+ "2: ;"
+
+ "li 0, 1;"
+ "ori %[res], 0, 0;"
+ "mfspr %[texasr], %[sprn_texasr];"
+
+ "3: ;"
+ : [tfhar] "=r" (tfhar), [res] "=r" (result),
+ [texasr] "=r" (texasr), [cptr1] "=b" (cptr1)
+ : [sprn_texasr] "i" (SPRN_TEXASR)
+ : "memory", "r0", "r8", "r31"
+ );
+
+ /* There are 2 32bit instructions before tbegin. */
+ tfhar += 12;
+
+ if (result) {
+ if (!cptr->flag)
+ goto trans;
+
+ ret = validate_tm_spr((struct tm_spr_regs *)&cptr->regs);
+ shmdt((void *)cptr);
+ shmdt((void *)cptr1);
+ if (ret)
+ exit(1);
+ exit(0);
+ }
+ shmdt((void *)cptr);
+ shmdt((void *)cptr1);
+ exit(1);
+}
+
+int trace_tm_spr(pid_t child)
+{
+ FAIL_IF(start_trace(child));
+ FAIL_IF(show_tm_spr(child, (struct tm_spr_regs *)&pptr->regs));
+
+ printf("TFHAR: %lx TEXASR: %lx TFIAR: %lx\n", pptr->regs.tm_tfhar,
+ pptr->regs.tm_texasr, pptr->regs.tm_tfiar);
+
+ pptr->flag = 1;
+ FAIL_IF(stop_trace(child));
+
+ return TEST_PASS;
+}
+
+int ptrace_tm_spr(void)
+{
+ pid_t pid;
+ int ret, status;
+
+ SKIP_IF(!have_htm());
+ shm_id = shmget(IPC_PRIVATE, sizeof(struct shared), 0777|IPC_CREAT);
+ shm_id1 = shmget(IPC_PRIVATE, sizeof(int), 0777|IPC_CREAT);
+ pid = fork();
+ if (pid < 0) {
+ perror("fork() failed");
+ return TEST_FAIL;
+ }
+
+ if (pid == 0)
+ tm_spr();
+
+ if (pid) {
+ pptr = (struct shared *)shmat(shm_id, NULL, 0);
+ pptr1 = (int *)shmat(shm_id1, NULL, 0);
+
+ while (!pptr1[0])
+ asm volatile("" : : : "memory");
+ ret = trace_tm_spr(pid);
+ if (ret) {
+ kill(pid, SIGKILL);
+ shmdt((void *)pptr);
+ shmdt((void *)pptr1);
+ shmctl(shm_id, IPC_RMID, NULL);
+ shmctl(shm_id1, IPC_RMID, NULL);
+ return TEST_FAIL;
+ }
+
+ shmdt((void *)pptr);
+ shmdt((void *)pptr1);
+ ret = wait(&status);
+ shmctl(shm_id, IPC_RMID, NULL);
+ shmctl(shm_id1, IPC_RMID, NULL);
+ if (ret != pid) {
+ printf("Child's exit status not captured\n");
+ return TEST_FAIL;
+ }
+
+ return (WIFEXITED(status) && WEXITSTATUS(status)) ? TEST_FAIL :
+ TEST_PASS;
+ }
+ return TEST_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(ptrace_tm_spr, "ptrace_tm_spr");
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c
new file mode 100644
index 000000000..f70023b25
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c
@@ -0,0 +1,160 @@
+/*
+ * Ptrace test for TAR, PPR, DSCR registers in the TM context
+ *
+ * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include "ptrace.h"
+#include "tm.h"
+#include "ptrace-tar.h"
+
+int shm_id;
+unsigned long *cptr, *pptr;
+
+
+void tm_tar(void)
+{
+ unsigned long result, texasr;
+ unsigned long regs[3];
+ int ret;
+
+ cptr = (unsigned long *)shmat(shm_id, NULL, 0);
+
+trans:
+ cptr[1] = 0;
+ asm __volatile__(
+ "li 4, %[tar_1];"
+ "mtspr %[sprn_tar], 4;" /* TAR_1 */
+ "li 4, %[dscr_1];"
+ "mtspr %[sprn_dscr], 4;" /* DSCR_1 */
+ "or 31,31,31;" /* PPR_1*/
+
+ "1: ;"
+ "tbegin.;"
+ "beq 2f;"
+
+ "li 4, %[tar_2];"
+ "mtspr %[sprn_tar], 4;" /* TAR_2 */
+ "li 4, %[dscr_2];"
+ "mtspr %[sprn_dscr], 4;" /* DSCR_2 */
+ "or 1,1,1;" /* PPR_2 */
+ "tsuspend.;"
+ "li 0, 1;"
+ "stw 0, 0(%[cptr1]);"
+ "tresume.;"
+ "b .;"
+
+ "tend.;"
+ "li 0, 0;"
+ "ori %[res], 0, 0;"
+ "b 3f;"
+
+ /* Transaction abort handler */
+ "2: ;"
+ "li 0, 1;"
+ "ori %[res], 0, 0;"
+ "mfspr %[texasr], %[sprn_texasr];"
+
+ "3: ;"
+
+ : [res] "=r" (result), [texasr] "=r" (texasr)
+ : [sprn_dscr]"i"(SPRN_DSCR), [sprn_tar]"i"(SPRN_TAR),
+ [sprn_ppr]"i"(SPRN_PPR), [sprn_texasr]"i"(SPRN_TEXASR),
+ [tar_1]"i"(TAR_1), [dscr_1]"i"(DSCR_1), [tar_2]"i"(TAR_2),
+ [dscr_2]"i"(DSCR_2), [cptr1] "b" (&cptr[1])
+ : "memory", "r0", "r1", "r3", "r4", "r5", "r6"
+ );
+
+ /* TM failed, analyse */
+ if (result) {
+ if (!cptr[0])
+ goto trans;
+
+ regs[0] = mfspr(SPRN_TAR);
+ regs[1] = mfspr(SPRN_PPR);
+ regs[2] = mfspr(SPRN_DSCR);
+
+ shmdt(&cptr);
+ printf("%-30s TAR: %lu PPR: %lx DSCR: %lu\n",
+ user_read, regs[0], regs[1], regs[2]);
+
+ ret = validate_tar_registers(regs, TAR_4, PPR_4, DSCR_4);
+ if (ret)
+ exit(1);
+ exit(0);
+ }
+ shmdt(&cptr);
+ exit(1);
+}
+
+int trace_tm_tar(pid_t child)
+{
+ unsigned long regs[3];
+
+ FAIL_IF(start_trace(child));
+ FAIL_IF(show_tar_registers(child, regs));
+ printf("%-30s TAR: %lu PPR: %lx DSCR: %lu\n",
+ ptrace_read_running, regs[0], regs[1], regs[2]);
+
+ FAIL_IF(validate_tar_registers(regs, TAR_2, PPR_2, DSCR_2));
+ FAIL_IF(show_tm_checkpointed_state(child, regs));
+ printf("%-30s TAR: %lu PPR: %lx DSCR: %lu\n",
+ ptrace_read_ckpt, regs[0], regs[1], regs[2]);
+
+ FAIL_IF(validate_tar_registers(regs, TAR_1, PPR_1, DSCR_1));
+ FAIL_IF(write_ckpt_tar_registers(child, TAR_4, PPR_4, DSCR_4));
+ printf("%-30s TAR: %u PPR: %lx DSCR: %u\n",
+ ptrace_write_ckpt, TAR_4, PPR_4, DSCR_4);
+
+ pptr[0] = 1;
+ FAIL_IF(stop_trace(child));
+ return TEST_PASS;
+}
+
+int ptrace_tm_tar(void)
+{
+ pid_t pid;
+ int ret, status;
+
+ SKIP_IF(!have_htm());
+ shm_id = shmget(IPC_PRIVATE, sizeof(int) * 2, 0777|IPC_CREAT);
+ pid = fork();
+ if (pid == 0)
+ tm_tar();
+
+ pptr = (unsigned long *)shmat(shm_id, NULL, 0);
+ pptr[0] = 0;
+
+ if (pid) {
+ while (!pptr[1])
+ asm volatile("" : : : "memory");
+ ret = trace_tm_tar(pid);
+ if (ret) {
+ kill(pid, SIGTERM);
+ shmdt(&pptr);
+ shmctl(shm_id, IPC_RMID, NULL);
+ return TEST_FAIL;
+ }
+ shmdt(&pptr);
+
+ ret = wait(&status);
+ shmctl(shm_id, IPC_RMID, NULL);
+ if (ret != pid) {
+ printf("Child's exit status not captured\n");
+ return TEST_FAIL;
+ }
+
+ return (WIFEXITED(status) && WEXITSTATUS(status)) ? TEST_FAIL :
+ TEST_PASS;
+ }
+ return TEST_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(ptrace_tm_tar, "ptrace_tm_tar");
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c
new file mode 100644
index 000000000..dfba80058
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c
@@ -0,0 +1,167 @@
+/*
+ * Ptrace test for VMX/VSX registers in the TM context
+ *
+ * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include "ptrace.h"
+#include "tm.h"
+#include "ptrace-vsx.h"
+
+int shm_id;
+unsigned long *cptr, *pptr;
+
+unsigned long fp_load[VEC_MAX];
+unsigned long fp_store[VEC_MAX];
+unsigned long fp_load_ckpt[VEC_MAX];
+unsigned long fp_load_ckpt_new[VEC_MAX];
+
+__attribute__((used)) void load_vsx(void)
+{
+ loadvsx(fp_load, 0);
+}
+
+__attribute__((used)) void load_vsx_ckpt(void)
+{
+ loadvsx(fp_load_ckpt, 0);
+}
+
+void tm_vsx(void)
+{
+ unsigned long result, texasr;
+ int ret;
+
+ cptr = (unsigned long *)shmat(shm_id, NULL, 0);
+
+trans:
+ cptr[1] = 0;
+ asm __volatile__(
+ "bl load_vsx_ckpt;"
+
+ "1: ;"
+ "tbegin.;"
+ "beq 2f;"
+
+ "bl load_vsx;"
+ "tsuspend.;"
+ "li 7, 1;"
+ "stw 7, 0(%[cptr1]);"
+ "tresume.;"
+ "b .;"
+
+ "tend.;"
+ "li 0, 0;"
+ "ori %[res], 0, 0;"
+ "b 3f;"
+
+ "2: ;"
+ "li 0, 1;"
+ "ori %[res], 0, 0;"
+ "mfspr %[texasr], %[sprn_texasr];"
+
+ "3: ;"
+ : [res] "=r" (result), [texasr] "=r" (texasr)
+ : [sprn_texasr] "i" (SPRN_TEXASR), [cptr1] "b" (&cptr[1])
+ : "memory", "r0", "r1", "r3", "r4",
+ "r7", "r8", "r9", "r10", "r11"
+ );
+
+ if (result) {
+ if (!cptr[0])
+ goto trans;
+
+ shmdt((void *)cptr);
+ storevsx(fp_store, 0);
+ ret = compare_vsx_vmx(fp_store, fp_load_ckpt_new);
+ if (ret)
+ exit(1);
+ exit(0);
+ }
+ shmdt((void *)cptr);
+ exit(1);
+}
+
+int trace_tm_vsx(pid_t child)
+{
+ unsigned long vsx[VSX_MAX];
+ unsigned long vmx[VMX_MAX + 2][2];
+
+ FAIL_IF(start_trace(child));
+ FAIL_IF(show_vsx(child, vsx));
+ FAIL_IF(validate_vsx(vsx, fp_load));
+ FAIL_IF(show_vmx(child, vmx));
+ FAIL_IF(validate_vmx(vmx, fp_load));
+ FAIL_IF(show_vsx_ckpt(child, vsx));
+ FAIL_IF(validate_vsx(vsx, fp_load_ckpt));
+ FAIL_IF(show_vmx_ckpt(child, vmx));
+ FAIL_IF(validate_vmx(vmx, fp_load_ckpt));
+ memset(vsx, 0, sizeof(vsx));
+ memset(vmx, 0, sizeof(vmx));
+
+ load_vsx_vmx(fp_load_ckpt_new, vsx, vmx);
+
+ FAIL_IF(write_vsx_ckpt(child, vsx));
+ FAIL_IF(write_vmx_ckpt(child, vmx));
+ pptr[0] = 1;
+ FAIL_IF(stop_trace(child));
+ return TEST_PASS;
+}
+
+int ptrace_tm_vsx(void)
+{
+ pid_t pid;
+ int ret, status, i;
+
+ SKIP_IF(!have_htm());
+ shm_id = shmget(IPC_PRIVATE, sizeof(int) * 2, 0777|IPC_CREAT);
+
+ for (i = 0; i < 128; i++) {
+ fp_load[i] = 1 + rand();
+ fp_load_ckpt[i] = 1 + 2 * rand();
+ fp_load_ckpt_new[i] = 1 + 3 * rand();
+ }
+
+ pid = fork();
+ if (pid < 0) {
+ perror("fork() failed");
+ return TEST_FAIL;
+ }
+
+ if (pid == 0)
+ tm_vsx();
+
+ if (pid) {
+ pptr = (unsigned long *)shmat(shm_id, NULL, 0);
+ while (!pptr[1])
+ asm volatile("" : : : "memory");
+
+ ret = trace_tm_vsx(pid);
+ if (ret) {
+ kill(pid, SIGKILL);
+ shmdt((void *)pptr);
+ shmctl(shm_id, IPC_RMID, NULL);
+ return TEST_FAIL;
+ }
+
+ shmdt((void *)pptr);
+ ret = wait(&status);
+ shmctl(shm_id, IPC_RMID, NULL);
+ if (ret != pid) {
+ printf("Child's exit status not captured\n");
+ return TEST_FAIL;
+ }
+
+ return (WIFEXITED(status) && WEXITSTATUS(status)) ? TEST_FAIL :
+ TEST_PASS;
+ }
+ return TEST_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(ptrace_tm_vsx, "ptrace_tm_vsx");
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c b/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c
new file mode 100644
index 000000000..04084ee7d
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c
@@ -0,0 +1,117 @@
+/*
+ * Ptrace test for VMX/VSX registers
+ *
+ * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include "ptrace.h"
+#include "ptrace-vsx.h"
+
+/* Tracer and Tracee Shared Data */
+int shm_id;
+int *cptr, *pptr;
+
+unsigned long fp_load[VEC_MAX];
+unsigned long fp_load_new[VEC_MAX];
+unsigned long fp_store[VEC_MAX];
+
+void vsx(void)
+{
+ int ret;
+
+ cptr = (int *)shmat(shm_id, NULL, 0);
+ loadvsx(fp_load, 0);
+ cptr[1] = 1;
+
+ while (!cptr[0])
+ asm volatile("" : : : "memory");
+ shmdt((void *) cptr);
+
+ storevsx(fp_store, 0);
+ ret = compare_vsx_vmx(fp_store, fp_load_new);
+ if (ret)
+ exit(1);
+ exit(0);
+}
+
+int trace_vsx(pid_t child)
+{
+ unsigned long vsx[VSX_MAX];
+ unsigned long vmx[VMX_MAX + 2][2];
+
+ FAIL_IF(start_trace(child));
+ FAIL_IF(show_vsx(child, vsx));
+ FAIL_IF(validate_vsx(vsx, fp_load));
+ FAIL_IF(show_vmx(child, vmx));
+ FAIL_IF(validate_vmx(vmx, fp_load));
+
+ memset(vsx, 0, sizeof(vsx));
+ memset(vmx, 0, sizeof(vmx));
+ load_vsx_vmx(fp_load_new, vsx, vmx);
+
+ FAIL_IF(write_vsx(child, vsx));
+ FAIL_IF(write_vmx(child, vmx));
+ FAIL_IF(stop_trace(child));
+
+ return TEST_PASS;
+}
+
+int ptrace_vsx(void)
+{
+ pid_t pid;
+ int ret, status, i;
+
+ shm_id = shmget(IPC_PRIVATE, sizeof(int) * 2, 0777|IPC_CREAT);
+
+ for (i = 0; i < VEC_MAX; i++)
+ fp_load[i] = i + rand();
+
+ for (i = 0; i < VEC_MAX; i++)
+ fp_load_new[i] = i + 2 * rand();
+
+ pid = fork();
+ if (pid < 0) {
+ perror("fork() failed");
+ return TEST_FAIL;
+ }
+
+ if (pid == 0)
+ vsx();
+
+ if (pid) {
+ pptr = (int *)shmat(shm_id, NULL, 0);
+ while (!pptr[1])
+ asm volatile("" : : : "memory");
+
+ ret = trace_vsx(pid);
+ if (ret) {
+ kill(pid, SIGTERM);
+ shmdt((void *)pptr);
+ shmctl(shm_id, IPC_RMID, NULL);
+ return TEST_FAIL;
+ }
+
+ pptr[0] = 1;
+ shmdt((void *)pptr);
+
+ ret = wait(&status);
+ shmctl(shm_id, IPC_RMID, NULL);
+ if (ret != pid) {
+ printf("Child's exit status not captured\n");
+ return TEST_FAIL;
+ }
+
+ return (WIFEXITED(status) && WEXITSTATUS(status)) ? TEST_FAIL :
+ TEST_PASS;
+ }
+ return TEST_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(ptrace_vsx, "ptrace_vsx");
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.h b/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.h
new file mode 100644
index 000000000..f4e4b427c
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.h
@@ -0,0 +1,127 @@
+/*
+ * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#define VEC_MAX 128
+#define VSX_MAX 32
+#define VMX_MAX 32
+
+/*
+ * unsigned long vsx[32]
+ * unsigned long load[128]
+ */
+int validate_vsx(unsigned long *vsx, unsigned long *load)
+{
+ int i;
+
+ for (i = 0; i < VSX_MAX; i++) {
+ if (vsx[i] != load[2 * i + 1]) {
+ printf("vsx[%d]: %lx load[%d] %lx\n",
+ i, vsx[i], 2 * i + 1, load[2 * i + 1]);
+ return TEST_FAIL;
+ }
+ }
+ return TEST_PASS;
+}
+
+/*
+ * unsigned long vmx[32][2]
+ * unsigned long load[128]
+ */
+int validate_vmx(unsigned long vmx[][2], unsigned long *load)
+{
+ int i;
+
+ for (i = 0; i < VMX_MAX; i++) {
+ #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ if ((vmx[i][0] != load[64 + 2 * i]) ||
+ (vmx[i][1] != load[65 + 2 * i])) {
+ printf("vmx[%d][0]: %lx load[%d] %lx\n",
+ i, vmx[i][0], 64 + 2 * i,
+ load[64 + 2 * i]);
+ printf("vmx[%d][1]: %lx load[%d] %lx\n",
+ i, vmx[i][1], 65 + 2 * i,
+ load[65 + 2 * i]);
+ return TEST_FAIL;
+ }
+ #else /*
+ * In LE each value pair is stored in an
+ * alternate manner.
+ */
+ if ((vmx[i][0] != load[65 + 2 * i]) ||
+ (vmx[i][1] != load[64 + 2 * i])) {
+ printf("vmx[%d][0]: %lx load[%d] %lx\n",
+ i, vmx[i][0], 65 + 2 * i,
+ load[65 + 2 * i]);
+ printf("vmx[%d][1]: %lx load[%d] %lx\n",
+ i, vmx[i][1], 64 + 2 * i,
+ load[64 + 2 * i]);
+ return TEST_FAIL;
+ }
+ #endif
+ }
+ return TEST_PASS;
+}
+
+/*
+ * unsigned long store[128]
+ * unsigned long load[128]
+ */
+int compare_vsx_vmx(unsigned long *store, unsigned long *load)
+{
+ int i;
+
+ for (i = 0; i < VSX_MAX; i++) {
+ if (store[1 + 2 * i] != load[1 + 2 * i]) {
+ printf("store[%d]: %lx load[%d] %lx\n",
+ 1 + 2 * i, store[i],
+ 1 + 2 * i, load[i]);
+ return TEST_FAIL;
+ }
+ }
+
+ #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ for (i = 64; i < VEC_MAX; i++) {
+ if (store[i] != load[i]) {
+ printf("store[%d]: %lx load[%d] %lx\n",
+ i, store[i], i, load[i]);
+ return TEST_FAIL;
+ }
+ }
+ #else /* In LE each value pair is stored in an alternate manner */
+ for (i = 64; i < VEC_MAX; i++) {
+ if (!(i % 2) && (store[i] != load[i+1])) {
+ printf("store[%d]: %lx load[%d] %lx\n",
+ i, store[i], i+1, load[i+1]);
+ return TEST_FAIL;
+ }
+ if ((i % 2) && (store[i] != load[i-1])) {
+ printf("here store[%d]: %lx load[%d] %lx\n",
+ i, store[i], i-1, load[i-1]);
+ return TEST_FAIL;
+ }
+ }
+ #endif
+ return TEST_PASS;
+}
+
+void load_vsx_vmx(unsigned long *load, unsigned long *vsx,
+ unsigned long vmx[][2])
+{
+ int i;
+
+ for (i = 0; i < VSX_MAX; i++)
+ vsx[i] = load[1 + 2 * i];
+
+ for (i = 0; i < VMX_MAX; i++) {
+ vmx[i][0] = load[64 + 2 * i];
+ vmx[i][1] = load[65 + 2 * i];
+ }
+}
+
+void loadvsx(void *p, int tmp);
+void storevsx(void *p, int tmp);
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace.h b/tools/testing/selftests/powerpc/ptrace/ptrace.h
new file mode 100644
index 000000000..34201cfa8
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace.h
@@ -0,0 +1,749 @@
+/*
+ * Ptrace interface test helper functions
+ *
+ * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <inttypes.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <malloc.h>
+#include <errno.h>
+#include <time.h>
+#include <sys/ptrace.h>
+#include <sys/ioctl.h>
+#include <sys/uio.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/signal.h>
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#include <sys/user.h>
+#include <linux/elf.h>
+#include <linux/types.h>
+#include <linux/auxvec.h>
+#include "reg.h"
+#include "utils.h"
+
+#define TEST_PASS 0
+#define TEST_FAIL 1
+
+struct fpr_regs {
+ unsigned long fpr[32];
+ unsigned long fpscr;
+};
+
+struct tm_spr_regs {
+ unsigned long tm_tfhar;
+ unsigned long tm_texasr;
+ unsigned long tm_tfiar;
+};
+
+#ifndef NT_PPC_TAR
+#define NT_PPC_TAR 0x103
+#define NT_PPC_PPR 0x104
+#define NT_PPC_DSCR 0x105
+#define NT_PPC_EBB 0x106
+#define NT_PPC_PMU 0x107
+#define NT_PPC_TM_CGPR 0x108
+#define NT_PPC_TM_CFPR 0x109
+#define NT_PPC_TM_CVMX 0x10a
+#define NT_PPC_TM_CVSX 0x10b
+#define NT_PPC_TM_SPR 0x10c
+#define NT_PPC_TM_CTAR 0x10d
+#define NT_PPC_TM_CPPR 0x10e
+#define NT_PPC_TM_CDSCR 0x10f
+#endif
+
+/* Basic ptrace operations */
+int start_trace(pid_t child)
+{
+ int ret;
+
+ ret = ptrace(PTRACE_ATTACH, child, NULL, NULL);
+ if (ret) {
+ perror("ptrace(PTRACE_ATTACH) failed");
+ return TEST_FAIL;
+ }
+ ret = waitpid(child, NULL, 0);
+ if (ret != child) {
+ perror("waitpid() failed");
+ return TEST_FAIL;
+ }
+ return TEST_PASS;
+}
+
+int stop_trace(pid_t child)
+{
+ int ret;
+
+ ret = ptrace(PTRACE_DETACH, child, NULL, NULL);
+ if (ret) {
+ perror("ptrace(PTRACE_DETACH) failed");
+ return TEST_FAIL;
+ }
+ return TEST_PASS;
+}
+
+int cont_trace(pid_t child)
+{
+ int ret;
+
+ ret = ptrace(PTRACE_CONT, child, NULL, NULL);
+ if (ret) {
+ perror("ptrace(PTRACE_CONT) failed");
+ return TEST_FAIL;
+ }
+ return TEST_PASS;
+}
+
+int ptrace_read_regs(pid_t child, unsigned long type, unsigned long regs[],
+ int n)
+{
+ struct iovec iov;
+ long ret;
+
+ FAIL_IF(start_trace(child));
+
+ iov.iov_base = regs;
+ iov.iov_len = n * sizeof(unsigned long);
+
+ ret = ptrace(PTRACE_GETREGSET, child, type, &iov);
+ if (ret)
+ return ret;
+
+ FAIL_IF(stop_trace(child));
+
+ return TEST_PASS;
+}
+
+long ptrace_write_regs(pid_t child, unsigned long type, unsigned long regs[],
+ int n)
+{
+ struct iovec iov;
+ long ret;
+
+ FAIL_IF(start_trace(child));
+
+ iov.iov_base = regs;
+ iov.iov_len = n * sizeof(unsigned long);
+
+ ret = ptrace(PTRACE_SETREGSET, child, type, &iov);
+
+ FAIL_IF(stop_trace(child));
+
+ return ret;
+}
+
+/* TAR, PPR, DSCR */
+int show_tar_registers(pid_t child, unsigned long *out)
+{
+ struct iovec iov;
+ unsigned long *reg;
+ int ret;
+
+ reg = malloc(sizeof(unsigned long));
+ if (!reg) {
+ perror("malloc() failed");
+ return TEST_FAIL;
+ }
+ iov.iov_base = (u64 *) reg;
+ iov.iov_len = sizeof(unsigned long);
+
+ ret = ptrace(PTRACE_GETREGSET, child, NT_PPC_TAR, &iov);
+ if (ret) {
+ perror("ptrace(PTRACE_GETREGSET) failed");
+ goto fail;
+ }
+ if (out)
+ out[0] = *reg;
+
+ ret = ptrace(PTRACE_GETREGSET, child, NT_PPC_PPR, &iov);
+ if (ret) {
+ perror("ptrace(PTRACE_GETREGSET) failed");
+ goto fail;
+ }
+ if (out)
+ out[1] = *reg;
+
+ ret = ptrace(PTRACE_GETREGSET, child, NT_PPC_DSCR, &iov);
+ if (ret) {
+ perror("ptrace(PTRACE_GETREGSET) failed");
+ goto fail;
+ }
+ if (out)
+ out[2] = *reg;
+
+ free(reg);
+ return TEST_PASS;
+fail:
+ free(reg);
+ return TEST_FAIL;
+}
+
+int write_tar_registers(pid_t child, unsigned long tar,
+ unsigned long ppr, unsigned long dscr)
+{
+ struct iovec iov;
+ unsigned long *reg;
+ int ret;
+
+ reg = malloc(sizeof(unsigned long));
+ if (!reg) {
+ perror("malloc() failed");
+ return TEST_FAIL;
+ }
+
+ iov.iov_base = (u64 *) reg;
+ iov.iov_len = sizeof(unsigned long);
+
+ *reg = tar;
+ ret = ptrace(PTRACE_SETREGSET, child, NT_PPC_TAR, &iov);
+ if (ret) {
+ perror("ptrace(PTRACE_SETREGSET) failed");
+ goto fail;
+ }
+
+ *reg = ppr;
+ ret = ptrace(PTRACE_SETREGSET, child, NT_PPC_PPR, &iov);
+ if (ret) {
+ perror("ptrace(PTRACE_SETREGSET) failed");
+ goto fail;
+ }
+
+ *reg = dscr;
+ ret = ptrace(PTRACE_SETREGSET, child, NT_PPC_DSCR, &iov);
+ if (ret) {
+ perror("ptrace(PTRACE_SETREGSET) failed");
+ goto fail;
+ }
+
+ free(reg);
+ return TEST_PASS;
+fail:
+ free(reg);
+ return TEST_FAIL;
+}
+
+int show_tm_checkpointed_state(pid_t child, unsigned long *out)
+{
+ struct iovec iov;
+ unsigned long *reg;
+ int ret;
+
+ reg = malloc(sizeof(unsigned long));
+ if (!reg) {
+ perror("malloc() failed");
+ return TEST_FAIL;
+ }
+
+ iov.iov_base = (u64 *) reg;
+ iov.iov_len = sizeof(unsigned long);
+
+ ret = ptrace(PTRACE_GETREGSET, child, NT_PPC_TM_CTAR, &iov);
+ if (ret) {
+ perror("ptrace(PTRACE_GETREGSET) failed");
+ goto fail;
+ }
+ if (out)
+ out[0] = *reg;
+
+ ret = ptrace(PTRACE_GETREGSET, child, NT_PPC_TM_CPPR, &iov);
+ if (ret) {
+ perror("ptrace(PTRACE_GETREGSET) failed");
+ goto fail;
+ }
+ if (out)
+ out[1] = *reg;
+
+ ret = ptrace(PTRACE_GETREGSET, child, NT_PPC_TM_CDSCR, &iov);
+ if (ret) {
+ perror("ptrace(PTRACE_GETREGSET) failed");
+ goto fail;
+ }
+ if (out)
+ out[2] = *reg;
+
+ free(reg);
+ return TEST_PASS;
+
+fail:
+ free(reg);
+ return TEST_FAIL;
+}
+
+int write_ckpt_tar_registers(pid_t child, unsigned long tar,
+ unsigned long ppr, unsigned long dscr)
+{
+ struct iovec iov;
+ unsigned long *reg;
+ int ret;
+
+ reg = malloc(sizeof(unsigned long));
+ if (!reg) {
+ perror("malloc() failed");
+ return TEST_FAIL;
+ }
+
+ iov.iov_base = (u64 *) reg;
+ iov.iov_len = sizeof(unsigned long);
+
+ *reg = tar;
+ ret = ptrace(PTRACE_SETREGSET, child, NT_PPC_TM_CTAR, &iov);
+ if (ret) {
+ perror("ptrace(PTRACE_GETREGSET) failed");
+ goto fail;
+ }
+
+ *reg = ppr;
+ ret = ptrace(PTRACE_SETREGSET, child, NT_PPC_TM_CPPR, &iov);
+ if (ret) {
+ perror("ptrace(PTRACE_GETREGSET) failed");
+ goto fail;
+ }
+
+ *reg = dscr;
+ ret = ptrace(PTRACE_SETREGSET, child, NT_PPC_TM_CDSCR, &iov);
+ if (ret) {
+ perror("ptrace(PTRACE_GETREGSET) failed");
+ goto fail;
+ }
+
+ free(reg);
+ return TEST_PASS;
+fail:
+ free(reg);
+ return TEST_FAIL;
+}
+
+/* FPR */
+int show_fpr(pid_t child, unsigned long *fpr)
+{
+ struct fpr_regs *regs;
+ int ret, i;
+
+ regs = (struct fpr_regs *) malloc(sizeof(struct fpr_regs));
+ ret = ptrace(PTRACE_GETFPREGS, child, NULL, regs);
+ if (ret) {
+ perror("ptrace(PTRACE_GETREGSET) failed");
+ return TEST_FAIL;
+ }
+
+ if (fpr) {
+ for (i = 0; i < 32; i++)
+ fpr[i] = regs->fpr[i];
+ }
+ return TEST_PASS;
+}
+
+int write_fpr(pid_t child, unsigned long val)
+{
+ struct fpr_regs *regs;
+ int ret, i;
+
+ regs = (struct fpr_regs *) malloc(sizeof(struct fpr_regs));
+ ret = ptrace(PTRACE_GETFPREGS, child, NULL, regs);
+ if (ret) {
+ perror("ptrace(PTRACE_GETREGSET) failed");
+ return TEST_FAIL;
+ }
+
+ for (i = 0; i < 32; i++)
+ regs->fpr[i] = val;
+
+ ret = ptrace(PTRACE_SETFPREGS, child, NULL, regs);
+ if (ret) {
+ perror("ptrace(PTRACE_GETREGSET) failed");
+ return TEST_FAIL;
+ }
+ return TEST_PASS;
+}
+
+int show_ckpt_fpr(pid_t child, unsigned long *fpr)
+{
+ struct fpr_regs *regs;
+ struct iovec iov;
+ int ret, i;
+
+ regs = (struct fpr_regs *) malloc(sizeof(struct fpr_regs));
+ iov.iov_base = regs;
+ iov.iov_len = sizeof(struct fpr_regs);
+
+ ret = ptrace(PTRACE_GETREGSET, child, NT_PPC_TM_CFPR, &iov);
+ if (ret) {
+ perror("ptrace(PTRACE_GETREGSET) failed");
+ return TEST_FAIL;
+ }
+
+ if (fpr) {
+ for (i = 0; i < 32; i++)
+ fpr[i] = regs->fpr[i];
+ }
+
+ return TEST_PASS;
+}
+
+int write_ckpt_fpr(pid_t child, unsigned long val)
+{
+ struct fpr_regs *regs;
+ struct iovec iov;
+ int ret, i;
+
+ regs = (struct fpr_regs *) malloc(sizeof(struct fpr_regs));
+ iov.iov_base = regs;
+ iov.iov_len = sizeof(struct fpr_regs);
+
+ ret = ptrace(PTRACE_GETREGSET, child, NT_PPC_TM_CFPR, &iov);
+ if (ret) {
+ perror("ptrace(PTRACE_GETREGSET) failed");
+ return TEST_FAIL;
+ }
+
+ for (i = 0; i < 32; i++)
+ regs->fpr[i] = val;
+
+ ret = ptrace(PTRACE_SETREGSET, child, NT_PPC_TM_CFPR, &iov);
+ if (ret) {
+ perror("ptrace(PTRACE_GETREGSET) failed");
+ return TEST_FAIL;
+ }
+ return TEST_PASS;
+}
+
+/* GPR */
+int show_gpr(pid_t child, unsigned long *gpr)
+{
+ struct pt_regs *regs;
+ int ret, i;
+
+ regs = (struct pt_regs *) malloc(sizeof(struct pt_regs));
+ if (!regs) {
+ perror("malloc() failed");
+ return TEST_FAIL;
+ }
+
+ ret = ptrace(PTRACE_GETREGS, child, NULL, regs);
+ if (ret) {
+ perror("ptrace(PTRACE_GETREGSET) failed");
+ return TEST_FAIL;
+ }
+
+ if (gpr) {
+ for (i = 14; i < 32; i++)
+ gpr[i-14] = regs->gpr[i];
+ }
+
+ return TEST_PASS;
+}
+
+int write_gpr(pid_t child, unsigned long val)
+{
+ struct pt_regs *regs;
+ int i, ret;
+
+ regs = (struct pt_regs *) malloc(sizeof(struct pt_regs));
+ if (!regs) {
+ perror("malloc() failed");
+ return TEST_FAIL;
+ }
+
+ ret = ptrace(PTRACE_GETREGS, child, NULL, regs);
+ if (ret) {
+ perror("ptrace(PTRACE_GETREGSET) failed");
+ return TEST_FAIL;
+ }
+
+ for (i = 14; i < 32; i++)
+ regs->gpr[i] = val;
+
+ ret = ptrace(PTRACE_SETREGS, child, NULL, regs);
+ if (ret) {
+ perror("ptrace(PTRACE_GETREGSET) failed");
+ return TEST_FAIL;
+ }
+ return TEST_PASS;
+}
+
+int show_ckpt_gpr(pid_t child, unsigned long *gpr)
+{
+ struct pt_regs *regs;
+ struct iovec iov;
+ int ret, i;
+
+ regs = (struct pt_regs *) malloc(sizeof(struct pt_regs));
+ if (!regs) {
+ perror("malloc() failed");
+ return TEST_FAIL;
+ }
+
+ iov.iov_base = (u64 *) regs;
+ iov.iov_len = sizeof(struct pt_regs);
+
+ ret = ptrace(PTRACE_GETREGSET, child, NT_PPC_TM_CGPR, &iov);
+ if (ret) {
+ perror("ptrace(PTRACE_GETREGSET) failed");
+ return TEST_FAIL;
+ }
+
+ if (gpr) {
+ for (i = 14; i < 32; i++)
+ gpr[i-14] = regs->gpr[i];
+ }
+
+ return TEST_PASS;
+}
+
+int write_ckpt_gpr(pid_t child, unsigned long val)
+{
+ struct pt_regs *regs;
+ struct iovec iov;
+ int ret, i;
+
+ regs = (struct pt_regs *) malloc(sizeof(struct pt_regs));
+ if (!regs) {
+ perror("malloc() failed\n");
+ return TEST_FAIL;
+ }
+ iov.iov_base = (u64 *) regs;
+ iov.iov_len = sizeof(struct pt_regs);
+
+ ret = ptrace(PTRACE_GETREGSET, child, NT_PPC_TM_CGPR, &iov);
+ if (ret) {
+ perror("ptrace(PTRACE_GETREGSET) failed");
+ return TEST_FAIL;
+ }
+
+ for (i = 14; i < 32; i++)
+ regs->gpr[i] = val;
+
+ ret = ptrace(PTRACE_SETREGSET, child, NT_PPC_TM_CGPR, &iov);
+ if (ret) {
+ perror("ptrace(PTRACE_GETREGSET) failed");
+ return TEST_FAIL;
+ }
+ return TEST_PASS;
+}
+
+/* VMX */
+int show_vmx(pid_t child, unsigned long vmx[][2])
+{
+ int ret;
+
+ ret = ptrace(PTRACE_GETVRREGS, child, 0, vmx);
+ if (ret) {
+ perror("ptrace(PTRACE_GETVRREGS) failed");
+ return TEST_FAIL;
+ }
+ return TEST_PASS;
+}
+
+int show_vmx_ckpt(pid_t child, unsigned long vmx[][2])
+{
+ unsigned long regs[34][2];
+ struct iovec iov;
+ int ret;
+
+ iov.iov_base = (u64 *) regs;
+ iov.iov_len = sizeof(regs);
+ ret = ptrace(PTRACE_GETREGSET, child, NT_PPC_TM_CVMX, &iov);
+ if (ret) {
+ perror("ptrace(PTRACE_GETREGSET, NT_PPC_TM_CVMX) failed");
+ return TEST_FAIL;
+ }
+ memcpy(vmx, regs, sizeof(regs));
+ return TEST_PASS;
+}
+
+
+int write_vmx(pid_t child, unsigned long vmx[][2])
+{
+ int ret;
+
+ ret = ptrace(PTRACE_SETVRREGS, child, 0, vmx);
+ if (ret) {
+ perror("ptrace(PTRACE_SETVRREGS) failed");
+ return TEST_FAIL;
+ }
+ return TEST_PASS;
+}
+
+int write_vmx_ckpt(pid_t child, unsigned long vmx[][2])
+{
+ unsigned long regs[34][2];
+ struct iovec iov;
+ int ret;
+
+ memcpy(regs, vmx, sizeof(regs));
+ iov.iov_base = (u64 *) regs;
+ iov.iov_len = sizeof(regs);
+ ret = ptrace(PTRACE_SETREGSET, child, NT_PPC_TM_CVMX, &iov);
+ if (ret) {
+ perror("ptrace(PTRACE_SETREGSET, NT_PPC_TM_CVMX) failed");
+ return TEST_FAIL;
+ }
+ return TEST_PASS;
+}
+
+/* VSX */
+int show_vsx(pid_t child, unsigned long *vsx)
+{
+ int ret;
+
+ ret = ptrace(PTRACE_GETVSRREGS, child, 0, vsx);
+ if (ret) {
+ perror("ptrace(PTRACE_GETVSRREGS) failed");
+ return TEST_FAIL;
+ }
+ return TEST_PASS;
+}
+
+int show_vsx_ckpt(pid_t child, unsigned long *vsx)
+{
+ unsigned long regs[32];
+ struct iovec iov;
+ int ret;
+
+ iov.iov_base = (u64 *) regs;
+ iov.iov_len = sizeof(regs);
+ ret = ptrace(PTRACE_GETREGSET, child, NT_PPC_TM_CVSX, &iov);
+ if (ret) {
+ perror("ptrace(PTRACE_GETREGSET, NT_PPC_TM_CVSX) failed");
+ return TEST_FAIL;
+ }
+ memcpy(vsx, regs, sizeof(regs));
+ return TEST_PASS;
+}
+
+int write_vsx(pid_t child, unsigned long *vsx)
+{
+ int ret;
+
+ ret = ptrace(PTRACE_SETVSRREGS, child, 0, vsx);
+ if (ret) {
+ perror("ptrace(PTRACE_SETVSRREGS) failed");
+ return TEST_FAIL;
+ }
+ return TEST_PASS;
+}
+
+int write_vsx_ckpt(pid_t child, unsigned long *vsx)
+{
+ unsigned long regs[32];
+ struct iovec iov;
+ int ret;
+
+ memcpy(regs, vsx, sizeof(regs));
+ iov.iov_base = (u64 *) regs;
+ iov.iov_len = sizeof(regs);
+ ret = ptrace(PTRACE_SETREGSET, child, NT_PPC_TM_CVSX, &iov);
+ if (ret) {
+ perror("ptrace(PTRACE_SETREGSET, NT_PPC_TM_CVSX) failed");
+ return TEST_FAIL;
+ }
+ return TEST_PASS;
+}
+
+/* TM SPR */
+int show_tm_spr(pid_t child, struct tm_spr_regs *out)
+{
+ struct tm_spr_regs *regs;
+ struct iovec iov;
+ int ret;
+
+ regs = (struct tm_spr_regs *) malloc(sizeof(struct tm_spr_regs));
+ if (!regs) {
+ perror("malloc() failed");
+ return TEST_FAIL;
+ }
+
+ iov.iov_base = (u64 *) regs;
+ iov.iov_len = sizeof(struct tm_spr_regs);
+
+ ret = ptrace(PTRACE_GETREGSET, child, NT_PPC_TM_SPR, &iov);
+ if (ret) {
+ perror("ptrace(PTRACE_GETREGSET) failed");
+ return TEST_FAIL;
+ }
+
+ if (out)
+ memcpy(out, regs, sizeof(struct tm_spr_regs));
+
+ return TEST_PASS;
+}
+
+
+
+/* Analyse TEXASR after TM failure */
+inline unsigned long get_tfiar(void)
+{
+ unsigned long ret;
+
+ asm volatile("mfspr %0,%1" : "=r" (ret) : "i" (SPRN_TFIAR));
+ return ret;
+}
+
+void analyse_texasr(unsigned long texasr)
+{
+ printf("TEXASR: %16lx\t", texasr);
+
+ if (texasr & TEXASR_FP)
+ printf("TEXASR_FP ");
+
+ if (texasr & TEXASR_DA)
+ printf("TEXASR_DA ");
+
+ if (texasr & TEXASR_NO)
+ printf("TEXASR_NO ");
+
+ if (texasr & TEXASR_FO)
+ printf("TEXASR_FO ");
+
+ if (texasr & TEXASR_SIC)
+ printf("TEXASR_SIC ");
+
+ if (texasr & TEXASR_NTC)
+ printf("TEXASR_NTC ");
+
+ if (texasr & TEXASR_TC)
+ printf("TEXASR_TC ");
+
+ if (texasr & TEXASR_TIC)
+ printf("TEXASR_TIC ");
+
+ if (texasr & TEXASR_IC)
+ printf("TEXASR_IC ");
+
+ if (texasr & TEXASR_IFC)
+ printf("TEXASR_IFC ");
+
+ if (texasr & TEXASR_ABT)
+ printf("TEXASR_ABT ");
+
+ if (texasr & TEXASR_SPD)
+ printf("TEXASR_SPD ");
+
+ if (texasr & TEXASR_HV)
+ printf("TEXASR_HV ");
+
+ if (texasr & TEXASR_PR)
+ printf("TEXASR_PR ");
+
+ if (texasr & TEXASR_FS)
+ printf("TEXASR_FS ");
+
+ if (texasr & TEXASR_TE)
+ printf("TEXASR_TE ");
+
+ if (texasr & TEXASR_ROT)
+ printf("TEXASR_ROT ");
+
+ printf("TFIAR :%lx\n", get_tfiar());
+}
+
+void store_gpr(unsigned long *addr);
+void store_fpr(float *addr);
diff --git a/tools/testing/selftests/powerpc/scripts/hmi.sh b/tools/testing/selftests/powerpc/scripts/hmi.sh
new file mode 100755
index 000000000..83fb253ae
--- /dev/null
+++ b/tools/testing/selftests/powerpc/scripts/hmi.sh
@@ -0,0 +1,89 @@
+#!/bin/sh
+#
+# Copyright 2015, Daniel Axtens, IBM Corporation
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+
+# do we have ./getscom, ./putscom?
+if [ -x ./getscom ] && [ -x ./putscom ]; then
+ GETSCOM=./getscom
+ PUTSCOM=./putscom
+elif which getscom > /dev/null; then
+ GETSCOM=$(which getscom)
+ PUTSCOM=$(which putscom)
+else
+ cat <<EOF
+Can't find getscom/putscom in . or \$PATH.
+See https://github.com/open-power/skiboot.
+The tool is in external/xscom-utils
+EOF
+ exit 1
+fi
+
+# We will get 8 HMI events per injection
+# todo: deal with things being offline
+expected_hmis=8
+COUNT_HMIS() {
+ dmesg | grep -c 'Harmless Hypervisor Maintenance interrupt'
+}
+
+# massively expand snooze delay, allowing injection on all cores
+ppc64_cpu --smt-snooze-delay=1000000000
+
+# when we exit, restore it
+trap "ppc64_cpu --smt-snooze-delay=100" 0 1
+
+# for each chip+core combination
+# todo - less fragile parsing
+egrep -o 'OCC: Chip [0-9a-f]+ Core [0-9a-f]' < /sys/firmware/opal/msglog |
+while read chipcore; do
+ chip=$(echo "$chipcore"|awk '{print $3}')
+ core=$(echo "$chipcore"|awk '{print $5}')
+ fir="0x1${core}013100"
+
+ # verify that Core FIR is zero as expected
+ if [ "$($GETSCOM -c 0x${chip} $fir)" != 0 ]; then
+ echo "FIR was not zero before injection for chip $chip, core $core. Aborting!"
+ echo "Result of $GETSCOM -c 0x${chip} $fir:"
+ $GETSCOM -c 0x${chip} $fir
+ echo "If you get a -5 error, the core may be in idle state. Try stress-ng."
+ echo "Otherwise, try $PUTSCOM -c 0x${chip} $fir 0"
+ exit 1
+ fi
+
+ # keep track of the number of HMIs handled
+ old_hmis=$(COUNT_HMIS)
+
+ # do injection, adding a marker to dmesg for clarity
+ echo "Injecting HMI on core $core, chip $chip" | tee /dev/kmsg
+ # inject a RegFile recoverable error
+ if ! $PUTSCOM -c 0x${chip} $fir 2000000000000000 > /dev/null; then
+ echo "Error injecting. Aborting!"
+ exit 1
+ fi
+
+ # now we want to wait for all the HMIs to be processed
+ # we expect one per thread on the core
+ i=0;
+ new_hmis=$(COUNT_HMIS)
+ while [ $new_hmis -lt $((old_hmis + expected_hmis)) ] && [ $i -lt 12 ]; do
+ echo "Seen $((new_hmis - old_hmis)) HMI(s) out of $expected_hmis expected, sleeping"
+ sleep 5;
+ i=$((i + 1))
+ new_hmis=$(COUNT_HMIS)
+ done
+ if [ $i = 12 ]; then
+ echo "Haven't seen expected $expected_hmis recoveries after 1 min. Aborting."
+ exit 1
+ fi
+ echo "Processed $expected_hmis events; presumed success. Check dmesg."
+ echo ""
+done
diff --git a/tools/testing/selftests/powerpc/signal/.gitignore b/tools/testing/selftests/powerpc/signal/.gitignore
new file mode 100644
index 000000000..1b89224a8
--- /dev/null
+++ b/tools/testing/selftests/powerpc/signal/.gitignore
@@ -0,0 +1,2 @@
+signal
+signal_tm
diff --git a/tools/testing/selftests/powerpc/signal/Makefile b/tools/testing/selftests/powerpc/signal/Makefile
new file mode 100644
index 000000000..209a958dc
--- /dev/null
+++ b/tools/testing/selftests/powerpc/signal/Makefile
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0
+TEST_GEN_PROGS := signal signal_tm
+
+CFLAGS += -maltivec
+$(OUTPUT)/signal_tm: CFLAGS += -mhtm
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): ../harness.c ../utils.c signal.S
diff --git a/tools/testing/selftests/powerpc/signal/signal.S b/tools/testing/selftests/powerpc/signal/signal.S
new file mode 100644
index 000000000..322f2f1fc
--- /dev/null
+++ b/tools/testing/selftests/powerpc/signal/signal.S
@@ -0,0 +1,50 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include "basic_asm.h"
+
+/* long signal_self(pid_t pid, int sig); */
+FUNC_START(signal_self)
+ li r0,37 /* sys_kill */
+ /* r3 already has our pid in it */
+ /* r4 already has signal type in it */
+ sc
+ bc 4,3,1f
+ subfze r3,r3
+1: blr
+FUNC_END(signal_self)
+
+/* long tm_signal_self(pid_t pid, int sig, int *ret); */
+FUNC_START(tm_signal_self)
+ PUSH_BASIC_STACK(8)
+ std r5,STACK_FRAME_PARAM(0)(sp) /* ret */
+ tbegin.
+ beq 1f
+ tsuspend.
+ li r0,37 /* sys_kill */
+ /* r3 already has our pid in it */
+ /* r4 already has signal type in it */
+ sc
+ ld r5,STACK_FRAME_PARAM(0)(sp) /* ret */
+ bc 4,3,2f
+ subfze r3,r3
+2: std r3,0(r5)
+ tabort. 0
+ tresume. /* Be nice to some cleanup, jumps back to tbegin then to 1: */
+ /*
+ * Transaction should be proper doomed and we should never get
+ * here
+ */
+ li r3,1
+ POP_BASIC_STACK(8)
+ blr
+1: li r3,0
+ POP_BASIC_STACK(8)
+ blr
+FUNC_END(tm_signal_self)
diff --git a/tools/testing/selftests/powerpc/signal/signal.c b/tools/testing/selftests/powerpc/signal/signal.c
new file mode 100644
index 000000000..e7dedd28b
--- /dev/null
+++ b/tools/testing/selftests/powerpc/signal/signal.c
@@ -0,0 +1,111 @@
+/*
+ * Copyright 2016, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Sending one self a signal should always get delivered.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include <altivec.h>
+
+#include "utils.h"
+
+#define MAX_ATTEMPT 500000
+#define TIMEOUT 5
+
+extern long signal_self(pid_t pid, int sig);
+
+static sig_atomic_t signaled;
+static sig_atomic_t fail;
+
+static void signal_handler(int sig)
+{
+ if (sig == SIGUSR1)
+ signaled = 1;
+ else
+ fail = 1;
+}
+
+static int test_signal()
+{
+ int i;
+ struct sigaction act;
+ pid_t ppid = getpid();
+ pid_t pid;
+
+ act.sa_handler = signal_handler;
+ act.sa_flags = 0;
+ sigemptyset(&act.sa_mask);
+ if (sigaction(SIGUSR1, &act, NULL) < 0) {
+ perror("sigaction SIGUSR1");
+ exit(1);
+ }
+ if (sigaction(SIGALRM, &act, NULL) < 0) {
+ perror("sigaction SIGALRM");
+ exit(1);
+ }
+
+ /* Don't do this for MAX_ATTEMPT, its simply too long */
+ for(i = 0; i < 1000; i++) {
+ pid = fork();
+ if (pid == -1) {
+ perror("fork");
+ exit(1);
+ }
+ if (pid == 0) {
+ signal_self(ppid, SIGUSR1);
+ exit(1);
+ } else {
+ alarm(0); /* Disable any pending */
+ alarm(2);
+ while (!signaled && !fail)
+ asm volatile("": : :"memory");
+ if (!signaled) {
+ fprintf(stderr, "Didn't get signal from child\n");
+ FAIL_IF(1); /* For the line number */
+ }
+ /* Otherwise we'll loop too fast and fork() will eventually fail */
+ waitpid(pid, NULL, 0);
+ }
+ }
+
+ for (i = 0; i < MAX_ATTEMPT; i++) {
+ long rc;
+
+ alarm(0); /* Disable any pending */
+ signaled = 0;
+ alarm(TIMEOUT);
+ rc = signal_self(ppid, SIGUSR1);
+ if (rc) {
+ fprintf(stderr, "(%d) Fail reason: %d rc=0x%lx",
+ i, fail, rc);
+ FAIL_IF(1); /* For the line number */
+ }
+ while (!signaled && !fail)
+ asm volatile("": : :"memory");
+ if (!signaled) {
+ fprintf(stderr, "(%d) Fail reason: %d rc=0x%lx",
+ i, fail, rc);
+ FAIL_IF(1); /* For the line number */
+ }
+ }
+
+ return 0;
+}
+
+int main(void)
+{
+ test_harness_set_timeout(300);
+ return test_harness(test_signal, "signal");
+}
diff --git a/tools/testing/selftests/powerpc/signal/signal_tm.c b/tools/testing/selftests/powerpc/signal/signal_tm.c
new file mode 100644
index 000000000..2e7451a37
--- /dev/null
+++ b/tools/testing/selftests/powerpc/signal/signal_tm.c
@@ -0,0 +1,110 @@
+/*
+ * Copyright 2016, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Sending one self a signal should always get delivered.
+ */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+
+#include <altivec.h>
+
+#include "utils.h"
+#include "../tm/tm.h"
+
+#define MAX_ATTEMPT 500000
+#define TIMEOUT 10
+
+extern long tm_signal_self(pid_t pid, int sig, long *ret);
+
+static sig_atomic_t signaled;
+static sig_atomic_t fail;
+
+static void signal_handler(int sig)
+{
+ if (tcheck_active()) {
+ fail = 2;
+ return;
+ }
+
+ if (sig == SIGUSR1)
+ signaled = 1;
+ else
+ fail = 1;
+}
+
+static int test_signal_tm()
+{
+ int i;
+ struct sigaction act;
+
+ act.sa_handler = signal_handler;
+ act.sa_flags = 0;
+ sigemptyset(&act.sa_mask);
+ if (sigaction(SIGUSR1, &act, NULL) < 0) {
+ perror("sigaction SIGUSR1");
+ exit(1);
+ }
+ if (sigaction(SIGALRM, &act, NULL) < 0) {
+ perror("sigaction SIGALRM");
+ exit(1);
+ }
+
+ SKIP_IF(!have_htm());
+
+ for (i = 0; i < MAX_ATTEMPT; i++) {
+ /*
+ * If anything bad happens in ASM and we fail to set ret
+ * because *handwave* TM this will cause failure
+ */
+ long ret = 0xdead;
+ long rc = 0xbeef;
+
+ alarm(0); /* Disable any pending */
+ signaled = 0;
+ alarm(TIMEOUT);
+ FAIL_IF(tcheck_transactional());
+ rc = tm_signal_self(getpid(), SIGUSR1, &ret);
+ if (ret == 0xdead)
+ /*
+ * This basically means the transaction aborted before we
+ * even got to the suspend... this is crazy but it
+ * happens.
+ * Yes this also means we might never make forward
+ * progress... the alarm() will trip eventually...
+ */
+ continue;
+
+ if (rc || ret) {
+ /* Ret is actually an errno */
+ printf("TEXASR 0x%016lx, TFIAR 0x%016lx\n",
+ __builtin_get_texasr(), __builtin_get_tfiar());
+ fprintf(stderr, "(%d) Fail reason: %d rc=0x%lx ret=0x%lx\n",
+ i, fail, rc, ret);
+ FAIL_IF(ret);
+ }
+ while(!signaled && !fail)
+ asm volatile("": : :"memory");
+ if (!signaled) {
+ fprintf(stderr, "(%d) Fail reason: %d rc=0x%lx ret=0x%lx\n",
+ i, fail, rc, ret);
+ FAIL_IF(fail); /* For the line number */
+ }
+ }
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(test_signal_tm, "signal_tm");
+}
diff --git a/tools/testing/selftests/powerpc/stringloops/.gitignore b/tools/testing/selftests/powerpc/stringloops/.gitignore
new file mode 100644
index 000000000..0b43da74e
--- /dev/null
+++ b/tools/testing/selftests/powerpc/stringloops/.gitignore
@@ -0,0 +1 @@
+memcmp
diff --git a/tools/testing/selftests/powerpc/stringloops/Makefile b/tools/testing/selftests/powerpc/stringloops/Makefile
new file mode 100644
index 000000000..7fc0623d8
--- /dev/null
+++ b/tools/testing/selftests/powerpc/stringloops/Makefile
@@ -0,0 +1,35 @@
+# SPDX-License-Identifier: GPL-2.0
+# The loops are all 64-bit code
+CFLAGS += -I$(CURDIR)
+
+EXTRA_SOURCES := ../harness.c
+
+build_32bit = $(shell if ($(CC) $(CFLAGS) -m32 -o /dev/null memcmp.c >/dev/null 2>&1) then echo "1"; fi)
+
+TEST_GEN_PROGS := memcmp_64 strlen
+
+$(OUTPUT)/memcmp_64: memcmp.c
+$(OUTPUT)/memcmp_64: CFLAGS += -m64 -maltivec
+
+ifeq ($(build_32bit),1)
+$(OUTPUT)/memcmp_32: memcmp.c
+$(OUTPUT)/memcmp_32: CFLAGS += -m32
+
+TEST_GEN_PROGS += memcmp_32
+endif
+
+$(OUTPUT)/strlen: strlen.c string.c
+
+ifeq ($(build_32bit),1)
+$(OUTPUT)/strlen_32: strlen.c
+$(OUTPUT)/strlen_32: CFLAGS += -m32
+
+TEST_GEN_PROGS += strlen_32
+endif
+
+ASFLAGS = $(CFLAGS)
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): $(EXTRA_SOURCES)
diff --git a/tools/testing/selftests/powerpc/stringloops/asm/cache.h b/tools/testing/selftests/powerpc/stringloops/asm/cache.h
new file mode 100644
index 000000000..8a2840831
--- /dev/null
+++ b/tools/testing/selftests/powerpc/stringloops/asm/cache.h
@@ -0,0 +1 @@
+#define IFETCH_ALIGN_BYTES 4
diff --git a/tools/testing/selftests/powerpc/stringloops/asm/export.h b/tools/testing/selftests/powerpc/stringloops/asm/export.h
new file mode 100644
index 000000000..2d14a9b42
--- /dev/null
+++ b/tools/testing/selftests/powerpc/stringloops/asm/export.h
@@ -0,0 +1 @@
+#define EXPORT_SYMBOL(x)
diff --git a/tools/testing/selftests/powerpc/stringloops/asm/ppc-opcode.h b/tools/testing/selftests/powerpc/stringloops/asm/ppc-opcode.h
new file mode 100644
index 000000000..9de413c0c
--- /dev/null
+++ b/tools/testing/selftests/powerpc/stringloops/asm/ppc-opcode.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright 2009 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * provides masks and opcode images for use by code generation, emulation
+ * and for instructions that older assemblers might not know about
+ */
+#ifndef _ASM_POWERPC_PPC_OPCODE_H
+#define _ASM_POWERPC_PPC_OPCODE_H
+
+
+# define stringify_in_c(...) __VA_ARGS__
+# define ASM_CONST(x) x
+
+
+#define PPC_INST_VCMPEQUD_RC 0x100000c7
+#define PPC_INST_VCMPEQUB_RC 0x10000006
+
+#define __PPC_RC21 (0x1 << 10)
+
+/* macros to insert fields into opcodes */
+#define ___PPC_RA(a) (((a) & 0x1f) << 16)
+#define ___PPC_RB(b) (((b) & 0x1f) << 11)
+#define ___PPC_RS(s) (((s) & 0x1f) << 21)
+#define ___PPC_RT(t) ___PPC_RS(t)
+
+#define VCMPEQUD_RC(vrt, vra, vrb) stringify_in_c(.long PPC_INST_VCMPEQUD_RC | \
+ ___PPC_RT(vrt) | ___PPC_RA(vra) | \
+ ___PPC_RB(vrb) | __PPC_RC21)
+
+#define VCMPEQUB_RC(vrt, vra, vrb) stringify_in_c(.long PPC_INST_VCMPEQUB_RC | \
+ ___PPC_RT(vrt) | ___PPC_RA(vra) | \
+ ___PPC_RB(vrb) | __PPC_RC21)
+
+#endif /* _ASM_POWERPC_PPC_OPCODE_H */
diff --git a/tools/testing/selftests/powerpc/stringloops/asm/ppc_asm.h b/tools/testing/selftests/powerpc/stringloops/asm/ppc_asm.h
new file mode 100644
index 000000000..d2c0a911f
--- /dev/null
+++ b/tools/testing/selftests/powerpc/stringloops/asm/ppc_asm.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PPC_ASM_H
+#define __PPC_ASM_H
+#include <ppc-asm.h>
+
+#ifndef r1
+#define r1 sp
+#endif
+
+#define _GLOBAL(A) FUNC_START(test_ ## A)
+#define _GLOBAL_TOC(A) FUNC_START(test_ ## A)
+
+#define CONFIG_ALTIVEC
+
+#define R14 r14
+#define R15 r15
+#define R16 r16
+#define R17 r17
+#define R18 r18
+#define R19 r19
+#define R20 r20
+#define R21 r21
+#define R22 r22
+#define R29 r29
+#define R30 r30
+#define R31 r31
+
+#define STACKFRAMESIZE 256
+#define STK_REG(i) (112 + ((i)-14)*8)
+
+#define BEGIN_FTR_SECTION
+#define END_FTR_SECTION_IFSET(val)
+#endif
diff --git a/tools/testing/selftests/powerpc/stringloops/memcmp.c b/tools/testing/selftests/powerpc/stringloops/memcmp.c
new file mode 100644
index 000000000..b1fa75469
--- /dev/null
+++ b/tools/testing/selftests/powerpc/stringloops/memcmp.c
@@ -0,0 +1,159 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <malloc.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include "utils.h"
+
+#define SIZE 256
+#define ITERATIONS 10000
+
+#define LARGE_SIZE (5 * 1024)
+#define LARGE_ITERATIONS 1000
+#define LARGE_MAX_OFFSET 32
+#define LARGE_SIZE_START 4096
+
+#define MAX_OFFSET_DIFF_S1_S2 48
+
+int vmx_count;
+int enter_vmx_ops(void)
+{
+ vmx_count++;
+ return 1;
+}
+
+void exit_vmx_ops(void)
+{
+ vmx_count--;
+}
+int test_memcmp(const void *s1, const void *s2, size_t n);
+
+/* test all offsets and lengths */
+static void test_one(char *s1, char *s2, unsigned long max_offset,
+ unsigned long size_start, unsigned long max_size)
+{
+ unsigned long offset, size;
+
+ for (offset = 0; offset < max_offset; offset++) {
+ for (size = size_start; size < (max_size - offset); size++) {
+ int x, y;
+ unsigned long i;
+
+ y = memcmp(s1+offset, s2+offset, size);
+ x = test_memcmp(s1+offset, s2+offset, size);
+
+ if (((x ^ y) < 0) && /* Trick to compare sign */
+ ((x | y) != 0)) { /* check for zero */
+ printf("memcmp returned %d, should have returned %d (offset %ld size %ld)\n", x, y, offset, size);
+
+ for (i = offset; i < offset+size; i++)
+ printf("%02x ", s1[i]);
+ printf("\n");
+
+ for (i = offset; i < offset+size; i++)
+ printf("%02x ", s2[i]);
+ printf("\n");
+ abort();
+ }
+
+ if (vmx_count != 0) {
+ printf("vmx enter/exit not paired.(offset:%ld size:%ld s1:%p s2:%p vc:%d\n",
+ offset, size, s1, s2, vmx_count);
+ printf("\n");
+ abort();
+ }
+ }
+ }
+}
+
+static int testcase(bool islarge)
+{
+ char *s1;
+ char *s2;
+ unsigned long i;
+
+ unsigned long comp_size = (islarge ? LARGE_SIZE : SIZE);
+ unsigned long alloc_size = comp_size + MAX_OFFSET_DIFF_S1_S2;
+ int iterations = islarge ? LARGE_ITERATIONS : ITERATIONS;
+
+ s1 = memalign(128, alloc_size);
+ if (!s1) {
+ perror("memalign");
+ exit(1);
+ }
+
+ s2 = memalign(128, alloc_size);
+ if (!s2) {
+ perror("memalign");
+ exit(1);
+ }
+
+ srandom(time(0));
+
+ for (i = 0; i < iterations; i++) {
+ unsigned long j;
+ unsigned long change;
+ char *rand_s1 = s1;
+ char *rand_s2 = s2;
+
+ for (j = 0; j < alloc_size; j++)
+ s1[j] = random();
+
+ rand_s1 += random() % MAX_OFFSET_DIFF_S1_S2;
+ rand_s2 += random() % MAX_OFFSET_DIFF_S1_S2;
+ memcpy(rand_s2, rand_s1, comp_size);
+
+ /* change one byte */
+ change = random() % comp_size;
+ rand_s2[change] = random() & 0xff;
+
+ if (islarge)
+ test_one(rand_s1, rand_s2, LARGE_MAX_OFFSET,
+ LARGE_SIZE_START, comp_size);
+ else
+ test_one(rand_s1, rand_s2, SIZE, 0, comp_size);
+ }
+
+ srandom(time(0));
+
+ for (i = 0; i < iterations; i++) {
+ unsigned long j;
+ unsigned long change;
+ char *rand_s1 = s1;
+ char *rand_s2 = s2;
+
+ for (j = 0; j < alloc_size; j++)
+ s1[j] = random();
+
+ rand_s1 += random() % MAX_OFFSET_DIFF_S1_S2;
+ rand_s2 += random() % MAX_OFFSET_DIFF_S1_S2;
+ memcpy(rand_s2, rand_s1, comp_size);
+
+ /* change multiple bytes, 1/8 of total */
+ for (j = 0; j < comp_size / 8; j++) {
+ change = random() % comp_size;
+ s2[change] = random() & 0xff;
+ }
+
+ if (islarge)
+ test_one(rand_s1, rand_s2, LARGE_MAX_OFFSET,
+ LARGE_SIZE_START, comp_size);
+ else
+ test_one(rand_s1, rand_s2, SIZE, 0, comp_size);
+ }
+
+ return 0;
+}
+
+static int testcases(void)
+{
+ testcase(0);
+ testcase(1);
+ return 0;
+}
+
+int main(void)
+{
+ test_harness_set_timeout(300);
+ return test_harness(testcases, "memcmp");
+}
diff --git a/tools/testing/selftests/powerpc/stringloops/memcmp_32.S b/tools/testing/selftests/powerpc/stringloops/memcmp_32.S
new file mode 120000
index 000000000..056f2b3af
--- /dev/null
+++ b/tools/testing/selftests/powerpc/stringloops/memcmp_32.S
@@ -0,0 +1 @@
+../../../../../arch/powerpc/lib/memcmp_32.S \ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/stringloops/memcmp_64.S b/tools/testing/selftests/powerpc/stringloops/memcmp_64.S
new file mode 120000
index 000000000..9bc87e438
--- /dev/null
+++ b/tools/testing/selftests/powerpc/stringloops/memcmp_64.S
@@ -0,0 +1 @@
+../../../../../arch/powerpc/lib/memcmp_64.S \ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/stringloops/string.c b/tools/testing/selftests/powerpc/stringloops/string.c
new file mode 100644
index 000000000..45e777541
--- /dev/null
+++ b/tools/testing/selftests/powerpc/stringloops/string.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copied from linux/lib/string.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+#include <stddef.h>
+
+/**
+ * strlen - Find the length of a string
+ * @s: The string to be sized
+ */
+size_t test_strlen(const char *s)
+{
+ const char *sc;
+
+ for (sc = s; *sc != '\0'; ++sc)
+ /* nothing */;
+ return sc - s;
+}
diff --git a/tools/testing/selftests/powerpc/stringloops/strlen.c b/tools/testing/selftests/powerpc/stringloops/strlen.c
new file mode 100644
index 000000000..9055ebc48
--- /dev/null
+++ b/tools/testing/selftests/powerpc/stringloops/strlen.c
@@ -0,0 +1,127 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <malloc.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include "utils.h"
+
+#define SIZE 256
+#define ITERATIONS 1000
+#define ITERATIONS_BENCH 100000
+
+int test_strlen(const void *s);
+
+/* test all offsets and lengths */
+static void test_one(char *s)
+{
+ unsigned long offset;
+
+ for (offset = 0; offset < SIZE; offset++) {
+ int x, y;
+ unsigned long i;
+
+ y = strlen(s + offset);
+ x = test_strlen(s + offset);
+
+ if (x != y) {
+ printf("strlen() returned %d, should have returned %d (%p offset %ld)\n", x, y, s, offset);
+
+ for (i = offset; i < SIZE; i++)
+ printf("%02x ", s[i]);
+ printf("\n");
+ }
+ }
+}
+
+static void bench_test(char *s)
+{
+ struct timespec ts_start, ts_end;
+ int i;
+
+ clock_gettime(CLOCK_MONOTONIC, &ts_start);
+
+ for (i = 0; i < ITERATIONS_BENCH; i++)
+ test_strlen(s);
+
+ clock_gettime(CLOCK_MONOTONIC, &ts_end);
+
+ printf("len %3.3d : time = %.6f\n", test_strlen(s), ts_end.tv_sec - ts_start.tv_sec + (ts_end.tv_nsec - ts_start.tv_nsec) / 1e9);
+}
+
+static int testcase(void)
+{
+ char *s;
+ unsigned long i;
+
+ s = memalign(128, SIZE);
+ if (!s) {
+ perror("memalign");
+ exit(1);
+ }
+
+ srandom(1);
+
+ memset(s, 0, SIZE);
+ for (i = 0; i < SIZE; i++) {
+ char c;
+
+ do {
+ c = random() & 0x7f;
+ } while (!c);
+ s[i] = c;
+ test_one(s);
+ }
+
+ for (i = 0; i < ITERATIONS; i++) {
+ unsigned long j;
+
+ for (j = 0; j < SIZE; j++) {
+ char c;
+
+ do {
+ c = random() & 0x7f;
+ } while (!c);
+ s[j] = c;
+ }
+ for (j = 0; j < sizeof(long); j++) {
+ s[SIZE - 1 - j] = 0;
+ test_one(s);
+ }
+ }
+
+ for (i = 0; i < SIZE; i++) {
+ char c;
+
+ do {
+ c = random() & 0x7f;
+ } while (!c);
+ s[i] = c;
+ }
+
+ bench_test(s);
+
+ s[16] = 0;
+ bench_test(s);
+
+ s[8] = 0;
+ bench_test(s);
+
+ s[4] = 0;
+ bench_test(s);
+
+ s[3] = 0;
+ bench_test(s);
+
+ s[2] = 0;
+ bench_test(s);
+
+ s[1] = 0;
+ bench_test(s);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(testcase, "strlen");
+}
diff --git a/tools/testing/selftests/powerpc/stringloops/strlen_32.S b/tools/testing/selftests/powerpc/stringloops/strlen_32.S
new file mode 120000
index 000000000..72b13731b
--- /dev/null
+++ b/tools/testing/selftests/powerpc/stringloops/strlen_32.S
@@ -0,0 +1 @@
+../../../../../arch/powerpc/lib/strlen_32.S \ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/switch_endian/.gitignore b/tools/testing/selftests/powerpc/switch_endian/.gitignore
new file mode 100644
index 000000000..89e762eab
--- /dev/null
+++ b/tools/testing/selftests/powerpc/switch_endian/.gitignore
@@ -0,0 +1,2 @@
+switch_endian_test
+check-reversed.S
diff --git a/tools/testing/selftests/powerpc/switch_endian/Makefile b/tools/testing/selftests/powerpc/switch_endian/Makefile
new file mode 100644
index 000000000..bdc081afe
--- /dev/null
+++ b/tools/testing/selftests/powerpc/switch_endian/Makefile
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0
+TEST_GEN_PROGS := switch_endian_test
+
+ASFLAGS += -O2 -Wall -g -nostdlib -m64
+
+EXTRA_CLEAN = $(OUTPUT)/*.o $(OUTPUT)/check-reversed.S
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+$(OUTPUT)/switch_endian_test: ASFLAGS += -I $(OUTPUT)
+$(OUTPUT)/switch_endian_test: $(OUTPUT)/check-reversed.S
+
+$(OUTPUT)/check-reversed.o: $(OUTPUT)/check.o
+ $(CROSS_COMPILE)objcopy -j .text --reverse-bytes=4 -O binary $< $@
+
+$(OUTPUT)/check-reversed.S: $(OUTPUT)/check-reversed.o
+ hexdump -v -e '/1 ".byte 0x%02X\n"' $< > $@
diff --git a/tools/testing/selftests/powerpc/switch_endian/check.S b/tools/testing/selftests/powerpc/switch_endian/check.S
new file mode 100644
index 000000000..927a5c675
--- /dev/null
+++ b/tools/testing/selftests/powerpc/switch_endian/check.S
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include "common.h"
+
+/*
+ * Checks that registers contain what we expect, ie. they were not clobbered by
+ * the syscall.
+ *
+ * r15: pattern to check registers against.
+ *
+ * At the end r3 == 0 if everything's OK.
+ */
+ nop # guaranteed to be illegal in reverse-endian
+ mr r9,r15
+ cmpd r9,r3 # check r3
+ bne 1f
+ addi r9,r15,4 # check r4
+ cmpd r9,r4
+ bne 1f
+ lis r9,0x00FF # check CR
+ ori r9,r9,0xF000
+ mfcr r10
+ and r10,r10,r9
+ cmpw r9,r10
+ addi r9,r15,34
+ bne 1f
+ addi r9,r15,32 # check LR
+ mflr r10
+ cmpd r9,r10
+ bne 1f
+ addi r9,r15,5 # check r5
+ cmpd r9,r5
+ bne 1f
+ addi r9,r15,6 # check r6
+ cmpd r9,r6
+ bne 1f
+ addi r9,r15,7 # check r7
+ cmpd r9,r7
+ bne 1f
+ addi r9,r15,8 # check r8
+ cmpd r9,r8
+ bne 1f
+ addi r9,r15,13 # check r13
+ cmpd r9,r13
+ bne 1f
+ addi r9,r15,14 # check r14
+ cmpd r9,r14
+ bne 1f
+ addi r9,r15,16 # check r16
+ cmpd r9,r16
+ bne 1f
+ addi r9,r15,17 # check r17
+ cmpd r9,r17
+ bne 1f
+ addi r9,r15,18 # check r18
+ cmpd r9,r18
+ bne 1f
+ addi r9,r15,19 # check r19
+ cmpd r9,r19
+ bne 1f
+ addi r9,r15,20 # check r20
+ cmpd r9,r20
+ bne 1f
+ addi r9,r15,21 # check r21
+ cmpd r9,r21
+ bne 1f
+ addi r9,r15,22 # check r22
+ cmpd r9,r22
+ bne 1f
+ addi r9,r15,23 # check r23
+ cmpd r9,r23
+ bne 1f
+ addi r9,r15,24 # check r24
+ cmpd r9,r24
+ bne 1f
+ addi r9,r15,25 # check r25
+ cmpd r9,r25
+ bne 1f
+ addi r9,r15,26 # check r26
+ cmpd r9,r26
+ bne 1f
+ addi r9,r15,27 # check r27
+ cmpd r9,r27
+ bne 1f
+ addi r9,r15,28 # check r28
+ cmpd r9,r28
+ bne 1f
+ addi r9,r15,29 # check r29
+ cmpd r9,r29
+ bne 1f
+ addi r9,r15,30 # check r30
+ cmpd r9,r30
+ bne 1f
+ addi r9,r15,31 # check r31
+ cmpd r9,r31
+ bne 1f
+ b 2f
+1: mr r3, r9
+ li r0, __NR_exit
+ sc
+2: li r0, __NR_switch_endian
+ nop
diff --git a/tools/testing/selftests/powerpc/switch_endian/common.h b/tools/testing/selftests/powerpc/switch_endian/common.h
new file mode 100644
index 000000000..1434cbc2a
--- /dev/null
+++ b/tools/testing/selftests/powerpc/switch_endian/common.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <ppc-asm.h>
+#include <asm/unistd.h>
+
+#ifndef __NR_switch_endian
+#define __NR_switch_endian 363
+#endif
diff --git a/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S b/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S
new file mode 100644
index 000000000..cc4930467
--- /dev/null
+++ b/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include "common.h"
+
+ .data
+ .balign 8
+message:
+ .ascii "success: switch_endian_test\n\0"
+
+ .section ".toc"
+ .balign 8
+pattern:
+ .8byte 0x5555AAAA5555AAAA
+
+ .text
+FUNC_START(_start)
+ /* Load the pattern */
+ ld r15, pattern@TOC(%r2)
+
+ /* Setup CR, only CR2-CR4 are maintained */
+ lis r3, 0x00FF
+ ori r3, r3, 0xF000
+ mtcr r3
+
+ /* Load the pattern slightly modified into the registers */
+ mr r3, r15
+ addi r4, r15, 4
+
+ addi r5, r15, 32
+ mtlr r5
+
+ addi r5, r15, 5
+ addi r6, r15, 6
+ addi r7, r15, 7
+ addi r8, r15, 8
+
+ /* r9 - r12 are clobbered */
+
+ addi r13, r15, 13
+ addi r14, r15, 14
+
+ /* Skip r15 we're using it */
+
+ addi r16, r15, 16
+ addi r17, r15, 17
+ addi r18, r15, 18
+ addi r19, r15, 19
+ addi r20, r15, 20
+ addi r21, r15, 21
+ addi r22, r15, 22
+ addi r23, r15, 23
+ addi r24, r15, 24
+ addi r25, r15, 25
+ addi r26, r15, 26
+ addi r27, r15, 27
+ addi r28, r15, 28
+ addi r29, r15, 29
+ addi r30, r15, 30
+ addi r31, r15, 31
+
+ /*
+ * Call the syscall to switch endian.
+ * It clobbers r9-r12, XER, CTR and CR0-1,5-7.
+ */
+ li r0, __NR_switch_endian
+ sc
+
+#include "check-reversed.S"
+
+ /* Flip back, r0 already has the switch syscall number */
+ .long 0x02000044 /* sc */
+
+#include "check.S"
+
+ li r0, __NR_write
+ li r3, 1 /* stdout */
+ ld r4, message@got(%r2)
+ li r5, 28 /* strlen(message3) */
+ sc
+ li r0, __NR_exit
+ li r3, 0
+ sc
+ b .
diff --git a/tools/testing/selftests/powerpc/syscalls/.gitignore b/tools/testing/selftests/powerpc/syscalls/.gitignore
new file mode 100644
index 000000000..f0f3fcc9d
--- /dev/null
+++ b/tools/testing/selftests/powerpc/syscalls/.gitignore
@@ -0,0 +1 @@
+ipc_unmuxed
diff --git a/tools/testing/selftests/powerpc/syscalls/Makefile b/tools/testing/selftests/powerpc/syscalls/Makefile
new file mode 100644
index 000000000..161b88463
--- /dev/null
+++ b/tools/testing/selftests/powerpc/syscalls/Makefile
@@ -0,0 +1,8 @@
+TEST_GEN_PROGS := ipc_unmuxed
+
+CFLAGS += -I../../../../../usr/include
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): ../harness.c
diff --git a/tools/testing/selftests/powerpc/syscalls/ipc.h b/tools/testing/selftests/powerpc/syscalls/ipc.h
new file mode 100644
index 000000000..26a20682c
--- /dev/null
+++ b/tools/testing/selftests/powerpc/syscalls/ipc.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifdef __NR_semop
+DO_TEST(semop, __NR_semop)
+#endif
+
+#ifdef __NR_semget
+DO_TEST(semget, __NR_semget)
+#endif
+
+#ifdef __NR_semctl
+DO_TEST(semctl, __NR_semctl)
+#endif
+
+#ifdef __NR_semtimedop
+DO_TEST(semtimedop, __NR_semtimedop)
+#endif
+
+#ifdef __NR_msgsnd
+DO_TEST(msgsnd, __NR_msgsnd)
+#endif
+
+#ifdef __NR_msgrcv
+DO_TEST(msgrcv, __NR_msgrcv)
+#endif
+
+#ifdef __NR_msgget
+DO_TEST(msgget, __NR_msgget)
+#endif
+
+#ifdef __NR_msgctl
+DO_TEST(msgctl, __NR_msgctl)
+#endif
+
+#ifdef __NR_shmat
+DO_TEST(shmat, __NR_shmat)
+#endif
+
+#ifdef __NR_shmdt
+DO_TEST(shmdt, __NR_shmdt)
+#endif
+
+#ifdef __NR_shmget
+DO_TEST(shmget, __NR_shmget)
+#endif
+
+#ifdef __NR_shmctl
+DO_TEST(shmctl, __NR_shmctl)
+#endif
diff --git a/tools/testing/selftests/powerpc/syscalls/ipc_unmuxed.c b/tools/testing/selftests/powerpc/syscalls/ipc_unmuxed.c
new file mode 100644
index 000000000..2ac02706f
--- /dev/null
+++ b/tools/testing/selftests/powerpc/syscalls/ipc_unmuxed.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2015, Michael Ellerman, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This test simply tests that certain syscalls are implemented. It doesn't
+ * actually exercise their logic in any way.
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+
+#include "utils.h"
+
+
+#define DO_TEST(_name, _num) \
+static int test_##_name(void) \
+{ \
+ int rc; \
+ printf("Testing " #_name); \
+ errno = 0; \
+ rc = syscall(_num, -1, 0, 0, 0, 0, 0); \
+ printf("\treturned %d, errno %d\n", rc, errno); \
+ return errno == ENOSYS; \
+}
+
+#include "ipc.h"
+#undef DO_TEST
+
+static int ipc_unmuxed(void)
+{
+ int tests_done = 0;
+
+#define DO_TEST(_name, _num) \
+ FAIL_IF(test_##_name()); \
+ tests_done++;
+
+#include "ipc.h"
+#undef DO_TEST
+
+ /*
+ * If we ran no tests then it means none of the syscall numbers were
+ * defined, possibly because we were built against old headers. But it
+ * means we didn't really test anything, so instead of passing mark it
+ * as a skip to give the user a clue.
+ */
+ SKIP_IF(tests_done == 0);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(ipc_unmuxed, "ipc_unmuxed");
+}
diff --git a/tools/testing/selftests/powerpc/tm/.gitignore b/tools/testing/selftests/powerpc/tm/.gitignore
new file mode 100644
index 000000000..c3ee8393d
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/.gitignore
@@ -0,0 +1,17 @@
+tm-resched-dscr
+tm-syscall
+tm-signal-msr-resv
+tm-signal-stack
+tm-vmxcopy
+tm-fork
+tm-tar
+tm-tmspr
+tm-exec
+tm-signal-context-chk-fpu
+tm-signal-context-chk-gpr
+tm-signal-context-chk-vmx
+tm-signal-context-chk-vsx
+tm-vmx-unavail
+tm-unavailable
+tm-trap
+tm-sigreturn
diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile
new file mode 100644
index 000000000..9fc2cf6fb
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/Makefile
@@ -0,0 +1,26 @@
+# SPDX-License-Identifier: GPL-2.0
+SIGNAL_CONTEXT_CHK_TESTS := tm-signal-context-chk-gpr tm-signal-context-chk-fpu \
+ tm-signal-context-chk-vmx tm-signal-context-chk-vsx
+
+TEST_GEN_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack \
+ tm-vmxcopy tm-fork tm-tar tm-tmspr tm-vmx-unavail tm-unavailable tm-trap \
+ $(SIGNAL_CONTEXT_CHK_TESTS) tm-sigreturn
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): ../harness.c ../utils.c
+
+CFLAGS += -mhtm
+
+$(OUTPUT)/tm-syscall: tm-syscall-asm.S
+$(OUTPUT)/tm-syscall: CFLAGS += -I../../../../../usr/include
+$(OUTPUT)/tm-tmspr: CFLAGS += -pthread
+$(OUTPUT)/tm-vmx-unavail: CFLAGS += -pthread -m64
+$(OUTPUT)/tm-resched-dscr: ../pmu/lib.c
+$(OUTPUT)/tm-unavailable: CFLAGS += -O0 -pthread -m64 -Wno-error=uninitialized -mvsx
+$(OUTPUT)/tm-trap: CFLAGS += -O0 -pthread -m64
+
+SIGNAL_CONTEXT_CHK_TESTS := $(patsubst %,$(OUTPUT)/%,$(SIGNAL_CONTEXT_CHK_TESTS))
+$(SIGNAL_CONTEXT_CHK_TESTS): tm-signal.S
+$(SIGNAL_CONTEXT_CHK_TESTS): CFLAGS += -mhtm -m64 -mvsx
diff --git a/tools/testing/selftests/powerpc/tm/tm-exec.c b/tools/testing/selftests/powerpc/tm/tm-exec.c
new file mode 100644
index 000000000..3d27fa0ec
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-exec.c
@@ -0,0 +1,70 @@
+/*
+ * Copyright 2016, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Syscalls can be performed provided the transactions are suspended.
+ * The exec() class of syscall is unique as a new process is loaded.
+ *
+ * It makes little sense for after an exec() call for the previously
+ * suspended transaction to still exist.
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <inttypes.h>
+#include <libgen.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "utils.h"
+#include "tm.h"
+
+static char *path;
+
+static int test_exec(void)
+{
+ SKIP_IF(!have_htm());
+
+ asm __volatile__(
+ "tbegin.;"
+ "blt 1f; "
+ "tsuspend.;"
+ "1: ;"
+ : : : "memory");
+
+ execl(path, "tm-exec", "--child", NULL);
+
+ /* Shouldn't get here */
+ perror("execl() failed");
+ return 1;
+}
+
+static int after_exec(void)
+{
+ asm __volatile__(
+ "tbegin.;"
+ "blt 1f;"
+ "tsuspend.;"
+ "1: ;"
+ : : : "memory");
+
+ FAIL_IF(failure_is_nesting());
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ path = argv[0];
+
+ if (argc > 1 && strcmp(argv[1], "--child") == 0)
+ return after_exec();
+
+ return test_harness(test_exec, "tm_exec");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-fork.c b/tools/testing/selftests/powerpc/tm/tm-fork.c
new file mode 100644
index 000000000..8d48579b7
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-fork.c
@@ -0,0 +1,42 @@
+/*
+ * Copyright 2015, Michael Neuling, IBM Corp.
+ * Licensed under GPLv2.
+ *
+ * Edited: Rashmica Gupta, Nov 2015
+ *
+ * This test does a fork syscall inside a transaction. Basic sniff test
+ * to see if we can enter the kernel during a transaction.
+ */
+
+#include <errno.h>
+#include <inttypes.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "utils.h"
+#include "tm.h"
+
+int test_fork(void)
+{
+ SKIP_IF(!have_htm());
+
+ asm __volatile__(
+ "tbegin.;"
+ "blt 1f; "
+ "li 0, 2;" /* fork syscall */
+ "sc ;"
+ "tend.;"
+ "1: ;"
+ : : : "memory", "r0");
+ /* If we reach here, we've passed. Otherwise we've probably crashed
+ * the kernel */
+
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(test_fork, "tm_fork");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c b/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c
new file mode 100644
index 000000000..4cdb83964
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Test context switching to see if the DSCR SPR is correctly preserved
+ * when within a transaction.
+ *
+ * Note: We assume that the DSCR has been left at the default value (0)
+ * for all CPUs.
+ *
+ * Method:
+ *
+ * Set a value into the DSCR.
+ *
+ * Start a transaction, and suspend it (*).
+ *
+ * Hard loop checking to see if the transaction has become doomed.
+ *
+ * Now that we *may* have been preempted, record the DSCR and TEXASR SPRS.
+ *
+ * If the abort was because of a context switch, check the DSCR value.
+ * Otherwise, try again.
+ *
+ * (*) If the transaction is not suspended we can't see the problem because
+ * the transaction abort handler will restore the DSCR to it's checkpointed
+ * value before we regain control.
+ */
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <asm/tm.h>
+
+#include "utils.h"
+#include "tm.h"
+#include "../pmu/lib.h"
+
+#define SPRN_DSCR 0x03
+
+int test_body(void)
+{
+ uint64_t rv, dscr1 = 1, dscr2, texasr;
+
+ SKIP_IF(!have_htm());
+
+ printf("Check DSCR TM context switch: ");
+ fflush(stdout);
+ for (;;) {
+ asm __volatile__ (
+ /* set a known value into the DSCR */
+ "ld 3, %[dscr1];"
+ "mtspr %[sprn_dscr], 3;"
+
+ "li %[rv], 1;"
+ /* start and suspend a transaction */
+ "tbegin.;"
+ "beq 1f;"
+ "tsuspend.;"
+
+ /* hard loop until the transaction becomes doomed */
+ "2: ;"
+ "tcheck 0;"
+ "bc 4, 0, 2b;"
+
+ /* record DSCR and TEXASR */
+ "mfspr 3, %[sprn_dscr];"
+ "std 3, %[dscr2];"
+ "mfspr 3, %[sprn_texasr];"
+ "std 3, %[texasr];"
+
+ "tresume.;"
+ "tend.;"
+ "li %[rv], 0;"
+ "1: ;"
+ : [rv]"=r"(rv), [dscr2]"=m"(dscr2), [texasr]"=m"(texasr)
+ : [dscr1]"m"(dscr1)
+ , [sprn_dscr]"i"(SPRN_DSCR), [sprn_texasr]"i"(SPRN_TEXASR)
+ : "memory", "r3"
+ );
+ assert(rv); /* make sure the transaction aborted */
+ if ((texasr >> 56) != TM_CAUSE_RESCHED) {
+ continue;
+ }
+ if (dscr2 != dscr1) {
+ printf(" FAIL\n");
+ return 1;
+ } else {
+ printf(" OK\n");
+ return 0;
+ }
+ }
+}
+
+static int tm_resched_dscr(void)
+{
+ return eat_cpu(test_body);
+}
+
+int main(int argc, const char *argv[])
+{
+ return test_harness(tm_resched_dscr, "tm_resched_dscr");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-fpu.c b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-fpu.c
new file mode 100644
index 000000000..c760debbd
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-fpu.c
@@ -0,0 +1,92 @@
+/*
+ * Copyright 2016, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ *
+ * Test the kernel's signal frame code.
+ *
+ * The kernel sets up two sets of ucontexts if the signal was to be
+ * delivered while the thread was in a transaction.
+ * Expected behaviour is that the checkpointed state is in the user
+ * context passed to the signal handler. The speculated state can be
+ * accessed with the uc_link pointer.
+ *
+ * The rationale for this is that if TM unaware code (which linked
+ * against TM libs) installs a signal handler it will not know of the
+ * speculative nature of the 'live' registers and may infer the wrong
+ * thing.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <signal.h>
+#include <unistd.h>
+
+#include <altivec.h>
+
+#include "utils.h"
+#include "tm.h"
+
+#define MAX_ATTEMPT 500000
+
+#define NV_FPU_REGS 18
+
+long tm_signal_self_context_load(pid_t pid, long *gprs, double *fps, vector int *vms, vector int *vss);
+
+/* Be sure there are 2x as many as there are NV FPU regs (2x18) */
+static double fps[] = {
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ -1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,-17,-18
+};
+
+static sig_atomic_t fail;
+
+static void signal_usr1(int signum, siginfo_t *info, void *uc)
+{
+ int i;
+ ucontext_t *ucp = uc;
+ ucontext_t *tm_ucp = ucp->uc_link;
+
+ for (i = 0; i < NV_FPU_REGS && !fail; i++) {
+ fail = (ucp->uc_mcontext.fp_regs[i + 14] != fps[i]);
+ fail |= (tm_ucp->uc_mcontext.fp_regs[i + 14] != fps[i + NV_FPU_REGS]);
+ if (fail)
+ printf("Failed on %d FP %g or %g\n", i, ucp->uc_mcontext.fp_regs[i + 14], tm_ucp->uc_mcontext.fp_regs[i + 14]);
+ }
+}
+
+static int tm_signal_context_chk_fpu()
+{
+ struct sigaction act;
+ int i;
+ long rc;
+ pid_t pid = getpid();
+
+ SKIP_IF(!have_htm());
+
+ act.sa_sigaction = signal_usr1;
+ sigemptyset(&act.sa_mask);
+ act.sa_flags = SA_SIGINFO;
+ if (sigaction(SIGUSR1, &act, NULL) < 0) {
+ perror("sigaction sigusr1");
+ exit(1);
+ }
+
+ i = 0;
+ while (i < MAX_ATTEMPT && !fail) {
+ rc = tm_signal_self_context_load(pid, NULL, fps, NULL, NULL);
+ FAIL_IF(rc != pid);
+ i++;
+ }
+
+ return fail;
+}
+
+int main(void)
+{
+ return test_harness(tm_signal_context_chk_fpu, "tm_signal_context_chk_fpu");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-gpr.c b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-gpr.c
new file mode 100644
index 000000000..df91330a0
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-gpr.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright 2016, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ *
+ * Test the kernel's signal frame code.
+ *
+ * The kernel sets up two sets of ucontexts if the signal was to be
+ * delivered while the thread was in a transaction.
+ * Expected behaviour is that the checkpointed state is in the user
+ * context passed to the signal handler. The speculated state can be
+ * accessed with the uc_link pointer.
+ *
+ * The rationale for this is that if TM unaware code (which linked
+ * against TM libs) installs a signal handler it will not know of the
+ * speculative nature of the 'live' registers and may infer the wrong
+ * thing.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <signal.h>
+#include <unistd.h>
+
+#include <altivec.h>
+
+#include "utils.h"
+#include "tm.h"
+
+#define MAX_ATTEMPT 500000
+
+#define NV_GPR_REGS 18
+
+long tm_signal_self_context_load(pid_t pid, long *gprs, double *fps, vector int *vms, vector int *vss);
+
+static sig_atomic_t fail;
+
+static long gps[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ -1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,-17,-18};
+
+static void signal_usr1(int signum, siginfo_t *info, void *uc)
+{
+ int i;
+ ucontext_t *ucp = uc;
+ ucontext_t *tm_ucp = ucp->uc_link;
+
+ for (i = 0; i < NV_GPR_REGS && !fail; i++) {
+ fail = (ucp->uc_mcontext.gp_regs[i + 14] != gps[i]);
+ fail |= (tm_ucp->uc_mcontext.gp_regs[i + 14] != gps[i + NV_GPR_REGS]);
+ if (fail)
+ printf("Failed on %d GPR %lu or %lu\n", i,
+ ucp->uc_mcontext.gp_regs[i + 14], tm_ucp->uc_mcontext.gp_regs[i + 14]);
+ }
+}
+
+static int tm_signal_context_chk_gpr()
+{
+ struct sigaction act;
+ int i;
+ long rc;
+ pid_t pid = getpid();
+
+ SKIP_IF(!have_htm());
+
+ act.sa_sigaction = signal_usr1;
+ sigemptyset(&act.sa_mask);
+ act.sa_flags = SA_SIGINFO;
+ if (sigaction(SIGUSR1, &act, NULL) < 0) {
+ perror("sigaction sigusr1");
+ exit(1);
+ }
+
+ i = 0;
+ while (i < MAX_ATTEMPT && !fail) {
+ rc = tm_signal_self_context_load(pid, gps, NULL, NULL, NULL);
+ FAIL_IF(rc != pid);
+ i++;
+ }
+
+ return fail;
+}
+
+int main(void)
+{
+ return test_harness(tm_signal_context_chk_gpr, "tm_signal_context_chk_gpr");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vmx.c b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vmx.c
new file mode 100644
index 000000000..f0ee55fd5
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vmx.c
@@ -0,0 +1,110 @@
+/*
+ * Copyright 2016, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ *
+ * Test the kernel's signal frame code.
+ *
+ * The kernel sets up two sets of ucontexts if the signal was to be
+ * delivered while the thread was in a transaction.
+ * Expected behaviour is that the checkpointed state is in the user
+ * context passed to the signal handler. The speculated state can be
+ * accessed with the uc_link pointer.
+ *
+ * The rationale for this is that if TM unaware code (which linked
+ * against TM libs) installs a signal handler it will not know of the
+ * speculative nature of the 'live' registers and may infer the wrong
+ * thing.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+
+#include <altivec.h>
+
+#include "utils.h"
+#include "tm.h"
+
+#define MAX_ATTEMPT 500000
+
+#define NV_VMX_REGS 12
+
+long tm_signal_self_context_load(pid_t pid, long *gprs, double *fps, vector int *vms, vector int *vss);
+
+static sig_atomic_t fail;
+
+vector int vms[] = {
+ {1, 2, 3, 4 },{5, 6, 7, 8 },{9, 10,11,12},
+ {13,14,15,16},{17,18,19,20},{21,22,23,24},
+ {25,26,27,28},{29,30,31,32},{33,34,35,36},
+ {37,38,39,40},{41,42,43,44},{45,46,47,48},
+ {-1, -2, -3, -4}, {-5, -6, -7, -8}, {-9, -10,-11,-12},
+ {-13,-14,-15,-16},{-17,-18,-19,-20},{-21,-22,-23,-24},
+ {-25,-26,-27,-28},{-29,-30,-31,-32},{-33,-34,-35,-36},
+ {-37,-38,-39,-40},{-41,-42,-43,-44},{-45,-46,-47,-48}
+};
+
+static void signal_usr1(int signum, siginfo_t *info, void *uc)
+{
+ int i;
+ ucontext_t *ucp = uc;
+ ucontext_t *tm_ucp = ucp->uc_link;
+
+ for (i = 0; i < NV_VMX_REGS && !fail; i++) {
+ fail = memcmp(ucp->uc_mcontext.v_regs->vrregs[i + 20],
+ &vms[i], sizeof(vector int));
+ fail |= memcmp(tm_ucp->uc_mcontext.v_regs->vrregs[i + 20],
+ &vms[i + NV_VMX_REGS], sizeof (vector int));
+
+ if (fail) {
+ int j;
+
+ fprintf(stderr, "Failed on %d vmx 0x", i);
+ for (j = 0; j < 4; j++)
+ fprintf(stderr, "%04x", ucp->uc_mcontext.v_regs->vrregs[i + 20][j]);
+ fprintf(stderr, " vs 0x");
+ for (j = 0 ; j < 4; j++)
+ fprintf(stderr, "%04x", tm_ucp->uc_mcontext.v_regs->vrregs[i + 20][j]);
+ fprintf(stderr, "\n");
+ }
+ }
+}
+
+static int tm_signal_context_chk()
+{
+ struct sigaction act;
+ int i;
+ long rc;
+ pid_t pid = getpid();
+
+ SKIP_IF(!have_htm());
+
+ act.sa_sigaction = signal_usr1;
+ sigemptyset(&act.sa_mask);
+ act.sa_flags = SA_SIGINFO;
+ if (sigaction(SIGUSR1, &act, NULL) < 0) {
+ perror("sigaction sigusr1");
+ exit(1);
+ }
+
+ i = 0;
+ while (i < MAX_ATTEMPT && !fail) {
+ rc = tm_signal_self_context_load(pid, NULL, NULL, vms, NULL);
+ FAIL_IF(rc != pid);
+ i++;
+ }
+
+ return fail;
+}
+
+int main(void)
+{
+ return test_harness(tm_signal_context_chk, "tm_signal_context_chk_vmx");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vsx.c b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vsx.c
new file mode 100644
index 000000000..b99c3d835
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vsx.c
@@ -0,0 +1,125 @@
+/*
+ * Copyright 2016, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ *
+ * Test the kernel's signal frame code.
+ *
+ * The kernel sets up two sets of ucontexts if the signal was to be
+ * delivered while the thread was in a transaction.
+ * Expected behaviour is that the checkpointed state is in the user
+ * context passed to the signal handler. The speculated state can be
+ * accessed with the uc_link pointer.
+ *
+ * The rationale for this is that if TM unaware code (which linked
+ * against TM libs) installs a signal handler it will not know of the
+ * speculative nature of the 'live' registers and may infer the wrong
+ * thing.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+
+#include <altivec.h>
+
+#include "utils.h"
+#include "tm.h"
+
+#define MAX_ATTEMPT 500000
+
+#define NV_VSX_REGS 12
+
+long tm_signal_self_context_load(pid_t pid, long *gprs, double *fps, vector int *vms, vector int *vss);
+
+static sig_atomic_t fail;
+
+vector int vss[] = {
+ {1, 2, 3, 4 },{5, 6, 7, 8 },{9, 10,11,12},
+ {13,14,15,16},{17,18,19,20},{21,22,23,24},
+ {25,26,27,28},{29,30,31,32},{33,34,35,36},
+ {37,38,39,40},{41,42,43,44},{45,46,47,48},
+ {-1, -2, -3, -4 },{-5, -6, -7, -8 },{-9, -10,-11,-12},
+ {-13,-14,-15,-16},{-17,-18,-19,-20},{-21,-22,-23,-24},
+ {-25,-26,-27,-28},{-29,-30,-31,-32},{-33,-34,-35,-36},
+ {-37,-38,-39,-40},{-41,-42,-43,-44},{-45,-46,-47,-48}
+};
+
+static void signal_usr1(int signum, siginfo_t *info, void *uc)
+{
+ int i;
+ uint8_t vsc[sizeof(vector int)];
+ uint8_t vst[sizeof(vector int)];
+ ucontext_t *ucp = uc;
+ ucontext_t *tm_ucp = ucp->uc_link;
+
+ /*
+ * The other half of the VSX regs will be after v_regs.
+ *
+ * In short, vmx_reserve array holds everything. v_regs is a 16
+ * byte aligned pointer at the start of vmx_reserve (vmx_reserve
+ * may or may not be 16 aligned) where the v_regs structure exists.
+ * (half of) The VSX regsters are directly after v_regs so the
+ * easiest way to find them below.
+ */
+ long *vsx_ptr = (long *)(ucp->uc_mcontext.v_regs + 1);
+ long *tm_vsx_ptr = (long *)(tm_ucp->uc_mcontext.v_regs + 1);
+ for (i = 0; i < NV_VSX_REGS && !fail; i++) {
+ memcpy(vsc, &ucp->uc_mcontext.fp_regs[i + 20], 8);
+ memcpy(vsc + 8, &vsx_ptr[20 + i], 8);
+ fail = memcmp(vsc, &vss[i], sizeof(vector int));
+ memcpy(vst, &tm_ucp->uc_mcontext.fp_regs[i + 20], 8);
+ memcpy(vst + 8, &tm_vsx_ptr[20 + i], 8);
+ fail |= memcmp(vst, &vss[i + NV_VSX_REGS], sizeof(vector int));
+
+ if (fail) {
+ int j;
+
+ fprintf(stderr, "Failed on %d vsx 0x", i);
+ for (j = 0; j < 16; j++)
+ fprintf(stderr, "%02x", vsc[j]);
+ fprintf(stderr, " vs 0x");
+ for (j = 0; j < 16; j++)
+ fprintf(stderr, "%02x", vst[j]);
+ fprintf(stderr, "\n");
+ }
+ }
+}
+
+static int tm_signal_context_chk()
+{
+ struct sigaction act;
+ int i;
+ long rc;
+ pid_t pid = getpid();
+
+ SKIP_IF(!have_htm());
+
+ act.sa_sigaction = signal_usr1;
+ sigemptyset(&act.sa_mask);
+ act.sa_flags = SA_SIGINFO;
+ if (sigaction(SIGUSR1, &act, NULL) < 0) {
+ perror("sigaction sigusr1");
+ exit(1);
+ }
+
+ i = 0;
+ while (i < MAX_ATTEMPT && !fail) {
+ rc = tm_signal_self_context_load(pid, NULL, NULL, NULL, vss);
+ FAIL_IF(rc != pid);
+ i++;
+ }
+
+ return fail;
+}
+
+int main(void)
+{
+ return test_harness(tm_signal_context_chk, "tm_signal_context_chk_vsx");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-msr-resv.c b/tools/testing/selftests/powerpc/tm/tm-signal-msr-resv.c
new file mode 100644
index 000000000..8c54d18b3
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-signal-msr-resv.c
@@ -0,0 +1,74 @@
+/*
+ * Copyright 2015, Michael Neuling, IBM Corp.
+ * Licensed under GPLv2.
+ *
+ * Test the kernel's signal return code to ensure that it doesn't
+ * crash when both the transactional and suspend MSR bits are set in
+ * the signal context.
+ *
+ * For this test, we send ourselves a SIGUSR1. In the SIGUSR1 handler
+ * we modify the signal context to set both MSR TM S and T bits (which
+ * is "reserved" by the PowerISA). When we return from the signal
+ * handler (implicit sigreturn), the kernel should detect reserved MSR
+ * value and send us with a SIGSEGV.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <signal.h>
+#include <unistd.h>
+
+#include "utils.h"
+#include "tm.h"
+
+int segv_expected = 0;
+
+void signal_segv(int signum)
+{
+ if (segv_expected && (signum == SIGSEGV))
+ _exit(0);
+ _exit(1);
+}
+
+void signal_usr1(int signum, siginfo_t *info, void *uc)
+{
+ ucontext_t *ucp = uc;
+
+ /* Link tm checkpointed context to normal context */
+ ucp->uc_link = ucp;
+ /* Set all TM bits so that the context is now invalid */
+#ifdef __powerpc64__
+ ucp->uc_mcontext.gp_regs[PT_MSR] |= (7ULL << 32);
+#else
+ ucp->uc_mcontext.uc_regs->gregs[PT_MSR] |= (7ULL);
+#endif
+ /* Should segv on return becuase of invalid context */
+ segv_expected = 1;
+}
+
+int tm_signal_msr_resv()
+{
+ struct sigaction act;
+
+ SKIP_IF(!have_htm());
+
+ act.sa_sigaction = signal_usr1;
+ sigemptyset(&act.sa_mask);
+ act.sa_flags = SA_SIGINFO;
+ if (sigaction(SIGUSR1, &act, NULL) < 0) {
+ perror("sigaction sigusr1");
+ exit(1);
+ }
+ if (signal(SIGSEGV, signal_segv) == SIG_ERR)
+ exit(1);
+
+ raise(SIGUSR1);
+
+ /* We shouldn't get here as we exit in the segv handler */
+ return 1;
+}
+
+int main(void)
+{
+ return test_harness(tm_signal_msr_resv, "tm_signal_msr_resv");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-stack.c b/tools/testing/selftests/powerpc/tm/tm-signal-stack.c
new file mode 100644
index 000000000..1f0eb5674
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-signal-stack.c
@@ -0,0 +1,76 @@
+/*
+ * Copyright 2015, Michael Neuling, IBM Corp.
+ * Licensed under GPLv2.
+ *
+ * Test the kernel's signal delievery code to ensure that we don't
+ * trelaim twice in the kernel signal delivery code. This can happen
+ * if we trigger a signal when in a transaction and the stack pointer
+ * is bogus.
+ *
+ * This test case registers a SEGV handler, sets the stack pointer
+ * (r1) to NULL, starts a transaction and then generates a SEGV. The
+ * SEGV should be handled but we exit here as the stack pointer is
+ * invalid and hance we can't sigreturn. We only need to check that
+ * this flow doesn't crash the kernel.
+ */
+
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <signal.h>
+
+#include "utils.h"
+#include "tm.h"
+
+void signal_segv(int signum)
+{
+ /* This should never actually run since stack is foobar */
+ exit(1);
+}
+
+int tm_signal_stack()
+{
+ int pid;
+
+ SKIP_IF(!have_htm());
+
+ pid = fork();
+ if (pid < 0)
+ exit(1);
+
+ if (pid) { /* Parent */
+ /*
+ * It's likely the whole machine will crash here so if
+ * the child ever exits, we are good.
+ */
+ wait(NULL);
+ return 0;
+ }
+
+ /*
+ * The flow here is:
+ * 1) register a signal handler (so signal delievery occurs)
+ * 2) make stack pointer (r1) = NULL
+ * 3) start transaction
+ * 4) cause segv
+ */
+ if (signal(SIGSEGV, signal_segv) == SIG_ERR)
+ exit(1);
+ asm volatile("li 1, 0 ;" /* stack ptr == NULL */
+ "1:"
+ "tbegin.;"
+ "beq 1b ;" /* retry forever */
+ "tsuspend.;"
+ "ld 2, 0(1) ;" /* trigger segv" */
+ : : : "memory");
+
+ /* This should never get here due to above segv */
+ return 1;
+}
+
+int main(void)
+{
+ return test_harness(tm_signal_stack, "tm_signal_stack");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-signal.S b/tools/testing/selftests/powerpc/tm/tm-signal.S
new file mode 100644
index 000000000..506a4ebaf
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-signal.S
@@ -0,0 +1,114 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include "basic_asm.h"
+#include "gpr_asm.h"
+#include "fpu_asm.h"
+#include "vmx_asm.h"
+#include "vsx_asm.h"
+
+/*
+ * Large caveat here being that the caller cannot expect the
+ * signal to always be sent! The hardware can (AND WILL!) abort
+ * the transaction between the tbegin and the tsuspend (however
+ * unlikely it seems or infrequently it actually happens).
+ * You have been warned.
+ */
+/* long tm_signal_self(pid_t pid, long *gprs, double *fps, vector *vms, vector *vss); */
+FUNC_START(tm_signal_self_context_load)
+ PUSH_BASIC_STACK(512)
+ /*
+ * Don't strictly need to save and restore as it depends on if
+ * we're going to use them, however this reduces messy logic
+ */
+ PUSH_VMX(STACK_FRAME_LOCAL(5,0),r8)
+ PUSH_FPU(512)
+ PUSH_NVREGS_BELOW_FPU(512)
+ std r3, STACK_FRAME_PARAM(0)(sp) /* pid */
+ std r4, STACK_FRAME_PARAM(1)(sp) /* gps */
+ std r5, STACK_FRAME_PARAM(2)(sp) /* fps */
+ std r6, STACK_FRAME_PARAM(3)(sp) /* vms */
+ std r7, STACK_FRAME_PARAM(4)(sp) /* vss */
+
+ ld r3, STACK_FRAME_PARAM(1)(sp)
+ cmpdi r3, 0
+ beq skip_gpr_lc
+ bl load_gpr
+skip_gpr_lc:
+ ld r3, STACK_FRAME_PARAM(2)(sp)
+ cmpdi r3, 0
+ beq skip_fpu_lc
+ bl load_fpu
+skip_fpu_lc:
+ ld r3, STACK_FRAME_PARAM(3)(sp)
+ cmpdi r3, 0
+ beq skip_vmx_lc
+ bl load_vmx
+skip_vmx_lc:
+ ld r3, STACK_FRAME_PARAM(4)(sp)
+ cmpdi r3, 0
+ beq skip_vsx_lc
+ bl load_vsx
+skip_vsx_lc:
+ /*
+ * Set r3 (return value) before tbegin. Use the pid as a known
+ * 'all good' return value, zero is used to indicate a non-doomed
+ * transaction.
+ */
+ ld r3, STACK_FRAME_PARAM(0)(sp)
+ tbegin.
+ beq 1f
+ tsuspend. /* Can't enter a syscall transactionally */
+ ld r3, STACK_FRAME_PARAM(1)(sp)
+ cmpdi r3, 0
+ beq skip_gpr_lt
+ /* Get the second half of the array */
+ addi r3, r3, 8 * 18
+ bl load_gpr
+skip_gpr_lt:
+ ld r3, STACK_FRAME_PARAM(2)(sp)
+ cmpdi r3, 0
+ beq skip_fpu_lt
+ /* Get the second half of the array */
+ addi r3, r3, 8 * 18
+ bl load_fpu
+skip_fpu_lt:
+ ld r3, STACK_FRAME_PARAM(3)(sp)
+ cmpdi r3, 0
+ beq skip_vmx_lt
+ /* Get the second half of the array */
+ addi r3, r3, 16 * 12
+ bl load_vmx
+skip_vmx_lt:
+ ld r3, STACK_FRAME_PARAM(4)(sp)
+ cmpdi r3, 0
+ beq skip_vsx_lt
+ /* Get the second half of the array */
+ addi r3, r3, 16 * 12
+ bl load_vsx
+skip_vsx_lt:
+ li r0, 37 /* sys_kill */
+ ld r3, STACK_FRAME_PARAM(0)(sp) /* pid */
+ li r4, 10 /* SIGUSR1 */
+ sc /* Taking the signal will doom the transaction */
+ tabort. 0
+ tresume. /* Be super sure we abort */
+ /*
+ * This will cause us to resume doomed transaction and cause
+ * hardware to cleanup, we'll end up at 1: anything between
+ * tresume. and 1: shouldn't ever run.
+ */
+ li r3, 0
+ 1:
+ POP_VMX(STACK_FRAME_LOCAL(5,0),r4)
+ POP_FPU(512)
+ POP_NVREGS_BELOW_FPU(512)
+ POP_BASIC_STACK(512)
+ blr
+FUNC_END(tm_signal_self_context_load)
diff --git a/tools/testing/selftests/powerpc/tm/tm-sigreturn.c b/tools/testing/selftests/powerpc/tm/tm-sigreturn.c
new file mode 100644
index 000000000..9a6017a1d
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-sigreturn.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2015, Laurent Dufour, IBM Corp.
+ *
+ * Test the kernel's signal returning code to check reclaim is done if the
+ * sigreturn() is called while in a transaction (suspended since active is
+ * already dropped trough the system call path).
+ *
+ * The kernel must discard the transaction when entering sigreturn, since
+ * restoring the potential TM SPRS from the signal frame is requiring to not be
+ * in a transaction.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "tm.h"
+#include "utils.h"
+
+
+void handler(int sig)
+{
+ uint64_t ret;
+
+ asm __volatile__(
+ "li 3,1 ;"
+ "tbegin. ;"
+ "beq 1f ;"
+ "li 3,0 ;"
+ "tsuspend. ;"
+ "1: ;"
+ "std%X[ret] 3, %[ret] ;"
+ : [ret] "=m"(ret)
+ :
+ : "memory", "3", "cr0");
+
+ if (ret)
+ exit(1);
+
+ /*
+ * We return from the signal handle while in a suspended transaction
+ */
+}
+
+
+int tm_sigreturn(void)
+{
+ struct sigaction sa;
+ uint64_t ret = 0;
+
+ SKIP_IF(!have_htm());
+ SKIP_IF(!is_ppc64le());
+
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_handler = handler;
+ sigemptyset(&sa.sa_mask);
+
+ if (sigaction(SIGSEGV, &sa, NULL))
+ exit(1);
+
+ asm __volatile__(
+ "tbegin. ;"
+ "beq 1f ;"
+ "li 3,0 ;"
+ "std 3,0(3) ;" /* trigger SEGV */
+ "li 3,1 ;"
+ "std%X[ret] 3,%[ret] ;"
+ "tend. ;"
+ "b 2f ;"
+ "1: ;"
+ "li 3,2 ;"
+ "std%X[ret] 3,%[ret] ;"
+ "2: ;"
+ : [ret] "=m"(ret)
+ :
+ : "memory", "3", "cr0");
+
+ if (ret != 2)
+ exit(1);
+
+ exit(0);
+}
+
+int main(void)
+{
+ return test_harness(tm_sigreturn, "tm_sigreturn");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-syscall-asm.S b/tools/testing/selftests/powerpc/tm/tm-syscall-asm.S
new file mode 100644
index 000000000..bd1ca25fe
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-syscall-asm.S
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <ppc-asm.h>
+#include <asm/unistd.h>
+
+ .text
+FUNC_START(getppid_tm_active)
+ tbegin.
+ beq 1f
+ li r0, __NR_getppid
+ sc
+ tend.
+ blr
+1:
+ li r3, -1
+ blr
+
+FUNC_START(getppid_tm_suspended)
+ tbegin.
+ beq 1f
+ li r0, __NR_getppid
+ tsuspend.
+ sc
+ tresume.
+ tend.
+ blr
+1:
+ li r3, -1
+ blr
diff --git a/tools/testing/selftests/powerpc/tm/tm-syscall.c b/tools/testing/selftests/powerpc/tm/tm-syscall.c
new file mode 100644
index 000000000..454b965a2
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-syscall.c
@@ -0,0 +1,106 @@
+/*
+ * Copyright 2015, Sam Bobroff, IBM Corp.
+ * Licensed under GPLv2.
+ *
+ * Test the kernel's system call code to ensure that a system call
+ * made from within an active HTM transaction is aborted with the
+ * correct failure code.
+ * Conversely, ensure that a system call made from within a
+ * suspended transaction can succeed.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <asm/tm.h>
+#include <sys/time.h>
+#include <stdlib.h>
+
+#include "utils.h"
+#include "tm.h"
+
+extern int getppid_tm_active(void);
+extern int getppid_tm_suspended(void);
+
+unsigned retries = 0;
+
+#define TEST_DURATION 10 /* seconds */
+#define TM_RETRIES 100
+
+pid_t getppid_tm(bool suspend)
+{
+ int i;
+ pid_t pid;
+
+ for (i = 0; i < TM_RETRIES; i++) {
+ if (suspend)
+ pid = getppid_tm_suspended();
+ else
+ pid = getppid_tm_active();
+
+ if (pid >= 0)
+ return pid;
+
+ if (failure_is_persistent()) {
+ if (failure_is_syscall())
+ return -1;
+
+ printf("Unexpected persistent transaction failure.\n");
+ printf("TEXASR 0x%016lx, TFIAR 0x%016lx.\n",
+ __builtin_get_texasr(), __builtin_get_tfiar());
+ exit(-1);
+ }
+
+ retries++;
+ }
+
+ printf("Exceeded limit of %d temporary transaction failures.\n", TM_RETRIES);
+ printf("TEXASR 0x%016lx, TFIAR 0x%016lx.\n",
+ __builtin_get_texasr(), __builtin_get_tfiar());
+
+ exit(-1);
+}
+
+int tm_syscall(void)
+{
+ unsigned count = 0;
+ struct timeval end, now;
+
+ SKIP_IF(!have_htm_nosc());
+
+ setbuf(stdout, NULL);
+
+ printf("Testing transactional syscalls for %d seconds...\n", TEST_DURATION);
+
+ gettimeofday(&end, NULL);
+ now.tv_sec = TEST_DURATION;
+ now.tv_usec = 0;
+ timeradd(&end, &now, &end);
+
+ for (count = 0; timercmp(&now, &end, <); count++) {
+ /*
+ * Test a syscall within a suspended transaction and verify
+ * that it succeeds.
+ */
+ FAIL_IF(getppid_tm(true) == -1); /* Should succeed. */
+
+ /*
+ * Test a syscall within an active transaction and verify that
+ * it fails with the correct failure code.
+ */
+ FAIL_IF(getppid_tm(false) != -1); /* Should fail... */
+ FAIL_IF(!failure_is_persistent()); /* ...persistently... */
+ FAIL_IF(!failure_is_syscall()); /* ...with code syscall. */
+ gettimeofday(&now, 0);
+ }
+
+ printf("%d active and suspended transactions behaved correctly.\n", count);
+ printf("(There were %d transaction retries.)\n", retries);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(tm_syscall, "tm_syscall");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-tar.c b/tools/testing/selftests/powerpc/tm/tm-tar.c
new file mode 100644
index 000000000..f31fe5a28
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-tar.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright 2015, Michael Neuling, IBM Corp.
+ * Licensed under GPLv2.
+ * Original: Michael Neuling 19/7/2013
+ * Edited: Rashmica Gupta 01/12/2015
+ *
+ * Do some transactions, see if the tar is corrupted.
+ * If the transaction is aborted, the TAR should be rolled back to the
+ * checkpointed value before the transaction began. The value written to
+ * TAR in suspended mode should only remain in TAR if the transaction
+ * completes.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+
+#include "tm.h"
+#include "utils.h"
+
+int num_loops = 10000;
+
+int test_tar(void)
+{
+ int i;
+
+ SKIP_IF(!have_htm());
+ SKIP_IF(!is_ppc64le());
+
+ for (i = 0; i < num_loops; i++)
+ {
+ uint64_t result = 0;
+ asm __volatile__(
+ "li 7, 1;"
+ "mtspr %[tar], 7;" /* tar = 1 */
+ "tbegin.;"
+ "beq 3f;"
+ "li 4, 0x7000;" /* Loop lots, to use time */
+ "2:;" /* Start loop */
+ "li 7, 2;"
+ "mtspr %[tar], 7;" /* tar = 2 */
+ "tsuspend.;"
+ "li 7, 3;"
+ "mtspr %[tar], 7;" /* tar = 3 */
+ "tresume.;"
+ "subi 4, 4, 1;"
+ "cmpdi 4, 0;"
+ "bne 2b;"
+ "tend.;"
+
+ /* Transaction sucess! TAR should be 3 */
+ "mfspr 7, %[tar];"
+ "ori %[res], 7, 4;" // res = 3|4 = 7
+ "b 4f;"
+
+ /* Abort handler. TAR should be rolled back to 1 */
+ "3:;"
+ "mfspr 7, %[tar];"
+ "ori %[res], 7, 8;" // res = 1|8 = 9
+ "4:;"
+
+ : [res]"=r"(result)
+ : [tar]"i"(SPRN_TAR)
+ : "memory", "r0", "r4", "r7");
+
+ /* If result is anything else other than 7 or 9, the tar
+ * value must have been corrupted. */
+ if ((result != 7) && (result != 9))
+ return 1;
+ }
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ /* A low number of iterations (eg 100) can cause a false pass */
+ if (argc > 1) {
+ if (strcmp(argv[1], "-h") == 0) {
+ printf("Syntax:\n\t%s [<num loops>]\n",
+ argv[0]);
+ return 1;
+ } else {
+ num_loops = atoi(argv[1]);
+ }
+ }
+
+ printf("Starting, %d loops\n", num_loops);
+
+ return test_harness(test_tar, "tm_tar");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-tmspr.c b/tools/testing/selftests/powerpc/tm/tm-tmspr.c
new file mode 100644
index 000000000..df1d7d4b1
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-tmspr.c
@@ -0,0 +1,150 @@
+/*
+ * Copyright 2015, Michael Neuling, IBM Corp.
+ * Licensed under GPLv2.
+ *
+ * Original: Michael Neuling 3/4/2014
+ * Modified: Rashmica Gupta 8/12/2015
+ *
+ * Check if any of the Transaction Memory SPRs get corrupted.
+ * - TFIAR - stores address of location of transaction failure
+ * - TFHAR - stores address of software failure handler (if transaction
+ * fails)
+ * - TEXASR - lots of info about the transacion(s)
+ *
+ * (1) create more threads than cpus
+ * (2) in each thread:
+ * (a) set TFIAR and TFHAR a unique value
+ * (b) loop for awhile, continually checking to see if
+ * either register has been corrupted.
+ *
+ * (3) Loop:
+ * (a) begin transaction
+ * (b) abort transaction
+ * (c) check TEXASR to see if FS has been corrupted
+ *
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <string.h>
+
+#include "utils.h"
+#include "tm.h"
+
+int num_loops = 10000;
+int passed = 1;
+
+void tfiar_tfhar(void *in)
+{
+ int i, cpu;
+ unsigned long tfhar, tfhar_rd, tfiar, tfiar_rd;
+ cpu_set_t cpuset;
+
+ CPU_ZERO(&cpuset);
+ cpu = (unsigned long)in >> 1;
+ CPU_SET(cpu, &cpuset);
+ sched_setaffinity(0, sizeof(cpuset), &cpuset);
+
+ /* TFIAR: Last bit has to be high so userspace can read register */
+ tfiar = ((unsigned long)in) + 1;
+ tfiar += 2;
+ mtspr(SPRN_TFIAR, tfiar);
+
+ /* TFHAR: Last two bits are reserved */
+ tfhar = ((unsigned long)in);
+ tfhar &= ~0x3UL;
+ tfhar += 4;
+ mtspr(SPRN_TFHAR, tfhar);
+
+ for (i = 0; i < num_loops; i++) {
+ tfhar_rd = mfspr(SPRN_TFHAR);
+ tfiar_rd = mfspr(SPRN_TFIAR);
+ if ( (tfhar != tfhar_rd) || (tfiar != tfiar_rd) ) {
+ passed = 0;
+ return;
+ }
+ }
+ return;
+}
+
+void texasr(void *in)
+{
+ unsigned long i;
+ uint64_t result = 0;
+
+ for (i = 0; i < num_loops; i++) {
+ asm __volatile__(
+ "tbegin.;"
+ "beq 3f ;"
+ "tabort. 0 ;"
+ "tend.;"
+
+ /* Abort handler */
+ "3: ;"
+ ::: "memory");
+
+ /* Check the TEXASR */
+ result = mfspr(SPRN_TEXASR);
+ if ((result & TEXASR_FS) == 0) {
+ passed = 0;
+ return;
+ }
+ }
+ return;
+}
+
+int test_tmspr()
+{
+ pthread_t *thread;
+ int thread_num;
+ unsigned long i;
+
+ SKIP_IF(!have_htm());
+
+ /* To cause some context switching */
+ thread_num = 10 * sysconf(_SC_NPROCESSORS_ONLN);
+
+ thread = malloc(thread_num * sizeof(pthread_t));
+ if (thread == NULL)
+ return EXIT_FAILURE;
+
+ /* Test TFIAR and TFHAR */
+ for (i = 0; i < thread_num; i += 2) {
+ if (pthread_create(&thread[i], NULL, (void *)tfiar_tfhar,
+ (void *)i))
+ return EXIT_FAILURE;
+ }
+ /* Test TEXASR */
+ for (i = 1; i < thread_num; i += 2) {
+ if (pthread_create(&thread[i], NULL, (void *)texasr, (void *)i))
+ return EXIT_FAILURE;
+ }
+
+ for (i = 0; i < thread_num; i++) {
+ if (pthread_join(thread[i], NULL) != 0)
+ return EXIT_FAILURE;
+ }
+
+ free(thread);
+
+ if (passed)
+ return 0;
+ else
+ return 1;
+}
+
+int main(int argc, char *argv[])
+{
+ if (argc > 1) {
+ if (strcmp(argv[1], "-h") == 0) {
+ printf("Syntax:\t [<num loops>]\n");
+ return 0;
+ } else {
+ num_loops = atoi(argv[1]);
+ }
+ }
+ return test_harness(test_tmspr, "tm_tmspr");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-trap.c b/tools/testing/selftests/powerpc/tm/tm-trap.c
new file mode 100644
index 000000000..179d592f0
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-trap.c
@@ -0,0 +1,331 @@
+/*
+ * Copyright 2017, Gustavo Romero, IBM Corp.
+ * Licensed under GPLv2.
+ *
+ * Check if thread endianness is flipped inadvertently to BE on trap
+ * caught in TM whilst MSR.FP and MSR.VEC are zero (i.e. just after
+ * load_fp and load_vec overflowed).
+ *
+ * The issue can be checked on LE machines simply by zeroing load_fp
+ * and load_vec and then causing a trap in TM. Since the endianness
+ * changes to BE on return from the signal handler, 'nop' is
+ * thread as an illegal instruction in following sequence:
+ * tbegin.
+ * beq 1f
+ * trap
+ * tend.
+ * 1: nop
+ *
+ * However, although the issue is also present on BE machines, it's a
+ * bit trickier to check it on BE machines because MSR.LE bit is set
+ * to zero which determines a BE endianness that is the native
+ * endianness on BE machines, so nothing notably critical happens,
+ * i.e. no illegal instruction is observed immediately after returning
+ * from the signal handler (as it happens on LE machines). Thus to test
+ * it on BE machines LE endianness is forced after a first trap and then
+ * the endianness is verified on subsequent traps to determine if the
+ * endianness "flipped back" to the native endianness (BE).
+ */
+
+#define _GNU_SOURCE
+#include <error.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <htmintrin.h>
+#include <inttypes.h>
+#include <pthread.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdbool.h>
+
+#include "tm.h"
+#include "utils.h"
+
+#define pr_error(error_code, format, ...) \
+ error_at_line(1, error_code, __FILE__, __LINE__, format, ##__VA_ARGS__)
+
+#define MSR_LE 1UL
+#define LE 1UL
+
+pthread_t t0_ping;
+pthread_t t1_pong;
+
+int exit_from_pong;
+
+int trap_event;
+int le;
+
+bool success;
+
+void trap_signal_handler(int signo, siginfo_t *si, void *uc)
+{
+ ucontext_t *ucp = uc;
+ uint64_t thread_endianness;
+
+ /* Get thread endianness: extract bit LE from MSR */
+ thread_endianness = MSR_LE & ucp->uc_mcontext.gp_regs[PT_MSR];
+
+ /***
+ * Little-Endian Machine
+ */
+
+ if (le) {
+ /* First trap event */
+ if (trap_event == 0) {
+ /* Do nothing. Since it is returning from this trap
+ * event that endianness is flipped by the bug, so just
+ * let the process return from the signal handler and
+ * check on the second trap event if endianness is
+ * flipped or not.
+ */
+ }
+ /* Second trap event */
+ else if (trap_event == 1) {
+ /*
+ * Since trap was caught in TM on first trap event, if
+ * endianness was still LE (not flipped inadvertently)
+ * after returning from the signal handler instruction
+ * (1) is executed (basically a 'nop'), as it's located
+ * at address of tbegin. +4 (rollback addr). As (1) on
+ * LE endianness does in effect nothing, instruction (2)
+ * is then executed again as 'trap', generating a second
+ * trap event (note that in that case 'trap' is caught
+ * not in transacional mode). On te other hand, if after
+ * the return from the signal handler the endianness in-
+ * advertently flipped, instruction (1) is tread as a
+ * branch instruction, i.e. b .+8, hence instruction (3)
+ * and (4) are executed (tbegin.; trap;) and we get sim-
+ * ilaly on the trap signal handler, but now in TM mode.
+ * Either way, it's now possible to check the MSR LE bit
+ * once in the trap handler to verify if endianness was
+ * flipped or not after the return from the second trap
+ * event. If endianness is flipped, the bug is present.
+ * Finally, getting a trap in TM mode or not is just
+ * worth noting because it affects the math to determine
+ * the offset added to the NIP on return: the NIP for a
+ * trap caught in TM is the rollback address, i.e. the
+ * next instruction after 'tbegin.', whilst the NIP for
+ * a trap caught in non-transactional mode is the very
+ * same address of the 'trap' instruction that generated
+ * the trap event.
+ */
+
+ if (thread_endianness == LE) {
+ /* Go to 'success', i.e. instruction (6) */
+ ucp->uc_mcontext.gp_regs[PT_NIP] += 16;
+ } else {
+ /*
+ * Thread endianness is BE, so it flipped
+ * inadvertently. Thus we flip back to LE and
+ * set NIP to go to 'failure', instruction (5).
+ */
+ ucp->uc_mcontext.gp_regs[PT_MSR] |= 1UL;
+ ucp->uc_mcontext.gp_regs[PT_NIP] += 4;
+ }
+ }
+ }
+
+ /***
+ * Big-Endian Machine
+ */
+
+ else {
+ /* First trap event */
+ if (trap_event == 0) {
+ /*
+ * Force thread endianness to be LE. Instructions (1),
+ * (3), and (4) will be executed, generating a second
+ * trap in TM mode.
+ */
+ ucp->uc_mcontext.gp_regs[PT_MSR] |= 1UL;
+ }
+ /* Second trap event */
+ else if (trap_event == 1) {
+ /*
+ * Do nothing. If bug is present on return from this
+ * second trap event endianness will flip back "automat-
+ * ically" to BE, otherwise thread endianness will
+ * continue to be LE, just as it was set above.
+ */
+ }
+ /* A third trap event */
+ else {
+ /*
+ * Once here it means that after returning from the sec-
+ * ond trap event instruction (4) (trap) was executed
+ * as LE, generating a third trap event. In that case
+ * endianness is still LE as set on return from the
+ * first trap event, hence no bug. Otherwise, bug
+ * flipped back to BE on return from the second trap
+ * event and instruction (4) was executed as 'tdi' (so
+ * basically a 'nop') and branch to 'failure' in
+ * instruction (5) was taken to indicate failure and we
+ * never get here.
+ */
+
+ /*
+ * Flip back to BE and go to instruction (6), i.e. go to
+ * 'success'.
+ */
+ ucp->uc_mcontext.gp_regs[PT_MSR] &= ~1UL;
+ ucp->uc_mcontext.gp_regs[PT_NIP] += 8;
+ }
+ }
+
+ trap_event++;
+}
+
+void usr1_signal_handler(int signo, siginfo_t *si, void *not_used)
+{
+ /* Got a USR1 signal from ping(), so just tell pong() to exit */
+ exit_from_pong = 1;
+}
+
+void *ping(void *not_used)
+{
+ uint64_t i;
+
+ trap_event = 0;
+
+ /*
+ * Wait an amount of context switches so load_fp and load_vec overflows
+ * and MSR_[FP|VEC|V] is 0.
+ */
+ for (i = 0; i < 1024*1024*512; i++)
+ ;
+
+ asm goto(
+ /*
+ * [NA] means "Native Endianness", i.e. it tells how a
+ * instruction is executed on machine's native endianness (in
+ * other words, native endianness matches kernel endianness).
+ * [OP] means "Opposite Endianness", i.e. on a BE machine, it
+ * tells how a instruction is executed as a LE instruction; con-
+ * versely, on a LE machine, it tells how a instruction is
+ * executed as a BE instruction. When [NA] is omitted, it means
+ * that the native interpretation of a given instruction is not
+ * relevant for the test. Likewise when [OP] is omitted.
+ */
+
+ " tbegin. ;" /* (0) tbegin. [NA] */
+ " tdi 0, 0, 0x48;" /* (1) nop [NA]; b (3) [OP] */
+ " trap ;" /* (2) trap [NA] */
+ ".long 0x1D05007C;" /* (3) tbegin. [OP] */
+ ".long 0x0800E07F;" /* (4) trap [OP]; nop [NA] */
+ " b %l[failure] ;" /* (5) b [NA]; MSR.LE flipped (bug) */
+ " b %l[success] ;" /* (6) b [NA]; MSR.LE did not flip (ok)*/
+
+ : : : : failure, success);
+
+failure:
+ success = false;
+ goto exit_from_ping;
+
+success:
+ success = true;
+
+exit_from_ping:
+ /* Tell pong() to exit before leaving */
+ pthread_kill(t1_pong, SIGUSR1);
+ return NULL;
+}
+
+void *pong(void *not_used)
+{
+ while (!exit_from_pong)
+ /*
+ * Induce context switches on ping() thread
+ * until ping() finishes its job and signs
+ * to exit from this loop.
+ */
+ sched_yield();
+
+ return NULL;
+}
+
+int tm_trap_test(void)
+{
+ uint16_t k = 1;
+
+ int rc;
+
+ pthread_attr_t attr;
+ cpu_set_t cpuset;
+
+ struct sigaction trap_sa;
+
+ SKIP_IF(!have_htm());
+
+ trap_sa.sa_flags = SA_SIGINFO;
+ trap_sa.sa_sigaction = trap_signal_handler;
+ sigaction(SIGTRAP, &trap_sa, NULL);
+
+ struct sigaction usr1_sa;
+
+ usr1_sa.sa_flags = SA_SIGINFO;
+ usr1_sa.sa_sigaction = usr1_signal_handler;
+ sigaction(SIGUSR1, &usr1_sa, NULL);
+
+ /* Set only CPU 0 in the mask. Both threads will be bound to cpu 0. */
+ CPU_ZERO(&cpuset);
+ CPU_SET(0, &cpuset);
+
+ /* Init pthread attribute */
+ rc = pthread_attr_init(&attr);
+ if (rc)
+ pr_error(rc, "pthread_attr_init()");
+
+ /*
+ * Bind thread ping() and pong() both to CPU 0 so they ping-pong and
+ * speed up context switches on ping() thread, speeding up the load_fp
+ * and load_vec overflow.
+ */
+ rc = pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset);
+ if (rc)
+ pr_error(rc, "pthread_attr_setaffinity()");
+
+ /* Figure out the machine endianness */
+ le = (int) *(uint8_t *)&k;
+
+ printf("%s machine detected. Checking if endianness flips %s",
+ le ? "Little-Endian" : "Big-Endian",
+ "inadvertently on trap in TM... ");
+
+ rc = fflush(0);
+ if (rc)
+ pr_error(rc, "fflush()");
+
+ /* Launch ping() */
+ rc = pthread_create(&t0_ping, &attr, ping, NULL);
+ if (rc)
+ pr_error(rc, "pthread_create()");
+
+ exit_from_pong = 0;
+
+ /* Launch pong() */
+ rc = pthread_create(&t1_pong, &attr, pong, NULL);
+ if (rc)
+ pr_error(rc, "pthread_create()");
+
+ rc = pthread_join(t0_ping, NULL);
+ if (rc)
+ pr_error(rc, "pthread_join()");
+
+ rc = pthread_join(t1_pong, NULL);
+ if (rc)
+ pr_error(rc, "pthread_join()");
+
+ if (success) {
+ printf("no.\n"); /* no, endianness did not flip inadvertently */
+ return EXIT_SUCCESS;
+ }
+
+ printf("yes!\n"); /* yes, endianness did flip inadvertently */
+ return EXIT_FAILURE;
+}
+
+int main(int argc, char **argv)
+{
+ return test_harness(tm_trap_test, "tm_trap_test");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-unavailable.c b/tools/testing/selftests/powerpc/tm/tm-unavailable.c
new file mode 100644
index 000000000..09894f4ff
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-unavailable.c
@@ -0,0 +1,407 @@
+/*
+ * Copyright 2017, Gustavo Romero, Breno Leitao, Cyril Bur, IBM Corp.
+ * Licensed under GPLv2.
+ *
+ * Force FP, VEC and VSX unavailable exception during transaction in all
+ * possible scenarios regarding the MSR.FP and MSR.VEC state, e.g. when FP
+ * is enable and VEC is disable, when FP is disable and VEC is enable, and
+ * so on. Then we check if the restored state is correctly set for the
+ * FP and VEC registers to the previous state we set just before we entered
+ * in TM, i.e. we check if it corrupts somehow the recheckpointed FP and
+ * VEC/Altivec registers on abortion due to an unavailable exception in TM.
+ * N.B. In this test we do not test all the FP/Altivec/VSX registers for
+ * corruption, but only for registers vs0 and vs32, which are respectively
+ * representatives of FP and VEC/Altivec reg sets.
+ */
+
+#define _GNU_SOURCE
+#include <error.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <stdbool.h>
+#include <pthread.h>
+#include <sched.h>
+
+#include "tm.h"
+
+#define DEBUG 0
+
+/* Unavailable exceptions to test in HTM */
+#define FP_UNA_EXCEPTION 0
+#define VEC_UNA_EXCEPTION 1
+#define VSX_UNA_EXCEPTION 2
+
+#define NUM_EXCEPTIONS 3
+#define err_at_line(status, errnum, format, ...) \
+ error_at_line(status, errnum, __FILE__, __LINE__, format ##__VA_ARGS__)
+
+#define pr_warn(code, format, ...) err_at_line(0, code, format, ##__VA_ARGS__)
+#define pr_err(code, format, ...) err_at_line(1, code, format, ##__VA_ARGS__)
+
+struct Flags {
+ int touch_fp;
+ int touch_vec;
+ int result;
+ int exception;
+} flags;
+
+bool expecting_failure(void)
+{
+ if (flags.touch_fp && flags.exception == FP_UNA_EXCEPTION)
+ return false;
+
+ if (flags.touch_vec && flags.exception == VEC_UNA_EXCEPTION)
+ return false;
+
+ /*
+ * If both FP and VEC are touched it does not mean that touching VSX
+ * won't raise an exception. However since FP and VEC state are already
+ * correctly loaded, the transaction is not aborted (i.e.
+ * treclaimed/trecheckpointed) and MSR.VSX is just set as 1, so a TM
+ * failure is not expected also in this case.
+ */
+ if ((flags.touch_fp && flags.touch_vec) &&
+ flags.exception == VSX_UNA_EXCEPTION)
+ return false;
+
+ return true;
+}
+
+/* Check if failure occurred whilst in transaction. */
+bool is_failure(uint64_t condition_reg)
+{
+ /*
+ * When failure handling occurs, CR0 is set to 0b1010 (0xa). Otherwise
+ * transaction completes without failure and hence reaches out 'tend.'
+ * that sets CR0 to 0b0100 (0x4).
+ */
+ return ((condition_reg >> 28) & 0xa) == 0xa;
+}
+
+void *tm_una_ping(void *input)
+{
+
+ /*
+ * Expected values for vs0 and vs32 after a TM failure. They must never
+ * change, otherwise they got corrupted.
+ */
+ uint64_t high_vs0 = 0x5555555555555555;
+ uint64_t low_vs0 = 0xffffffffffffffff;
+ uint64_t high_vs32 = 0x5555555555555555;
+ uint64_t low_vs32 = 0xffffffffffffffff;
+
+ /* Counter for busy wait */
+ uint64_t counter = 0x1ff000000;
+
+ /*
+ * Variable to keep a copy of CR register content taken just after we
+ * leave the transactional state.
+ */
+ uint64_t cr_ = 0;
+
+ /*
+ * Wait a bit so thread can get its name "ping". This is not important
+ * to reproduce the issue but it's nice to have for systemtap debugging.
+ */
+ if (DEBUG)
+ sleep(1);
+
+ printf("If MSR.FP=%d MSR.VEC=%d: ", flags.touch_fp, flags.touch_vec);
+
+ if (flags.exception != FP_UNA_EXCEPTION &&
+ flags.exception != VEC_UNA_EXCEPTION &&
+ flags.exception != VSX_UNA_EXCEPTION) {
+ printf("No valid exception specified to test.\n");
+ return NULL;
+ }
+
+ asm (
+ /* Prepare to merge low and high. */
+ " mtvsrd 33, %[high_vs0] ;"
+ " mtvsrd 34, %[low_vs0] ;"
+
+ /*
+ * Adjust VS0 expected value after an TM failure,
+ * i.e. vs0 = 0x5555555555555555555FFFFFFFFFFFFFFFF
+ */
+ " xxmrghd 0, 33, 34 ;"
+
+ /*
+ * Adjust VS32 expected value after an TM failure,
+ * i.e. vs32 = 0x5555555555555555555FFFFFFFFFFFFFFFF
+ */
+ " xxmrghd 32, 33, 34 ;"
+
+ /*
+ * Wait an amount of context switches so load_fp and load_vec
+ * overflow and MSR.FP, MSR.VEC, and MSR.VSX become zero (off).
+ */
+ " mtctr %[counter] ;"
+
+ /* Decrement CTR branch if CTR non zero. */
+ "1: bdnz 1b ;"
+
+ /*
+ * Check if we want to touch FP prior to the test in order
+ * to set MSR.FP = 1 before provoking an unavailable
+ * exception in TM.
+ */
+ " cmpldi %[touch_fp], 0 ;"
+ " beq no_fp ;"
+ " fadd 10, 10, 10 ;"
+ "no_fp: ;"
+
+ /*
+ * Check if we want to touch VEC prior to the test in order
+ * to set MSR.VEC = 1 before provoking an unavailable
+ * exception in TM.
+ */
+ " cmpldi %[touch_vec], 0 ;"
+ " beq no_vec ;"
+ " vaddcuw 10, 10, 10 ;"
+ "no_vec: ;"
+
+ /*
+ * Perhaps it would be a better idea to do the
+ * compares outside transactional context and simply
+ * duplicate code.
+ */
+ " tbegin. ;"
+ " beq trans_fail ;"
+
+ /* Do we do FP Unavailable? */
+ " cmpldi %[exception], %[ex_fp] ;"
+ " bne 1f ;"
+ " fadd 10, 10, 10 ;"
+ " b done ;"
+
+ /* Do we do VEC Unavailable? */
+ "1: cmpldi %[exception], %[ex_vec] ;"
+ " bne 2f ;"
+ " vaddcuw 10, 10, 10 ;"
+ " b done ;"
+
+ /*
+ * Not FP or VEC, therefore VSX. Ensure this
+ * instruction always generates a VSX Unavailable.
+ * ISA 3.0 is tricky here.
+ * (xxmrghd will on ISA 2.07 and ISA 3.0)
+ */
+ "2: xxmrghd 10, 10, 10 ;"
+
+ "done: tend. ;"
+
+ "trans_fail: ;"
+
+ /* Give values back to C. */
+ " mfvsrd %[high_vs0], 0 ;"
+ " xxsldwi 3, 0, 0, 2 ;"
+ " mfvsrd %[low_vs0], 3 ;"
+ " mfvsrd %[high_vs32], 32 ;"
+ " xxsldwi 3, 32, 32, 2 ;"
+ " mfvsrd %[low_vs32], 3 ;"
+
+ /* Give CR back to C so that it can check what happened. */
+ " mfcr %[cr_] ;"
+
+ : [high_vs0] "+r" (high_vs0),
+ [low_vs0] "+r" (low_vs0),
+ [high_vs32] "=r" (high_vs32),
+ [low_vs32] "=r" (low_vs32),
+ [cr_] "+r" (cr_)
+ : [touch_fp] "r" (flags.touch_fp),
+ [touch_vec] "r" (flags.touch_vec),
+ [exception] "r" (flags.exception),
+ [ex_fp] "i" (FP_UNA_EXCEPTION),
+ [ex_vec] "i" (VEC_UNA_EXCEPTION),
+ [ex_vsx] "i" (VSX_UNA_EXCEPTION),
+ [counter] "r" (counter)
+
+ : "cr0", "ctr", "v10", "vs0", "vs10", "vs3", "vs32", "vs33",
+ "vs34", "fr10"
+
+ );
+
+ /*
+ * Check if we were expecting a failure and it did not occur by checking
+ * CR0 state just after we leave the transaction. Either way we check if
+ * vs0 or vs32 got corrupted.
+ */
+ if (expecting_failure() && !is_failure(cr_)) {
+ printf("\n\tExpecting the transaction to fail, %s",
+ "but it didn't\n\t");
+ flags.result++;
+ }
+
+ /* Check if we were not expecting a failure and a it occurred. */
+ if (!expecting_failure() && is_failure(cr_) &&
+ !failure_is_reschedule()) {
+ printf("\n\tUnexpected transaction failure 0x%02lx\n\t",
+ failure_code());
+ return (void *) -1;
+ }
+
+ /*
+ * Check if TM failed due to the cause we were expecting. 0xda is a
+ * TM_CAUSE_FAC_UNAV cause, otherwise it's an unexpected cause, unless
+ * it was caused by a reschedule.
+ */
+ if (is_failure(cr_) && !failure_is_unavailable() &&
+ !failure_is_reschedule()) {
+ printf("\n\tUnexpected failure cause 0x%02lx\n\t",
+ failure_code());
+ return (void *) -1;
+ }
+
+ /* 0x4 is a success and 0xa is a fail. See comment in is_failure(). */
+ if (DEBUG)
+ printf("CR0: 0x%1lx ", cr_ >> 28);
+
+ /* Check FP (vs0) for the expected value. */
+ if (high_vs0 != 0x5555555555555555 || low_vs0 != 0xFFFFFFFFFFFFFFFF) {
+ printf("FP corrupted!");
+ printf(" high = %#16" PRIx64 " low = %#16" PRIx64 " ",
+ high_vs0, low_vs0);
+ flags.result++;
+ } else
+ printf("FP ok ");
+
+ /* Check VEC (vs32) for the expected value. */
+ if (high_vs32 != 0x5555555555555555 || low_vs32 != 0xFFFFFFFFFFFFFFFF) {
+ printf("VEC corrupted!");
+ printf(" high = %#16" PRIx64 " low = %#16" PRIx64,
+ high_vs32, low_vs32);
+ flags.result++;
+ } else
+ printf("VEC ok");
+
+ putchar('\n');
+
+ return NULL;
+}
+
+/* Thread to force context switch */
+void *tm_una_pong(void *not_used)
+{
+ /* Wait thread get its name "pong". */
+ if (DEBUG)
+ sleep(1);
+
+ /* Classed as an interactive-like thread. */
+ while (1)
+ sched_yield();
+}
+
+/* Function that creates a thread and launches the "ping" task. */
+void test_fp_vec(int fp, int vec, pthread_attr_t *attr)
+{
+ int retries = 2;
+ void *ret_value;
+ pthread_t t0;
+
+ flags.touch_fp = fp;
+ flags.touch_vec = vec;
+
+ /*
+ * Without luck it's possible that the transaction is aborted not due to
+ * the unavailable exception caught in the middle as we expect but also,
+ * for instance, due to a context switch or due to a KVM reschedule (if
+ * it's running on a VM). Thus we try a few times before giving up,
+ * checking if the failure cause is the one we expect.
+ */
+ do {
+ int rc;
+
+ /* Bind to CPU 0, as specified in 'attr'. */
+ rc = pthread_create(&t0, attr, tm_una_ping, (void *) &flags);
+ if (rc)
+ pr_err(rc, "pthread_create()");
+ rc = pthread_setname_np(t0, "tm_una_ping");
+ if (rc)
+ pr_warn(rc, "pthread_setname_np");
+ rc = pthread_join(t0, &ret_value);
+ if (rc)
+ pr_err(rc, "pthread_join");
+
+ retries--;
+ } while (ret_value != NULL && retries);
+
+ if (!retries) {
+ flags.result = 1;
+ if (DEBUG)
+ printf("All transactions failed unexpectedly\n");
+
+ }
+}
+
+int tm_unavailable_test(void)
+{
+ int rc, exception; /* FP = 0, VEC = 1, VSX = 2 */
+ pthread_t t1;
+ pthread_attr_t attr;
+ cpu_set_t cpuset;
+
+ SKIP_IF(!have_htm());
+
+ /* Set only CPU 0 in the mask. Both threads will be bound to CPU 0. */
+ CPU_ZERO(&cpuset);
+ CPU_SET(0, &cpuset);
+
+ /* Init pthread attribute. */
+ rc = pthread_attr_init(&attr);
+ if (rc)
+ pr_err(rc, "pthread_attr_init()");
+
+ /* Set CPU 0 mask into the pthread attribute. */
+ rc = pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset);
+ if (rc)
+ pr_err(rc, "pthread_attr_setaffinity_np()");
+
+ rc = pthread_create(&t1, &attr /* Bind to CPU 0 */, tm_una_pong, NULL);
+ if (rc)
+ pr_err(rc, "pthread_create()");
+
+ /* Name it for systemtap convenience */
+ rc = pthread_setname_np(t1, "tm_una_pong");
+ if (rc)
+ pr_warn(rc, "pthread_create()");
+
+ flags.result = 0;
+
+ for (exception = 0; exception < NUM_EXCEPTIONS; exception++) {
+ printf("Checking if FP/VEC registers are sane after");
+
+ if (exception == FP_UNA_EXCEPTION)
+ printf(" a FP unavailable exception...\n");
+
+ else if (exception == VEC_UNA_EXCEPTION)
+ printf(" a VEC unavailable exception...\n");
+
+ else
+ printf(" a VSX unavailable exception...\n");
+
+ flags.exception = exception;
+
+ test_fp_vec(0, 0, &attr);
+ test_fp_vec(1, 0, &attr);
+ test_fp_vec(0, 1, &attr);
+ test_fp_vec(1, 1, &attr);
+
+ }
+
+ if (flags.result > 0) {
+ printf("result: failed!\n");
+ exit(1);
+ } else {
+ printf("result: success\n");
+ exit(0);
+ }
+}
+
+int main(int argc, char **argv)
+{
+ test_harness_set_timeout(220);
+ return test_harness(tm_unavailable_test, "tm_unavailable_test");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c b/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c
new file mode 100644
index 000000000..137185ba4
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c
@@ -0,0 +1,118 @@
+/*
+ * Copyright 2017, Michael Neuling, IBM Corp.
+ * Licensed under GPLv2.
+ * Original: Breno Leitao <brenohl@br.ibm.com> &
+ * Gustavo Bueno Romero <gromero@br.ibm.com>
+ * Edited: Michael Neuling
+ *
+ * Force VMX unavailable during a transaction and see if it corrupts
+ * the checkpointed VMX register state after the abort.
+ */
+
+#include <inttypes.h>
+#include <htmintrin.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <pthread.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <pthread.h>
+
+#include "tm.h"
+#include "utils.h"
+
+int passed;
+
+void *worker(void *unused)
+{
+ __int128 vmx0;
+ uint64_t texasr;
+
+ asm goto (
+ "li 3, 1;" /* Stick non-zero value in VMX0 */
+ "std 3, 0(%[vmx0_ptr]);"
+ "lvx 0, 0, %[vmx0_ptr];"
+
+ /* Wait here a bit so we get scheduled out 255 times */
+ "lis 3, 0x3fff;"
+ "1: ;"
+ "addi 3, 3, -1;"
+ "cmpdi 3, 0;"
+ "bne 1b;"
+
+ /* Kernel will hopefully turn VMX off now */
+
+ "tbegin. ;"
+ "beq failure;"
+
+ /* Cause VMX unavail. Any VMX instruction */
+ "vaddcuw 0,0,0;"
+
+ "tend. ;"
+ "b %l[success];"
+
+ /* Check VMX0 sanity after abort */
+ "failure: ;"
+ "lvx 1, 0, %[vmx0_ptr];"
+ "vcmpequb. 2, 0, 1;"
+ "bc 4, 24, %l[value_mismatch];"
+ "b %l[value_match];"
+ :
+ : [vmx0_ptr] "r"(&vmx0)
+ : "r3"
+ : success, value_match, value_mismatch
+ );
+
+ /* HTM aborted and VMX0 is corrupted */
+value_mismatch:
+ texasr = __builtin_get_texasr();
+
+ printf("\n\n==============\n\n");
+ printf("Failure with error: %lx\n", _TEXASR_FAILURE_CODE(texasr));
+ printf("Summary error : %lx\n", _TEXASR_FAILURE_SUMMARY(texasr));
+ printf("TFIAR exact : %lx\n\n", _TEXASR_TFIAR_EXACT(texasr));
+
+ passed = 0;
+ return NULL;
+
+ /* HTM aborted but VMX0 is correct */
+value_match:
+// printf("!");
+ return NULL;
+
+success:
+// printf(".");
+ return NULL;
+}
+
+int tm_vmx_unavail_test()
+{
+ int threads;
+ pthread_t *thread;
+
+ SKIP_IF(!have_htm());
+
+ passed = 1;
+
+ threads = sysconf(_SC_NPROCESSORS_ONLN) * 4;
+ thread = malloc(sizeof(pthread_t)*threads);
+ if (!thread)
+ return EXIT_FAILURE;
+
+ for (uint64_t i = 0; i < threads; i++)
+ pthread_create(&thread[i], NULL, &worker, NULL);
+
+ for (uint64_t i = 0; i < threads; i++)
+ pthread_join(thread[i], NULL);
+
+ free(thread);
+
+ return passed ? EXIT_SUCCESS : EXIT_FAILURE;
+}
+
+
+int main(int argc, char **argv)
+{
+ return test_harness(tm_vmx_unavail_test, "tm_vmx_unavail_test");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-vmxcopy.c b/tools/testing/selftests/powerpc/tm/tm-vmxcopy.c
new file mode 100644
index 000000000..fe5281158
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-vmxcopy.c
@@ -0,0 +1,104 @@
+/*
+ * Copyright 2015, Michael Neuling, IBM Corp.
+ * Licensed under GPLv2.
+ *
+ * Original: Michael Neuling 4/12/2013
+ * Edited: Rashmica Gupta 4/12/2015
+ *
+ * See if the altivec state is leaked out of an aborted transaction due to
+ * kernel vmx copy loops.
+ *
+ * When the transaction aborts, VSR values should rollback to the values
+ * they held before the transaction commenced. Using VSRs while transaction
+ * is suspended should not affect the checkpointed values.
+ *
+ * (1) write A to a VSR
+ * (2) start transaction
+ * (3) suspend transaction
+ * (4) change the VSR to B
+ * (5) trigger kernel vmx copy loop
+ * (6) abort transaction
+ * (7) check that the VSR value is A
+ */
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <string.h>
+#include <assert.h>
+
+#include "tm.h"
+#include "utils.h"
+
+int test_vmxcopy()
+{
+ long double vecin = 1.3;
+ long double vecout;
+ unsigned long pgsize = getpagesize();
+ int i;
+ int fd;
+ int size = pgsize*16;
+ char tmpfile[] = "/tmp/page_faultXXXXXX";
+ char buf[pgsize];
+ char *a;
+ uint64_t aborted = 0;
+
+ SKIP_IF(!have_htm());
+ SKIP_IF(!is_ppc64le());
+
+ fd = mkstemp(tmpfile);
+ assert(fd >= 0);
+
+ memset(buf, 0, pgsize);
+ for (i = 0; i < size; i += pgsize)
+ assert(write(fd, buf, pgsize) == pgsize);
+
+ unlink(tmpfile);
+
+ a = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
+ assert(a != MAP_FAILED);
+
+ asm __volatile__(
+ "lxvd2x 40,0,%[vecinptr];" /* set 40 to initial value*/
+ "tbegin.;"
+ "beq 3f;"
+ "tsuspend.;"
+ "xxlxor 40,40,40;" /* set 40 to 0 */
+ "std 5, 0(%[map]);" /* cause kernel vmx copy page */
+ "tabort. 0;"
+ "tresume.;"
+ "tend.;"
+ "li %[res], 0;"
+ "b 5f;"
+
+ /* Abort handler */
+ "3:;"
+ "li %[res], 1;"
+
+ "5:;"
+ "stxvd2x 40,0,%[vecoutptr];"
+ : [res]"=r"(aborted)
+ : [vecinptr]"r"(&vecin),
+ [vecoutptr]"r"(&vecout),
+ [map]"r"(a)
+ : "memory", "r0", "r3", "r4", "r5", "r6", "r7");
+
+ if (aborted && (vecin != vecout)){
+ printf("FAILED: vector state leaked on abort %f != %f\n",
+ (double)vecin, (double)vecout);
+ return 1;
+ }
+
+ munmap(a, size);
+
+ close(fd);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(test_vmxcopy, "tm_vmxcopy");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm.h b/tools/testing/selftests/powerpc/tm/tm.h
new file mode 100644
index 000000000..5518b1d4e
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright 2015, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#ifndef _SELFTESTS_POWERPC_TM_TM_H
+#define _SELFTESTS_POWERPC_TM_TM_H
+
+#include <asm/tm.h>
+#include <asm/cputable.h>
+#include <stdbool.h>
+
+#include "utils.h"
+
+static inline bool have_htm(void)
+{
+#ifdef PPC_FEATURE2_HTM
+ return have_hwcap2(PPC_FEATURE2_HTM);
+#else
+ printf("PPC_FEATURE2_HTM not defined, can't check AT_HWCAP2\n");
+ return false;
+#endif
+}
+
+static inline bool have_htm_nosc(void)
+{
+#ifdef PPC_FEATURE2_HTM_NOSC
+ return have_hwcap2(PPC_FEATURE2_HTM_NOSC);
+#else
+ printf("PPC_FEATURE2_HTM_NOSC not defined, can't check AT_HWCAP2\n");
+ return false;
+#endif
+}
+
+static inline long failure_code(void)
+{
+ return __builtin_get_texasru() >> 24;
+}
+
+static inline bool failure_is_persistent(void)
+{
+ return (failure_code() & TM_CAUSE_PERSISTENT) == TM_CAUSE_PERSISTENT;
+}
+
+static inline bool failure_is_syscall(void)
+{
+ return (failure_code() & TM_CAUSE_SYSCALL) == TM_CAUSE_SYSCALL;
+}
+
+static inline bool failure_is_unavailable(void)
+{
+ return (failure_code() & TM_CAUSE_FAC_UNAV) == TM_CAUSE_FAC_UNAV;
+}
+
+static inline bool failure_is_reschedule(void)
+{
+ if ((failure_code() & TM_CAUSE_RESCHED) == TM_CAUSE_RESCHED ||
+ (failure_code() & TM_CAUSE_KVM_RESCHED) == TM_CAUSE_KVM_RESCHED)
+ return true;
+
+ return false;
+}
+
+static inline bool failure_is_nesting(void)
+{
+ return (__builtin_get_texasru() & 0x400000);
+}
+
+static inline int tcheck(void)
+{
+ long cr;
+ asm volatile ("tcheck 0" : "=r"(cr) : : "cr0");
+ return (cr >> 28) & 4;
+}
+
+static inline bool tcheck_doomed(void)
+{
+ return tcheck() & 8;
+}
+
+static inline bool tcheck_active(void)
+{
+ return tcheck() & 4;
+}
+
+static inline bool tcheck_suspended(void)
+{
+ return tcheck() & 2;
+}
+
+static inline bool tcheck_transactional(void)
+{
+ return tcheck() & 6;
+}
+
+#endif /* _SELFTESTS_POWERPC_TM_TM_H */
diff --git a/tools/testing/selftests/powerpc/utils.c b/tools/testing/selftests/powerpc/utils.c
new file mode 100644
index 000000000..ba0959d45
--- /dev/null
+++ b/tools/testing/selftests/powerpc/utils.c
@@ -0,0 +1,136 @@
+/*
+ * Copyright 2013-2015, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#define _GNU_SOURCE /* For CPU_ZERO etc. */
+
+#include <elf.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <link.h>
+#include <sched.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/sysinfo.h>
+#include <sys/types.h>
+#include <sys/utsname.h>
+#include <unistd.h>
+
+#include "utils.h"
+
+static char auxv[4096];
+
+int read_auxv(char *buf, ssize_t buf_size)
+{
+ ssize_t num;
+ int rc, fd;
+
+ fd = open("/proc/self/auxv", O_RDONLY);
+ if (fd == -1) {
+ perror("open");
+ return -errno;
+ }
+
+ num = read(fd, buf, buf_size);
+ if (num < 0) {
+ perror("read");
+ rc = -EIO;
+ goto out;
+ }
+
+ if (num > buf_size) {
+ printf("overflowed auxv buffer\n");
+ rc = -EOVERFLOW;
+ goto out;
+ }
+
+ rc = 0;
+out:
+ close(fd);
+ return rc;
+}
+
+void *find_auxv_entry(int type, char *auxv)
+{
+ ElfW(auxv_t) *p;
+
+ p = (ElfW(auxv_t) *)auxv;
+
+ while (p->a_type != AT_NULL) {
+ if (p->a_type == type)
+ return p;
+
+ p++;
+ }
+
+ return NULL;
+}
+
+void *get_auxv_entry(int type)
+{
+ ElfW(auxv_t) *p;
+
+ if (read_auxv(auxv, sizeof(auxv)))
+ return NULL;
+
+ p = find_auxv_entry(type, auxv);
+ if (p)
+ return (void *)p->a_un.a_val;
+
+ return NULL;
+}
+
+int pick_online_cpu(void)
+{
+ int ncpus, cpu = -1;
+ cpu_set_t *mask;
+ size_t size;
+
+ ncpus = get_nprocs_conf();
+ size = CPU_ALLOC_SIZE(ncpus);
+ mask = CPU_ALLOC(ncpus);
+ if (!mask) {
+ perror("malloc");
+ return -1;
+ }
+
+ CPU_ZERO_S(size, mask);
+
+ if (sched_getaffinity(0, size, mask)) {
+ perror("sched_getaffinity");
+ goto done;
+ }
+
+ /* We prefer a primary thread, but skip 0 */
+ for (cpu = 8; cpu < ncpus; cpu += 8)
+ if (CPU_ISSET_S(cpu, size, mask))
+ goto done;
+
+ /* Search for anything, but in reverse */
+ for (cpu = ncpus - 1; cpu >= 0; cpu--)
+ if (CPU_ISSET_S(cpu, size, mask))
+ goto done;
+
+ printf("No cpus in affinity mask?!\n");
+
+done:
+ CPU_FREE(mask);
+ return cpu;
+}
+
+bool is_ppc64le(void)
+{
+ struct utsname uts;
+ int rc;
+
+ errno = 0;
+ rc = uname(&uts);
+ if (rc) {
+ perror("uname");
+ return false;
+ }
+
+ return strcmp(uts.machine, "ppc64le") == 0;
+}
diff --git a/tools/testing/selftests/powerpc/vphn/.gitignore b/tools/testing/selftests/powerpc/vphn/.gitignore
new file mode 100644
index 000000000..7c0439501
--- /dev/null
+++ b/tools/testing/selftests/powerpc/vphn/.gitignore
@@ -0,0 +1 @@
+test-vphn
diff --git a/tools/testing/selftests/powerpc/vphn/Makefile b/tools/testing/selftests/powerpc/vphn/Makefile
new file mode 100644
index 000000000..fb82068c9
--- /dev/null
+++ b/tools/testing/selftests/powerpc/vphn/Makefile
@@ -0,0 +1,9 @@
+TEST_GEN_PROGS := test-vphn
+
+CFLAGS += -m64
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): ../harness.c
+
diff --git a/tools/testing/selftests/powerpc/vphn/test-vphn.c b/tools/testing/selftests/powerpc/vphn/test-vphn.c
new file mode 100644
index 000000000..81d3069ff
--- /dev/null
+++ b/tools/testing/selftests/powerpc/vphn/test-vphn.c
@@ -0,0 +1,411 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <byteswap.h>
+#include "utils.h"
+#include "subunit.h"
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define cpu_to_be32(x) bswap_32(x)
+#define be32_to_cpu(x) bswap_32(x)
+#define be16_to_cpup(x) bswap_16(*x)
+#define cpu_to_be64(x) bswap_64(x)
+#else
+#define cpu_to_be32(x) (x)
+#define be32_to_cpu(x) (x)
+#define be16_to_cpup(x) (*x)
+#define cpu_to_be64(x) (x)
+#endif
+
+#include "vphn.c"
+
+static struct test {
+ char *descr;
+ long input[VPHN_REGISTER_COUNT];
+ u32 expected[VPHN_ASSOC_BUFSIZE];
+} all_tests[] = {
+ {
+ "vphn: no data",
+ {
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ },
+ {
+ 0x00000000
+ }
+ },
+ {
+ "vphn: 1 x 16-bit value",
+ {
+ 0x8001ffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ },
+ {
+ 0x00000001,
+ 0x00000001
+ }
+ },
+ {
+ "vphn: 2 x 16-bit values",
+ {
+ 0x80018002ffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ },
+ {
+ 0x00000002,
+ 0x00000001,
+ 0x00000002
+ }
+ },
+ {
+ "vphn: 3 x 16-bit values",
+ {
+ 0x800180028003ffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ },
+ {
+ 0x00000003,
+ 0x00000001,
+ 0x00000002,
+ 0x00000003
+ }
+ },
+ {
+ "vphn: 4 x 16-bit values",
+ {
+ 0x8001800280038004,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ },
+ {
+ 0x00000004,
+ 0x00000001,
+ 0x00000002,
+ 0x00000003,
+ 0x00000004
+ }
+ },
+ {
+ /* Parsing the next 16-bit value out of the next 64-bit input
+ * value.
+ */
+ "vphn: 5 x 16-bit values",
+ {
+ 0x8001800280038004,
+ 0x8005ffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ },
+ {
+ 0x00000005,
+ 0x00000001,
+ 0x00000002,
+ 0x00000003,
+ 0x00000004,
+ 0x00000005
+ }
+ },
+ {
+ /* Parse at most 6 x 64-bit input values */
+ "vphn: 24 x 16-bit values",
+ {
+ 0x8001800280038004,
+ 0x8005800680078008,
+ 0x8009800a800b800c,
+ 0x800d800e800f8010,
+ 0x8011801280138014,
+ 0x8015801680178018
+ },
+ {
+ 0x00000018,
+ 0x00000001,
+ 0x00000002,
+ 0x00000003,
+ 0x00000004,
+ 0x00000005,
+ 0x00000006,
+ 0x00000007,
+ 0x00000008,
+ 0x00000009,
+ 0x0000000a,
+ 0x0000000b,
+ 0x0000000c,
+ 0x0000000d,
+ 0x0000000e,
+ 0x0000000f,
+ 0x00000010,
+ 0x00000011,
+ 0x00000012,
+ 0x00000013,
+ 0x00000014,
+ 0x00000015,
+ 0x00000016,
+ 0x00000017,
+ 0x00000018
+ }
+ },
+ {
+ "vphn: 1 x 32-bit value",
+ {
+ 0x00000001ffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff
+ },
+ {
+ 0x00000001,
+ 0x00000001
+ }
+ },
+ {
+ "vphn: 2 x 32-bit values",
+ {
+ 0x0000000100000002,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff
+ },
+ {
+ 0x00000002,
+ 0x00000001,
+ 0x00000002
+ }
+ },
+ {
+ /* Parsing the next 32-bit value out of the next 64-bit input
+ * value.
+ */
+ "vphn: 3 x 32-bit values",
+ {
+ 0x0000000100000002,
+ 0x00000003ffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff
+ },
+ {
+ 0x00000003,
+ 0x00000001,
+ 0x00000002,
+ 0x00000003
+ }
+ },
+ {
+ /* Parse at most 6 x 64-bit input values */
+ "vphn: 12 x 32-bit values",
+ {
+ 0x0000000100000002,
+ 0x0000000300000004,
+ 0x0000000500000006,
+ 0x0000000700000008,
+ 0x000000090000000a,
+ 0x0000000b0000000c
+ },
+ {
+ 0x0000000c,
+ 0x00000001,
+ 0x00000002,
+ 0x00000003,
+ 0x00000004,
+ 0x00000005,
+ 0x00000006,
+ 0x00000007,
+ 0x00000008,
+ 0x00000009,
+ 0x0000000a,
+ 0x0000000b,
+ 0x0000000c
+ }
+ },
+ {
+ "vphn: 16-bit value followed by 32-bit value",
+ {
+ 0x800100000002ffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff
+ },
+ {
+ 0x00000002,
+ 0x00000001,
+ 0x00000002
+ }
+ },
+ {
+ "vphn: 32-bit value followed by 16-bit value",
+ {
+ 0x000000018002ffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff
+ },
+ {
+ 0x00000002,
+ 0x00000001,
+ 0x00000002
+ }
+ },
+ {
+ /* Parse a 32-bit value split accross two consecutives 64-bit
+ * input values.
+ */
+ "vphn: 16-bit value followed by 2 x 32-bit values",
+ {
+ 0x8001000000020000,
+ 0x0003ffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff
+ },
+ {
+ 0x00000003,
+ 0x00000001,
+ 0x00000002,
+ 0x00000003,
+ 0x00000004,
+ 0x00000005
+ }
+ },
+ {
+ /* The lower bits in 0x0001ffff don't get mixed up with the
+ * 0xffff terminator.
+ */
+ "vphn: 32-bit value has all ones in 16 lower bits",
+ {
+ 0x0001ffff80028003,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff
+ },
+ {
+ 0x00000003,
+ 0x0001ffff,
+ 0x00000002,
+ 0x00000003
+ }
+ },
+ {
+ /* The following input doesn't follow the specification.
+ */
+ "vphn: last 32-bit value is truncated",
+ {
+ 0x0000000100000002,
+ 0x0000000300000004,
+ 0x0000000500000006,
+ 0x0000000700000008,
+ 0x000000090000000a,
+ 0x0000000b800c2bad
+ },
+ {
+ 0x0000000c,
+ 0x00000001,
+ 0x00000002,
+ 0x00000003,
+ 0x00000004,
+ 0x00000005,
+ 0x00000006,
+ 0x00000007,
+ 0x00000008,
+ 0x00000009,
+ 0x0000000a,
+ 0x0000000b,
+ 0x0000000c
+ }
+ },
+ {
+ "vphn: garbage after terminator",
+ {
+ 0xffff2bad2bad2bad,
+ 0x2bad2bad2bad2bad,
+ 0x2bad2bad2bad2bad,
+ 0x2bad2bad2bad2bad,
+ 0x2bad2bad2bad2bad,
+ 0x2bad2bad2bad2bad
+ },
+ {
+ 0x00000000
+ }
+ },
+ {
+ NULL
+ }
+};
+
+static int test_one(struct test *test)
+{
+ __be32 output[VPHN_ASSOC_BUFSIZE] = { 0 };
+ int i, len;
+
+ vphn_unpack_associativity(test->input, output);
+
+ len = be32_to_cpu(output[0]);
+ if (len != test->expected[0]) {
+ printf("expected %d elements, got %d\n", test->expected[0],
+ len);
+ return 1;
+ }
+
+ for (i = 1; i < len; i++) {
+ u32 val = be32_to_cpu(output[i]);
+ if (val != test->expected[i]) {
+ printf("element #%d is 0x%x, should be 0x%x\n", i, val,
+ test->expected[i]);
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+static int test_vphn(void)
+{
+ static struct test *test;
+
+ for (test = all_tests; test->descr; test++) {
+ int ret;
+
+ ret = test_one(test);
+ test_finish(test->descr, ret);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ return test_harness(test_vphn, "test-vphn");
+}
diff --git a/tools/testing/selftests/powerpc/vphn/vphn.c b/tools/testing/selftests/powerpc/vphn/vphn.c
new file mode 120000
index 000000000..186b906e6
--- /dev/null
+++ b/tools/testing/selftests/powerpc/vphn/vphn.c
@@ -0,0 +1 @@
+../../../../../arch/powerpc/mm/vphn.c \ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/vphn/vphn.h b/tools/testing/selftests/powerpc/vphn/vphn.h
new file mode 120000
index 000000000..7131efe38
--- /dev/null
+++ b/tools/testing/selftests/powerpc/vphn/vphn.h
@@ -0,0 +1 @@
+../../../../../arch/powerpc/mm/vphn.h \ No newline at end of file
diff --git a/tools/testing/selftests/prctl/.gitignore b/tools/testing/selftests/prctl/.gitignore
new file mode 100644
index 000000000..0b5c27447
--- /dev/null
+++ b/tools/testing/selftests/prctl/.gitignore
@@ -0,0 +1,3 @@
+disable-tsc-ctxt-sw-stress-test
+disable-tsc-on-off-stress-test
+disable-tsc-test
diff --git a/tools/testing/selftests/prctl/Makefile b/tools/testing/selftests/prctl/Makefile
new file mode 100644
index 000000000..c7923b205
--- /dev/null
+++ b/tools/testing/selftests/prctl/Makefile
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+ifndef CROSS_COMPILE
+uname_M := $(shell uname -m 2>/dev/null || echo not)
+ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
+
+ifeq ($(ARCH),x86)
+TEST_PROGS := disable-tsc-ctxt-sw-stress-test disable-tsc-on-off-stress-test \
+ disable-tsc-test
+all: $(TEST_PROGS)
+
+include ../lib.mk
+
+clean:
+ rm -fr $(TEST_PROGS)
+endif
+endif
diff --git a/tools/testing/selftests/prctl/disable-tsc-ctxt-sw-stress-test.c b/tools/testing/selftests/prctl/disable-tsc-ctxt-sw-stress-test.c
new file mode 100644
index 000000000..62a93cc61
--- /dev/null
+++ b/tools/testing/selftests/prctl/disable-tsc-ctxt-sw-stress-test.c
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Tests for prctl(PR_GET_TSC, ...) / prctl(PR_SET_TSC, ...)
+ *
+ * Tests if the control register is updated correctly
+ * at context switches
+ *
+ * Warning: this test will cause a very high load for a few seconds
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <signal.h>
+#include <inttypes.h>
+#include <wait.h>
+
+
+#include <sys/prctl.h>
+#include <linux/prctl.h>
+
+/* Get/set the process' ability to use the timestamp counter instruction */
+#ifndef PR_GET_TSC
+#define PR_GET_TSC 25
+#define PR_SET_TSC 26
+# define PR_TSC_ENABLE 1 /* allow the use of the timestamp counter */
+# define PR_TSC_SIGSEGV 2 /* throw a SIGSEGV instead of reading the TSC */
+#endif
+
+static uint64_t rdtsc(void)
+{
+uint32_t lo, hi;
+/* We cannot use "=A", since this would use %rax on x86_64 */
+__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
+return (uint64_t)hi << 32 | lo;
+}
+
+static void sigsegv_expect(int sig)
+{
+ /* */
+}
+
+static void segvtask(void)
+{
+ if (prctl(PR_SET_TSC, PR_TSC_SIGSEGV) < 0)
+ {
+ perror("prctl");
+ exit(0);
+ }
+ signal(SIGSEGV, sigsegv_expect);
+ alarm(10);
+ rdtsc();
+ fprintf(stderr, "FATAL ERROR, rdtsc() succeeded while disabled\n");
+ exit(0);
+}
+
+
+static void sigsegv_fail(int sig)
+{
+ fprintf(stderr, "FATAL ERROR, rdtsc() failed while enabled\n");
+ exit(0);
+}
+
+static void rdtsctask(void)
+{
+ if (prctl(PR_SET_TSC, PR_TSC_ENABLE) < 0)
+ {
+ perror("prctl");
+ exit(0);
+ }
+ signal(SIGSEGV, sigsegv_fail);
+ alarm(10);
+ for(;;) rdtsc();
+}
+
+
+int main(void)
+{
+ int n_tasks = 100, i;
+
+ fprintf(stderr, "[No further output means we're allright]\n");
+
+ for (i=0; i<n_tasks; i++)
+ if (fork() == 0)
+ {
+ if (i & 1)
+ segvtask();
+ else
+ rdtsctask();
+ }
+
+ for (i=0; i<n_tasks; i++)
+ wait(NULL);
+
+ exit(0);
+}
+
diff --git a/tools/testing/selftests/prctl/disable-tsc-on-off-stress-test.c b/tools/testing/selftests/prctl/disable-tsc-on-off-stress-test.c
new file mode 100644
index 000000000..79950f9a2
--- /dev/null
+++ b/tools/testing/selftests/prctl/disable-tsc-on-off-stress-test.c
@@ -0,0 +1,97 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Tests for prctl(PR_GET_TSC, ...) / prctl(PR_SET_TSC, ...)
+ *
+ * Tests if the control register is updated correctly
+ * when set with prctl()
+ *
+ * Warning: this test will cause a very high load for a few seconds
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <signal.h>
+#include <inttypes.h>
+#include <wait.h>
+
+
+#include <sys/prctl.h>
+#include <linux/prctl.h>
+
+/* Get/set the process' ability to use the timestamp counter instruction */
+#ifndef PR_GET_TSC
+#define PR_GET_TSC 25
+#define PR_SET_TSC 26
+# define PR_TSC_ENABLE 1 /* allow the use of the timestamp counter */
+# define PR_TSC_SIGSEGV 2 /* throw a SIGSEGV instead of reading the TSC */
+#endif
+
+/* snippet from wikipedia :-) */
+
+static uint64_t rdtsc(void)
+{
+uint32_t lo, hi;
+/* We cannot use "=A", since this would use %rax on x86_64 */
+__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
+return (uint64_t)hi << 32 | lo;
+}
+
+int should_segv = 0;
+
+static void sigsegv_cb(int sig)
+{
+ if (!should_segv)
+ {
+ fprintf(stderr, "FATAL ERROR, rdtsc() failed while enabled\n");
+ exit(0);
+ }
+ if (prctl(PR_SET_TSC, PR_TSC_ENABLE) < 0)
+ {
+ perror("prctl");
+ exit(0);
+ }
+ should_segv = 0;
+
+ rdtsc();
+}
+
+static void task(void)
+{
+ signal(SIGSEGV, sigsegv_cb);
+ alarm(10);
+ for(;;)
+ {
+ rdtsc();
+ if (should_segv)
+ {
+ fprintf(stderr, "FATAL ERROR, rdtsc() succeeded while disabled\n");
+ exit(0);
+ }
+ if (prctl(PR_SET_TSC, PR_TSC_SIGSEGV) < 0)
+ {
+ perror("prctl");
+ exit(0);
+ }
+ should_segv = 1;
+ }
+}
+
+
+int main(void)
+{
+ int n_tasks = 100, i;
+
+ fprintf(stderr, "[No further output means we're allright]\n");
+
+ for (i=0; i<n_tasks; i++)
+ if (fork() == 0)
+ task();
+
+ for (i=0; i<n_tasks; i++)
+ wait(NULL);
+
+ exit(0);
+}
+
diff --git a/tools/testing/selftests/prctl/disable-tsc-test.c b/tools/testing/selftests/prctl/disable-tsc-test.c
new file mode 100644
index 000000000..f84d4ee11
--- /dev/null
+++ b/tools/testing/selftests/prctl/disable-tsc-test.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Tests for prctl(PR_GET_TSC, ...) / prctl(PR_SET_TSC, ...)
+ *
+ * Basic test to test behaviour of PR_GET_TSC and PR_SET_TSC
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <signal.h>
+#include <inttypes.h>
+
+
+#include <sys/prctl.h>
+#include <linux/prctl.h>
+
+/* Get/set the process' ability to use the timestamp counter instruction */
+#ifndef PR_GET_TSC
+#define PR_GET_TSC 25
+#define PR_SET_TSC 26
+# define PR_TSC_ENABLE 1 /* allow the use of the timestamp counter */
+# define PR_TSC_SIGSEGV 2 /* throw a SIGSEGV instead of reading the TSC */
+#endif
+
+const char *tsc_names[] =
+{
+ [0] = "[not set]",
+ [PR_TSC_ENABLE] = "PR_TSC_ENABLE",
+ [PR_TSC_SIGSEGV] = "PR_TSC_SIGSEGV",
+};
+
+static uint64_t rdtsc(void)
+{
+uint32_t lo, hi;
+/* We cannot use "=A", since this would use %rax on x86_64 */
+__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
+return (uint64_t)hi << 32 | lo;
+}
+
+static void sigsegv_cb(int sig)
+{
+ int tsc_val = 0;
+
+ printf("[ SIG_SEGV ]\n");
+ printf("prctl(PR_GET_TSC, &tsc_val); ");
+ fflush(stdout);
+
+ if ( prctl(PR_GET_TSC, &tsc_val) == -1)
+ perror("prctl");
+
+ printf("tsc_val == %s\n", tsc_names[tsc_val]);
+ printf("prctl(PR_SET_TSC, PR_TSC_ENABLE)\n");
+ fflush(stdout);
+ if ( prctl(PR_SET_TSC, PR_TSC_ENABLE) == -1)
+ perror("prctl");
+
+ printf("rdtsc() == ");
+}
+
+int main(void)
+{
+ int tsc_val = 0;
+
+ signal(SIGSEGV, sigsegv_cb);
+
+ printf("rdtsc() == %llu\n", (unsigned long long)rdtsc());
+ printf("prctl(PR_GET_TSC, &tsc_val); ");
+ fflush(stdout);
+
+ if ( prctl(PR_GET_TSC, &tsc_val) == -1)
+ perror("prctl");
+
+ printf("tsc_val == %s\n", tsc_names[tsc_val]);
+ printf("rdtsc() == %llu\n", (unsigned long long)rdtsc());
+ printf("prctl(PR_SET_TSC, PR_TSC_ENABLE)\n");
+ fflush(stdout);
+
+ if ( prctl(PR_SET_TSC, PR_TSC_ENABLE) == -1)
+ perror("prctl");
+
+ printf("rdtsc() == %llu\n", (unsigned long long)rdtsc());
+ printf("prctl(PR_SET_TSC, PR_TSC_SIGSEGV)\n");
+ fflush(stdout);
+
+ if ( prctl(PR_SET_TSC, PR_TSC_SIGSEGV) == -1)
+ perror("prctl");
+
+ printf("rdtsc() == ");
+ fflush(stdout);
+ printf("%llu\n", (unsigned long long)rdtsc());
+ fflush(stdout);
+
+ exit(EXIT_SUCCESS);
+}
+
diff --git a/tools/testing/selftests/proc/.gitignore b/tools/testing/selftests/proc/.gitignore
new file mode 100644
index 000000000..29bac5ef9
--- /dev/null
+++ b/tools/testing/selftests/proc/.gitignore
@@ -0,0 +1,14 @@
+/fd-001-lookup
+/fd-002-posix-eq
+/fd-003-kthread
+/proc-loadavg-001
+/proc-self-map-files-001
+/proc-self-map-files-002
+/proc-self-syscall
+/proc-self-wchan
+/proc-uptime-001
+/proc-uptime-002
+/read
+/self
+/setns-dcache
+/thread-self
diff --git a/tools/testing/selftests/proc/Makefile b/tools/testing/selftests/proc/Makefile
new file mode 100644
index 000000000..434d033ee
--- /dev/null
+++ b/tools/testing/selftests/proc/Makefile
@@ -0,0 +1,20 @@
+CFLAGS += -Wall -O2 -Wno-unused-function
+CFLAGS += -D_GNU_SOURCE
+
+TEST_GEN_PROGS :=
+TEST_GEN_PROGS += fd-001-lookup
+TEST_GEN_PROGS += fd-002-posix-eq
+TEST_GEN_PROGS += fd-003-kthread
+TEST_GEN_PROGS += proc-loadavg-001
+TEST_GEN_PROGS += proc-self-map-files-001
+TEST_GEN_PROGS += proc-self-map-files-002
+TEST_GEN_PROGS += proc-self-syscall
+TEST_GEN_PROGS += proc-self-wchan
+TEST_GEN_PROGS += proc-uptime-001
+TEST_GEN_PROGS += proc-uptime-002
+TEST_GEN_PROGS += read
+TEST_GEN_PROGS += self
+TEST_GEN_PROGS += setns-dcache
+TEST_GEN_PROGS += thread-self
+
+include ../lib.mk
diff --git a/tools/testing/selftests/proc/config b/tools/testing/selftests/proc/config
new file mode 100644
index 000000000..68fbd2b35
--- /dev/null
+++ b/tools/testing/selftests/proc/config
@@ -0,0 +1 @@
+CONFIG_PROC_FS=y
diff --git a/tools/testing/selftests/proc/fd-001-lookup.c b/tools/testing/selftests/proc/fd-001-lookup.c
new file mode 100644
index 000000000..60d7948e7
--- /dev/null
+++ b/tools/testing/selftests/proc/fd-001-lookup.c
@@ -0,0 +1,168 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+// Test /proc/*/fd lookup.
+
+#undef NDEBUG
+#include <assert.h>
+#include <dirent.h>
+#include <errno.h>
+#include <limits.h>
+#include <sched.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include "proc.h"
+
+/* lstat(2) has more "coverage" in case non-symlink pops up somehow. */
+static void test_lookup_pass(const char *pathname)
+{
+ struct stat st;
+ ssize_t rv;
+
+ memset(&st, 0, sizeof(struct stat));
+ rv = lstat(pathname, &st);
+ assert(rv == 0);
+ assert(S_ISLNK(st.st_mode));
+}
+
+static void test_lookup_fail(const char *pathname)
+{
+ struct stat st;
+ ssize_t rv;
+
+ rv = lstat(pathname, &st);
+ assert(rv == -1 && errno == ENOENT);
+}
+
+static void test_lookup(unsigned int fd)
+{
+ char buf[64];
+ unsigned int c;
+ unsigned int u;
+ int i;
+
+ snprintf(buf, sizeof(buf), "/proc/self/fd/%u", fd);
+ test_lookup_pass(buf);
+
+ /* leading junk */
+ for (c = 1; c <= 255; c++) {
+ if (c == '/')
+ continue;
+ snprintf(buf, sizeof(buf), "/proc/self/fd/%c%u", c, fd);
+ test_lookup_fail(buf);
+ }
+
+ /* trailing junk */
+ for (c = 1; c <= 255; c++) {
+ if (c == '/')
+ continue;
+ snprintf(buf, sizeof(buf), "/proc/self/fd/%u%c", fd, c);
+ test_lookup_fail(buf);
+ }
+
+ for (i = INT_MIN; i < INT_MIN + 1024; i++) {
+ snprintf(buf, sizeof(buf), "/proc/self/fd/%d", i);
+ test_lookup_fail(buf);
+ }
+ for (i = -1024; i < 0; i++) {
+ snprintf(buf, sizeof(buf), "/proc/self/fd/%d", i);
+ test_lookup_fail(buf);
+ }
+ for (u = INT_MAX - 1024; u <= (unsigned int)INT_MAX + 1024; u++) {
+ snprintf(buf, sizeof(buf), "/proc/self/fd/%u", u);
+ test_lookup_fail(buf);
+ }
+ for (u = UINT_MAX - 1024; u != 0; u++) {
+ snprintf(buf, sizeof(buf), "/proc/self/fd/%u", u);
+ test_lookup_fail(buf);
+ }
+
+
+}
+
+int main(void)
+{
+ struct dirent *de;
+ unsigned int fd, target_fd;
+
+ if (unshare(CLONE_FILES) == -1)
+ return 1;
+
+ /* Wipe fdtable. */
+ do {
+ DIR *d;
+
+ d = opendir("/proc/self/fd");
+ if (!d)
+ return 1;
+
+ de = xreaddir(d);
+ assert(de->d_type == DT_DIR);
+ assert(streq(de->d_name, "."));
+
+ de = xreaddir(d);
+ assert(de->d_type == DT_DIR);
+ assert(streq(de->d_name, ".."));
+next:
+ de = xreaddir(d);
+ if (de) {
+ unsigned long long fd_ull;
+ unsigned int fd;
+ char *end;
+
+ assert(de->d_type == DT_LNK);
+
+ fd_ull = xstrtoull(de->d_name, &end);
+ assert(*end == '\0');
+ assert(fd_ull == (unsigned int)fd_ull);
+
+ fd = fd_ull;
+ if (fd == dirfd(d))
+ goto next;
+ close(fd);
+ }
+
+ closedir(d);
+ } while (de);
+
+ /* Now fdtable is clean. */
+
+ fd = open("/", O_PATH|O_DIRECTORY);
+ assert(fd == 0);
+ test_lookup(fd);
+ close(fd);
+
+ /* Clean again! */
+
+ fd = open("/", O_PATH|O_DIRECTORY);
+ assert(fd == 0);
+ /* Default RLIMIT_NOFILE-1 */
+ target_fd = 1023;
+ while (target_fd > 0) {
+ if (dup2(fd, target_fd) == target_fd)
+ break;
+ target_fd /= 2;
+ }
+ assert(target_fd > 0);
+ close(fd);
+ test_lookup(target_fd);
+ close(target_fd);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/proc/fd-002-posix-eq.c b/tools/testing/selftests/proc/fd-002-posix-eq.c
new file mode 100644
index 000000000..417322ca9
--- /dev/null
+++ b/tools/testing/selftests/proc/fd-002-posix-eq.c
@@ -0,0 +1,57 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+// Test that open(/proc/*/fd/*) opens the same file.
+#undef NDEBUG
+#include <assert.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+int main(void)
+{
+ int fd0, fd1, fd2;
+ struct stat st0, st1, st2;
+ char buf[64];
+ int rv;
+
+ fd0 = open("/", O_DIRECTORY|O_RDONLY);
+ assert(fd0 >= 0);
+
+ snprintf(buf, sizeof(buf), "/proc/self/fd/%u", fd0);
+ fd1 = open(buf, O_RDONLY);
+ assert(fd1 >= 0);
+
+ snprintf(buf, sizeof(buf), "/proc/thread-self/fd/%u", fd0);
+ fd2 = open(buf, O_RDONLY);
+ assert(fd2 >= 0);
+
+ rv = fstat(fd0, &st0);
+ assert(rv == 0);
+ rv = fstat(fd1, &st1);
+ assert(rv == 0);
+ rv = fstat(fd2, &st2);
+ assert(rv == 0);
+
+ assert(st0.st_dev == st1.st_dev);
+ assert(st0.st_ino == st1.st_ino);
+
+ assert(st0.st_dev == st2.st_dev);
+ assert(st0.st_ino == st2.st_ino);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/proc/fd-003-kthread.c b/tools/testing/selftests/proc/fd-003-kthread.c
new file mode 100644
index 000000000..dc591f97b
--- /dev/null
+++ b/tools/testing/selftests/proc/fd-003-kthread.c
@@ -0,0 +1,178 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+// Test that /proc/$KERNEL_THREAD/fd/ is empty.
+
+#undef NDEBUG
+#include <sys/syscall.h>
+#include <assert.h>
+#include <dirent.h>
+#include <limits.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "proc.h"
+
+#define PF_KHTREAD 0x00200000
+
+/*
+ * Test for kernel threadness atomically with openat().
+ *
+ * Return /proc/$PID/fd descriptor if process is kernel thread.
+ * Return -1 if a process is userspace process.
+ */
+static int kernel_thread_fd(unsigned int pid)
+{
+ unsigned int flags = 0;
+ char buf[4096];
+ int dir_fd, fd;
+ ssize_t rv;
+
+ snprintf(buf, sizeof(buf), "/proc/%u", pid);
+ dir_fd = open(buf, O_RDONLY|O_DIRECTORY);
+ if (dir_fd == -1)
+ return -1;
+
+ /*
+ * Believe it or not, struct task_struct::flags is directly exposed
+ * to userspace!
+ */
+ fd = openat(dir_fd, "stat", O_RDONLY);
+ if (fd == -1) {
+ close(dir_fd);
+ return -1;
+ }
+ rv = read(fd, buf, sizeof(buf));
+ close(fd);
+ if (0 < rv && rv <= sizeof(buf)) {
+ unsigned long long flags_ull;
+ char *p, *end;
+ int i;
+
+ assert(buf[rv - 1] == '\n');
+ buf[rv - 1] = '\0';
+
+ /* Search backwards: ->comm can contain whitespace and ')'. */
+ for (i = 0; i < 43; i++) {
+ p = strrchr(buf, ' ');
+ assert(p);
+ *p = '\0';
+ }
+
+ p = strrchr(buf, ' ');
+ assert(p);
+
+ flags_ull = xstrtoull(p + 1, &end);
+ assert(*end == '\0');
+ assert(flags_ull == (unsigned int)flags_ull);
+
+ flags = flags_ull;
+ }
+
+ fd = -1;
+ if (flags & PF_KHTREAD) {
+ fd = openat(dir_fd, "fd", O_RDONLY|O_DIRECTORY);
+ }
+ close(dir_fd);
+ return fd;
+}
+
+static void test_readdir(int fd)
+{
+ DIR *d;
+ struct dirent *de;
+
+ d = fdopendir(fd);
+ assert(d);
+
+ de = xreaddir(d);
+ assert(streq(de->d_name, "."));
+ assert(de->d_type == DT_DIR);
+
+ de = xreaddir(d);
+ assert(streq(de->d_name, ".."));
+ assert(de->d_type == DT_DIR);
+
+ de = xreaddir(d);
+ assert(!de);
+}
+
+static inline int sys_statx(int dirfd, const char *pathname, int flags,
+ unsigned int mask, void *stx)
+{
+ return syscall(SYS_statx, dirfd, pathname, flags, mask, stx);
+}
+
+static void test_lookup_fail(int fd, const char *pathname)
+{
+ char stx[256] __attribute__((aligned(8)));
+ int rv;
+
+ rv = sys_statx(fd, pathname, AT_SYMLINK_NOFOLLOW, 0, (void *)stx);
+ assert(rv == -1 && errno == ENOENT);
+}
+
+static void test_lookup(int fd)
+{
+ char buf[64];
+ unsigned int u;
+ int i;
+
+ for (i = INT_MIN; i < INT_MIN + 1024; i++) {
+ snprintf(buf, sizeof(buf), "%d", i);
+ test_lookup_fail(fd, buf);
+ }
+ for (i = -1024; i < 1024; i++) {
+ snprintf(buf, sizeof(buf), "%d", i);
+ test_lookup_fail(fd, buf);
+ }
+ for (u = INT_MAX - 1024; u < (unsigned int)INT_MAX + 1024; u++) {
+ snprintf(buf, sizeof(buf), "%u", u);
+ test_lookup_fail(fd, buf);
+ }
+ for (u = UINT_MAX - 1024; u != 0; u++) {
+ snprintf(buf, sizeof(buf), "%u", u);
+ test_lookup_fail(fd, buf);
+ }
+}
+
+int main(void)
+{
+ unsigned int pid;
+ int fd;
+
+ /*
+ * In theory this will loop indefinitely if kernel threads are exiled
+ * from /proc.
+ *
+ * Start with kthreadd.
+ */
+ pid = 2;
+ while ((fd = kernel_thread_fd(pid)) == -1 && pid < 1024) {
+ pid++;
+ }
+ /* EACCES if run as non-root. */
+ if (pid >= 1024)
+ return 1;
+
+ test_readdir(fd);
+ test_lookup(fd);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/proc/proc-loadavg-001.c b/tools/testing/selftests/proc/proc-loadavg-001.c
new file mode 100644
index 000000000..8edaafc2b
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-loadavg-001.c
@@ -0,0 +1,82 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+/* Test that /proc/loadavg correctly reports last pid in pid namespace. */
+#include <errno.h>
+#include <sched.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/wait.h>
+
+int main(void)
+{
+ pid_t pid;
+ int wstatus;
+
+ if (unshare(CLONE_NEWPID) == -1) {
+ if (errno == ENOSYS || errno == EPERM)
+ return 2;
+ return 1;
+ }
+
+ pid = fork();
+ if (pid == -1)
+ return 1;
+ if (pid == 0) {
+ char buf[128], *p;
+ int fd;
+ ssize_t rv;
+
+ fd = open("/proc/loadavg" , O_RDONLY);
+ if (fd == -1)
+ return 1;
+ rv = read(fd, buf, sizeof(buf));
+ if (rv < 3)
+ return 1;
+ p = buf + rv;
+
+ /* pid 1 */
+ if (!(p[-3] == ' ' && p[-2] == '1' && p[-1] == '\n'))
+ return 1;
+
+ pid = fork();
+ if (pid == -1)
+ return 1;
+ if (pid == 0)
+ return 0;
+ if (waitpid(pid, NULL, 0) == -1)
+ return 1;
+
+ lseek(fd, 0, SEEK_SET);
+ rv = read(fd, buf, sizeof(buf));
+ if (rv < 3)
+ return 1;
+ p = buf + rv;
+
+ /* pid 2 */
+ if (!(p[-3] == ' ' && p[-2] == '2' && p[-1] == '\n'))
+ return 1;
+
+ return 0;
+ }
+
+ if (waitpid(pid, &wstatus, 0) == -1)
+ return 1;
+ if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) == 0)
+ return 0;
+ return 1;
+}
diff --git a/tools/testing/selftests/proc/proc-self-map-files-001.c b/tools/testing/selftests/proc/proc-self-map-files-001.c
new file mode 100644
index 000000000..4209c6428
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-self-map-files-001.c
@@ -0,0 +1,82 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+/* Test readlink /proc/self/map_files/... */
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <stdlib.h>
+
+static void pass(const char *fmt, unsigned long a, unsigned long b)
+{
+ char name[64];
+ char buf[64];
+
+ snprintf(name, sizeof(name), fmt, a, b);
+ if (readlink(name, buf, sizeof(buf)) == -1)
+ exit(1);
+}
+
+static void fail(const char *fmt, unsigned long a, unsigned long b)
+{
+ char name[64];
+ char buf[64];
+
+ snprintf(name, sizeof(name), fmt, a, b);
+ if (readlink(name, buf, sizeof(buf)) == -1 && errno == ENOENT)
+ return;
+ exit(1);
+}
+
+int main(void)
+{
+ const unsigned int PAGE_SIZE = sysconf(_SC_PAGESIZE);
+ void *p;
+ int fd;
+ unsigned long a, b;
+
+ fd = open("/dev/zero", O_RDONLY);
+ if (fd == -1)
+ return 1;
+
+ p = mmap(NULL, PAGE_SIZE, PROT_NONE, MAP_PRIVATE|MAP_FILE, fd, 0);
+ if (p == MAP_FAILED)
+ return 1;
+
+ a = (unsigned long)p;
+ b = (unsigned long)p + PAGE_SIZE;
+
+ pass("/proc/self/map_files/%lx-%lx", a, b);
+ fail("/proc/self/map_files/ %lx-%lx", a, b);
+ fail("/proc/self/map_files/%lx -%lx", a, b);
+ fail("/proc/self/map_files/%lx- %lx", a, b);
+ fail("/proc/self/map_files/%lx-%lx ", a, b);
+ fail("/proc/self/map_files/0%lx-%lx", a, b);
+ fail("/proc/self/map_files/%lx-0%lx", a, b);
+ if (sizeof(long) == 4) {
+ fail("/proc/self/map_files/100000000%lx-%lx", a, b);
+ fail("/proc/self/map_files/%lx-100000000%lx", a, b);
+ } else if (sizeof(long) == 8) {
+ fail("/proc/self/map_files/10000000000000000%lx-%lx", a, b);
+ fail("/proc/self/map_files/%lx-10000000000000000%lx", a, b);
+ } else
+ return 1;
+
+ return 0;
+}
diff --git a/tools/testing/selftests/proc/proc-self-map-files-002.c b/tools/testing/selftests/proc/proc-self-map-files-002.c
new file mode 100644
index 000000000..85744425b
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-self-map-files-002.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+/* Test readlink /proc/self/map_files/... with minimum address. */
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <stdlib.h>
+
+static void pass(const char *fmt, unsigned long a, unsigned long b)
+{
+ char name[64];
+ char buf[64];
+
+ snprintf(name, sizeof(name), fmt, a, b);
+ if (readlink(name, buf, sizeof(buf)) == -1)
+ exit(1);
+}
+
+static void fail(const char *fmt, unsigned long a, unsigned long b)
+{
+ char name[64];
+ char buf[64];
+
+ snprintf(name, sizeof(name), fmt, a, b);
+ if (readlink(name, buf, sizeof(buf)) == -1 && errno == ENOENT)
+ return;
+ exit(1);
+}
+
+int main(void)
+{
+ const unsigned int PAGE_SIZE = sysconf(_SC_PAGESIZE);
+#ifdef __arm__
+ unsigned long va = 2 * PAGE_SIZE;
+#else
+ unsigned long va = 0;
+#endif
+ void *p;
+ int fd;
+ unsigned long a, b;
+
+ fd = open("/dev/zero", O_RDONLY);
+ if (fd == -1)
+ return 1;
+
+ p = mmap((void *)va, PAGE_SIZE, PROT_NONE, MAP_PRIVATE|MAP_FILE|MAP_FIXED, fd, 0);
+ if (p == MAP_FAILED) {
+ if (errno == EPERM)
+ return 2;
+ return 1;
+ }
+
+ a = (unsigned long)p;
+ b = (unsigned long)p + PAGE_SIZE;
+
+ pass("/proc/self/map_files/%lx-%lx", a, b);
+ fail("/proc/self/map_files/ %lx-%lx", a, b);
+ fail("/proc/self/map_files/%lx -%lx", a, b);
+ fail("/proc/self/map_files/%lx- %lx", a, b);
+ fail("/proc/self/map_files/%lx-%lx ", a, b);
+ fail("/proc/self/map_files/0%lx-%lx", a, b);
+ fail("/proc/self/map_files/%lx-0%lx", a, b);
+ if (sizeof(long) == 4) {
+ fail("/proc/self/map_files/100000000%lx-%lx", a, b);
+ fail("/proc/self/map_files/%lx-100000000%lx", a, b);
+ } else if (sizeof(long) == 8) {
+ fail("/proc/self/map_files/10000000000000000%lx-%lx", a, b);
+ fail("/proc/self/map_files/%lx-10000000000000000%lx", a, b);
+ } else
+ return 1;
+
+ return 0;
+}
diff --git a/tools/testing/selftests/proc/proc-self-syscall.c b/tools/testing/selftests/proc/proc-self-syscall.c
new file mode 100644
index 000000000..7b9018fad
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-self-syscall.c
@@ -0,0 +1,59 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdio.h>
+
+static inline ssize_t sys_read(int fd, void *buf, size_t len)
+{
+ return syscall(SYS_read, fd, buf, len);
+}
+
+int main(void)
+{
+ char buf1[64];
+ char buf2[64];
+ int fd;
+ ssize_t rv;
+
+ fd = open("/proc/self/syscall", O_RDONLY);
+ if (fd == -1) {
+ if (errno == ENOENT)
+ return 2;
+ return 1;
+ }
+
+ /* Do direct system call as libc can wrap anything. */
+ snprintf(buf1, sizeof(buf1), "%ld 0x%lx 0x%lx 0x%lx",
+ (long)SYS_read, (long)fd, (long)buf2, (long)sizeof(buf2));
+
+ memset(buf2, 0, sizeof(buf2));
+ rv = sys_read(fd, buf2, sizeof(buf2));
+ if (rv < 0)
+ return 1;
+ if (rv < strlen(buf1))
+ return 1;
+ if (strncmp(buf1, buf2, strlen(buf1)) != 0)
+ return 1;
+
+ return 0;
+}
diff --git a/tools/testing/selftests/proc/proc-self-wchan.c b/tools/testing/selftests/proc/proc-self-wchan.c
new file mode 100644
index 000000000..a38b2fbaa
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-self-wchan.c
@@ -0,0 +1,40 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <unistd.h>
+
+int main(void)
+{
+ char buf[64];
+ int fd;
+
+ fd = open("/proc/self/wchan", O_RDONLY);
+ if (fd == -1) {
+ if (errno == ENOENT)
+ return 2;
+ return 1;
+ }
+
+ buf[0] = '\0';
+ if (read(fd, buf, sizeof(buf)) != 1)
+ return 1;
+ if (buf[0] != '0')
+ return 1;
+ return 0;
+}
diff --git a/tools/testing/selftests/proc/proc-uptime-001.c b/tools/testing/selftests/proc/proc-uptime-001.c
new file mode 100644
index 000000000..781f7a50f
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-uptime-001.c
@@ -0,0 +1,45 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+// Test that values in /proc/uptime increment monotonically.
+#undef NDEBUG
+#include <assert.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include "proc-uptime.h"
+
+int main(void)
+{
+ uint64_t start, u0, u1, i0, i1;
+ int fd;
+
+ fd = open("/proc/uptime", O_RDONLY);
+ assert(fd >= 0);
+
+ proc_uptime(fd, &u0, &i0);
+ start = u0;
+ do {
+ proc_uptime(fd, &u1, &i1);
+ assert(u1 >= u0);
+ assert(i1 >= i0);
+ u0 = u1;
+ i0 = i1;
+ } while (u1 - start < 100);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/proc/proc-uptime-002.c b/tools/testing/selftests/proc/proc-uptime-002.c
new file mode 100644
index 000000000..e7ceabed7
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-uptime-002.c
@@ -0,0 +1,78 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+// Test that values in /proc/uptime increment monotonically
+// while shifting across CPUs.
+#undef NDEBUG
+#include <assert.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <stdint.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include "proc-uptime.h"
+
+static inline int sys_sched_getaffinity(pid_t pid, unsigned int len, unsigned long *m)
+{
+ return syscall(SYS_sched_getaffinity, pid, len, m);
+}
+
+static inline int sys_sched_setaffinity(pid_t pid, unsigned int len, unsigned long *m)
+{
+ return syscall(SYS_sched_setaffinity, pid, len, m);
+}
+
+int main(void)
+{
+ unsigned int len;
+ unsigned long *m;
+ unsigned int cpu;
+ uint64_t u0, u1, i0, i1;
+ int fd;
+
+ /* find out "nr_cpu_ids" */
+ m = NULL;
+ len = 0;
+ do {
+ len += sizeof(unsigned long);
+ free(m);
+ m = malloc(len);
+ } while (sys_sched_getaffinity(0, len, m) == -EINVAL);
+
+ fd = open("/proc/uptime", O_RDONLY);
+ assert(fd >= 0);
+
+ proc_uptime(fd, &u0, &i0);
+ for (cpu = 0; cpu < len * 8; cpu++) {
+ memset(m, 0, len);
+ m[cpu / (8 * sizeof(unsigned long))] |= 1UL << (cpu % (8 * sizeof(unsigned long)));
+
+ /* CPU might not exist, ignore error */
+ sys_sched_setaffinity(0, len, m);
+
+ proc_uptime(fd, &u1, &i1);
+ assert(u1 >= u0);
+ assert(i1 >= i0);
+ u0 = u1;
+ i0 = i1;
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/proc/proc-uptime.h b/tools/testing/selftests/proc/proc-uptime.h
new file mode 100644
index 000000000..dc6a42b1d
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-uptime.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#undef NDEBUG
+#include <assert.h>
+#include <errno.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "proc.h"
+
+static void proc_uptime(int fd, uint64_t *uptime, uint64_t *idle)
+{
+ uint64_t val1, val2;
+ char buf[64], *p;
+ ssize_t rv;
+
+ /* save "p < end" checks */
+ memset(buf, 0, sizeof(buf));
+ rv = pread(fd, buf, sizeof(buf), 0);
+ assert(0 <= rv && rv <= sizeof(buf));
+ buf[sizeof(buf) - 1] = '\0';
+
+ p = buf;
+
+ val1 = xstrtoull(p, &p);
+ assert(p[0] == '.');
+ assert('0' <= p[1] && p[1] <= '9');
+ assert('0' <= p[2] && p[2] <= '9');
+ assert(p[3] == ' ');
+
+ val2 = (p[1] - '0') * 10 + p[2] - '0';
+ *uptime = val1 * 100 + val2;
+
+ p += 4;
+
+ val1 = xstrtoull(p, &p);
+ assert(p[0] == '.');
+ assert('0' <= p[1] && p[1] <= '9');
+ assert('0' <= p[2] && p[2] <= '9');
+ assert(p[3] == '\n');
+
+ val2 = (p[1] - '0') * 10 + p[2] - '0';
+ *idle = val1 * 100 + val2;
+
+ assert(p + 4 == buf + rv);
+}
diff --git a/tools/testing/selftests/proc/proc.h b/tools/testing/selftests/proc/proc.h
new file mode 100644
index 000000000..b7d57ea40
--- /dev/null
+++ b/tools/testing/selftests/proc/proc.h
@@ -0,0 +1,51 @@
+#pragma once
+#undef NDEBUG
+#include <assert.h>
+#include <dirent.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+
+static inline pid_t sys_getpid(void)
+{
+ return syscall(SYS_getpid);
+}
+
+static inline pid_t sys_gettid(void)
+{
+ return syscall(SYS_gettid);
+}
+
+static inline bool streq(const char *s1, const char *s2)
+{
+ return strcmp(s1, s2) == 0;
+}
+
+static unsigned long long xstrtoull(const char *p, char **end)
+{
+ if (*p == '0') {
+ *end = (char *)p + 1;
+ return 0;
+ } else if ('1' <= *p && *p <= '9') {
+ unsigned long long val;
+
+ errno = 0;
+ val = strtoull(p, end, 10);
+ assert(errno == 0);
+ return val;
+ } else
+ assert(0);
+}
+
+static struct dirent *xreaddir(DIR *d)
+{
+ struct dirent *de;
+
+ errno = 0;
+ de = readdir(d);
+ assert(de || errno == 0);
+ return de;
+}
diff --git a/tools/testing/selftests/proc/read.c b/tools/testing/selftests/proc/read.c
new file mode 100644
index 000000000..563e752e6
--- /dev/null
+++ b/tools/testing/selftests/proc/read.c
@@ -0,0 +1,132 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+// Test
+// 1) read of every file in /proc
+// 2) readlink of every symlink in /proc
+// 3) recursively (1) + (2) for every directory in /proc
+// 4) write to /proc/*/clear_refs and /proc/*/task/*/clear_refs
+// 5) write to /proc/sysrq-trigger
+#undef NDEBUG
+#include <assert.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "proc.h"
+
+static void f_reg(DIR *d, const char *filename)
+{
+ char buf[4096];
+ int fd;
+ ssize_t rv;
+
+ /* read from /proc/kmsg can block */
+ fd = openat(dirfd(d), filename, O_RDONLY|O_NONBLOCK);
+ if (fd == -1)
+ return;
+ rv = read(fd, buf, sizeof(buf));
+ assert((0 <= rv && rv <= sizeof(buf)) || rv == -1);
+ close(fd);
+}
+
+static void f_reg_write(DIR *d, const char *filename, const char *buf, size_t len)
+{
+ int fd;
+ ssize_t rv;
+
+ fd = openat(dirfd(d), filename, O_WRONLY);
+ if (fd == -1)
+ return;
+ rv = write(fd, buf, len);
+ assert((0 <= rv && rv <= len) || rv == -1);
+ close(fd);
+}
+
+static void f_lnk(DIR *d, const char *filename)
+{
+ char buf[4096];
+ ssize_t rv;
+
+ rv = readlinkat(dirfd(d), filename, buf, sizeof(buf));
+ assert((0 <= rv && rv <= sizeof(buf)) || rv == -1);
+}
+
+static void f(DIR *d, unsigned int level)
+{
+ struct dirent *de;
+
+ de = xreaddir(d);
+ assert(de->d_type == DT_DIR);
+ assert(streq(de->d_name, "."));
+
+ de = xreaddir(d);
+ assert(de->d_type == DT_DIR);
+ assert(streq(de->d_name, ".."));
+
+ while ((de = xreaddir(d))) {
+ assert(!streq(de->d_name, "."));
+ assert(!streq(de->d_name, ".."));
+
+ switch (de->d_type) {
+ DIR *dd;
+ int fd;
+
+ case DT_REG:
+ if (level == 0 && streq(de->d_name, "sysrq-trigger")) {
+ f_reg_write(d, de->d_name, "h", 1);
+ } else if (level == 1 && streq(de->d_name, "clear_refs")) {
+ f_reg_write(d, de->d_name, "1", 1);
+ } else if (level == 3 && streq(de->d_name, "clear_refs")) {
+ f_reg_write(d, de->d_name, "1", 1);
+ } else {
+ f_reg(d, de->d_name);
+ }
+ break;
+ case DT_DIR:
+ fd = openat(dirfd(d), de->d_name, O_DIRECTORY|O_RDONLY);
+ if (fd == -1)
+ continue;
+ dd = fdopendir(fd);
+ if (!dd)
+ continue;
+ f(dd, level + 1);
+ closedir(dd);
+ break;
+ case DT_LNK:
+ f_lnk(d, de->d_name);
+ break;
+ default:
+ assert(0);
+ }
+ }
+}
+
+int main(void)
+{
+ DIR *d;
+
+ d = opendir("/proc");
+ if (!d)
+ return 2;
+ f(d, 0);
+ return 0;
+}
diff --git a/tools/testing/selftests/proc/self.c b/tools/testing/selftests/proc/self.c
new file mode 100644
index 000000000..21c15a1ff
--- /dev/null
+++ b/tools/testing/selftests/proc/self.c
@@ -0,0 +1,39 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+// Test that /proc/self gives correct TGID.
+#undef NDEBUG
+#include <assert.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include "proc.h"
+
+int main(void)
+{
+ char buf1[64], buf2[64];
+ pid_t pid;
+ ssize_t rv;
+
+ pid = sys_getpid();
+ snprintf(buf1, sizeof(buf1), "%u", pid);
+
+ rv = readlink("/proc/self", buf2, sizeof(buf2));
+ assert(rv == strlen(buf1));
+ buf2[rv] = '\0';
+ assert(streq(buf1, buf2));
+
+ return 0;
+}
diff --git a/tools/testing/selftests/proc/setns-dcache.c b/tools/testing/selftests/proc/setns-dcache.c
new file mode 100644
index 000000000..60ab197a7
--- /dev/null
+++ b/tools/testing/selftests/proc/setns-dcache.c
@@ -0,0 +1,129 @@
+/*
+ * Copyright © 2019 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+/*
+ * Test that setns(CLONE_NEWNET) points to new /proc/net content even
+ * if old one is in dcache.
+ *
+ * FIXME /proc/net/unix is under CONFIG_UNIX which can be disabled.
+ */
+#undef NDEBUG
+#include <assert.h>
+#include <errno.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+
+static pid_t pid = -1;
+
+static void f(void)
+{
+ if (pid > 0) {
+ kill(pid, SIGTERM);
+ }
+}
+
+int main(void)
+{
+ int fd[2];
+ char _ = 0;
+ int nsfd;
+
+ atexit(f);
+
+ /* Check for priviledges and syscall availability straight away. */
+ if (unshare(CLONE_NEWNET) == -1) {
+ if (errno == ENOSYS || errno == EPERM) {
+ return 4;
+ }
+ return 1;
+ }
+ /* Distinguisher between two otherwise empty net namespaces. */
+ if (socket(AF_UNIX, SOCK_STREAM, 0) == -1) {
+ return 1;
+ }
+
+ if (pipe(fd) == -1) {
+ return 1;
+ }
+
+ pid = fork();
+ if (pid == -1) {
+ return 1;
+ }
+
+ if (pid == 0) {
+ if (unshare(CLONE_NEWNET) == -1) {
+ return 1;
+ }
+
+ if (write(fd[1], &_, 1) != 1) {
+ return 1;
+ }
+
+ pause();
+
+ return 0;
+ }
+
+ if (read(fd[0], &_, 1) != 1) {
+ return 1;
+ }
+
+ {
+ char buf[64];
+ snprintf(buf, sizeof(buf), "/proc/%u/ns/net", pid);
+ nsfd = open(buf, O_RDONLY);
+ if (nsfd == -1) {
+ return 1;
+ }
+ }
+
+ /* Reliably pin dentry into dcache. */
+ (void)open("/proc/net/unix", O_RDONLY);
+
+ if (setns(nsfd, CLONE_NEWNET) == -1) {
+ return 1;
+ }
+
+ kill(pid, SIGTERM);
+ pid = 0;
+
+ {
+ char buf[4096];
+ ssize_t rv;
+ int fd;
+
+ fd = open("/proc/net/unix", O_RDONLY);
+ if (fd == -1) {
+ return 1;
+ }
+
+#define S "Num RefCount Protocol Flags Type St Inode Path\n"
+ rv = read(fd, buf, sizeof(buf));
+
+ assert(rv == strlen(S));
+ assert(memcmp(buf, S, strlen(S)) == 0);
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/proc/thread-self.c b/tools/testing/selftests/proc/thread-self.c
new file mode 100644
index 000000000..4b23b39b7
--- /dev/null
+++ b/tools/testing/selftests/proc/thread-self.c
@@ -0,0 +1,64 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+// Test that /proc/thread-self gives correct TGID/PID.
+#undef NDEBUG
+#include <assert.h>
+#include <sched.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/wait.h>
+
+#include "proc.h"
+
+int f(void *arg)
+{
+ char buf1[64], buf2[64];
+ pid_t pid, tid;
+ ssize_t rv;
+
+ pid = sys_getpid();
+ tid = sys_gettid();
+ snprintf(buf1, sizeof(buf1), "%u/task/%u", pid, tid);
+
+ rv = readlink("/proc/thread-self", buf2, sizeof(buf2));
+ assert(rv == strlen(buf1));
+ buf2[rv] = '\0';
+ assert(streq(buf1, buf2));
+
+ if (arg)
+ exit(0);
+ return 0;
+}
+
+int main(void)
+{
+ const int PAGE_SIZE = sysconf(_SC_PAGESIZE);
+ pid_t pid;
+ void *stack;
+
+ /* main thread */
+ f((void *)0);
+
+ stack = mmap(NULL, 2 * PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+ assert(stack != MAP_FAILED);
+ /* side thread */
+ pid = clone(f, stack + PAGE_SIZE, CLONE_THREAD|CLONE_SIGHAND|CLONE_VM, (void *)1);
+ assert(pid > 0);
+ pause();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/pstore/.gitignore b/tools/testing/selftests/pstore/.gitignore
new file mode 100644
index 000000000..5a4a26e54
--- /dev/null
+++ b/tools/testing/selftests/pstore/.gitignore
@@ -0,0 +1,2 @@
+logs
+*uuid
diff --git a/tools/testing/selftests/pstore/Makefile b/tools/testing/selftests/pstore/Makefile
new file mode 100644
index 000000000..5ef57855a
--- /dev/null
+++ b/tools/testing/selftests/pstore/Makefile
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for pstore selftests.
+# Expects pstore backend is registered.
+
+all:
+
+TEST_PROGS := pstore_tests pstore_post_reboot_tests
+TEST_FILES := common_tests pstore_crash_test
+EXTRA_CLEAN := logs/* *uuid
+
+include ../lib.mk
+
+run_crash:
+ @sh pstore_crash_test || { echo "pstore_crash_test: [FAIL]"; exit 1; }
diff --git a/tools/testing/selftests/pstore/common_tests b/tools/testing/selftests/pstore/common_tests
new file mode 100755
index 000000000..3ea64d7cf
--- /dev/null
+++ b/tools/testing/selftests/pstore/common_tests
@@ -0,0 +1,83 @@
+#!/bin/sh
+
+# common_tests - Shell script commonly used by pstore test scripts
+#
+# Copyright (C) Hitachi Ltd., 2015
+# Written by Hiraku Toyooka <hiraku.toyooka.gu@hitachi.com>
+#
+# Released under the terms of the GPL v2.
+
+# Utilities
+errexit() { # message
+ echo "Error: $1" 1>&2
+ exit 1
+}
+
+absdir() { # file_path
+ (cd `dirname $1`; pwd)
+}
+
+show_result() { # result_value
+ if [ $1 -eq 0 ]; then
+ prlog "ok"
+ else
+ prlog "FAIL"
+ rc=1
+ fi
+}
+
+check_files_exist() { # type of pstorefs file
+ if [ -e ${1}-${backend}-0 ]; then
+ prlog "ok"
+ for f in `ls ${1}-${backend}-*`; do
+ prlog -e "\t${f}"
+ done
+ else
+ prlog "FAIL"
+ rc=1
+ fi
+}
+
+operate_files() { # tested value, files, operation
+ if [ $1 -eq 0 ]; then
+ prlog
+ for f in $2; do
+ prlog -ne "\t${f} ... "
+ # execute operation
+ $3 $f
+ show_result $?
+ done
+ else
+ prlog " ... FAIL"
+ rc=1
+ fi
+}
+
+# Parameters
+TEST_STRING_PATTERN="Testing pstore: uuid="
+UUID=`cat /proc/sys/kernel/random/uuid`
+TOP_DIR=`absdir $0`
+LOG_DIR=$TOP_DIR/logs/`date +%Y%m%d-%H%M%S`_${UUID}/
+REBOOT_FLAG=$TOP_DIR/reboot_flag
+
+# Preparing logs
+LOG_FILE=$LOG_DIR/`basename $0`.log
+mkdir -p $LOG_DIR || errexit "Failed to make a log directory: $LOG_DIR"
+date > $LOG_FILE
+prlog() { # messages
+ /bin/echo "$@" | tee -a $LOG_FILE
+}
+
+# Starting tests
+rc=0
+prlog "=== Pstore unit tests (`basename $0`) ==="
+prlog "UUID="$UUID
+
+prlog -n "Checking pstore backend is registered ... "
+backend=`cat /sys/module/pstore/parameters/backend`
+show_result $?
+prlog -e "\tbackend=${backend}"
+prlog -e "\tcmdline=`cat /proc/cmdline`"
+if [ $rc -ne 0 ]; then
+ exit 1
+fi
diff --git a/tools/testing/selftests/pstore/config b/tools/testing/selftests/pstore/config
new file mode 100644
index 000000000..d148f9f89
--- /dev/null
+++ b/tools/testing/selftests/pstore/config
@@ -0,0 +1,5 @@
+CONFIG_MISC_FILESYSTEMS=y
+CONFIG_PSTORE=y
+CONFIG_PSTORE_PMSG=y
+CONFIG_PSTORE_CONSOLE=y
+CONFIG_PSTORE_RAM=m
diff --git a/tools/testing/selftests/pstore/pstore_crash_test b/tools/testing/selftests/pstore/pstore_crash_test
new file mode 100755
index 000000000..1a4afe5c1
--- /dev/null
+++ b/tools/testing/selftests/pstore/pstore_crash_test
@@ -0,0 +1,30 @@
+#!/bin/sh
+
+# pstore_crash_test - Pstore test shell script which causes crash and reboot
+#
+# Copyright (C) Hitachi Ltd., 2015
+# Written by Hiraku Toyooka <hiraku.toyooka.gu@hitachi.com>
+#
+# Released under the terms of the GPL v2.
+
+# exit if pstore backend is not registered
+. ./common_tests
+
+prlog "Causing kernel crash ..."
+
+# enable all functions triggered by sysrq
+echo 1 > /proc/sys/kernel/sysrq
+# setting to reboot in 3 seconds after panic
+echo 3 > /proc/sys/kernel/panic
+
+# save uuid file by different name because next test execution will replace it.
+mv $TOP_DIR/uuid $TOP_DIR/prev_uuid
+
+# create a file as reboot flag
+touch $REBOOT_FLAG
+sync
+
+# cause crash
+# Note: If you use kdump and want to see kmesg-* files after reboot, you should
+# specify 'crash_kexec_post_notifiers' in 1st kernel's cmdline.
+echo c > /proc/sysrq-trigger
diff --git a/tools/testing/selftests/pstore/pstore_post_reboot_tests b/tools/testing/selftests/pstore/pstore_post_reboot_tests
new file mode 100755
index 000000000..22f8df1ad
--- /dev/null
+++ b/tools/testing/selftests/pstore/pstore_post_reboot_tests
@@ -0,0 +1,80 @@
+#!/bin/sh
+
+# pstore_post_reboot_tests - Check pstore's behavior after crash/reboot
+#
+# Copyright (C) Hitachi Ltd., 2015
+# Written by Hiraku Toyooka <hiraku.toyooka.gu@hitachi.com>
+#
+# Released under the terms of the GPL v2.
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+. ./common_tests
+
+if [ -e $REBOOT_FLAG ]; then
+ rm $REBOOT_FLAG
+else
+ prlog "pstore_crash_test has not been executed yet. we skip further tests."
+ exit $ksft_skip
+fi
+
+prlog -n "Mounting pstore filesystem ... "
+mount_info=`grep pstore /proc/mounts`
+if [ $? -eq 0 ]; then
+ mount_point=`echo ${mount_info} | cut -d' ' -f2 | head -n1`
+ prlog "ok"
+else
+ mount none /sys/fs/pstore -t pstore
+ if [ $? -eq 0 ]; then
+ mount_point=`grep pstore /proc/mounts | cut -d' ' -f2 | head -n1`
+ prlog "ok"
+ else
+ prlog "FAIL"
+ exit 1
+ fi
+fi
+
+cd ${mount_point}
+
+prlog -n "Checking dmesg files exist in pstore filesystem ... "
+check_files_exist dmesg
+
+prlog -n "Checking console files exist in pstore filesystem ... "
+check_files_exist console
+
+prlog -n "Checking pmsg files exist in pstore filesystem ... "
+check_files_exist pmsg
+
+prlog -n "Checking dmesg files contain oops end marker"
+grep_end_trace() {
+ grep -q "\---\[ end trace" $1
+}
+files=`ls dmesg-${backend}-*`
+operate_files $? "$files" grep_end_trace
+
+prlog -n "Checking console file contains oops end marker ... "
+grep -q "\---\[ end trace" console-${backend}-0
+show_result $?
+
+prlog -n "Checking pmsg file properly keeps the content written before crash ... "
+prev_uuid=`cat $TOP_DIR/prev_uuid`
+if [ $? -eq 0 ]; then
+ nr_matched=`grep -c "$TEST_STRING_PATTERN" pmsg-${backend}-0`
+ if [ $nr_matched -eq 1 ]; then
+ grep -q "$TEST_STRING_PATTERN"$prev_uuid pmsg-${backend}-0
+ show_result $?
+ else
+ prlog "FAIL"
+ rc=1
+ fi
+else
+ prlog "FAIL"
+ rc=1
+fi
+
+prlog -n "Removing all files in pstore filesystem "
+files=`ls *-${backend}-*`
+operate_files $? "$files" rm
+
+exit $rc
diff --git a/tools/testing/selftests/pstore/pstore_tests b/tools/testing/selftests/pstore/pstore_tests
new file mode 100755
index 000000000..f25d2a349
--- /dev/null
+++ b/tools/testing/selftests/pstore/pstore_tests
@@ -0,0 +1,30 @@
+#!/bin/sh
+
+# pstore_tests - Check pstore's behavior before crash/reboot
+#
+# Copyright (C) Hitachi Ltd., 2015
+# Written by Hiraku Toyooka <hiraku.toyooka.gu@hitachi.com>
+#
+# Released under the terms of the GPL v2.
+
+. ./common_tests
+
+prlog -n "Checking pstore console is registered ... "
+dmesg | grep -q "console \[pstore"
+show_result $?
+
+prlog -n "Checking /dev/pmsg0 exists ... "
+test -e /dev/pmsg0
+show_result $?
+
+prlog -n "Writing unique string to /dev/pmsg0 ... "
+if [ -e "/dev/pmsg0" ]; then
+ echo "${TEST_STRING_PATTERN}""$UUID" > /dev/pmsg0
+ show_result $?
+ echo "$UUID" > $TOP_DIR/uuid
+else
+ prlog "FAIL"
+ rc=1
+fi
+
+exit $rc
diff --git a/tools/testing/selftests/ptp/.gitignore b/tools/testing/selftests/ptp/.gitignore
new file mode 100644
index 000000000..f562e49d6
--- /dev/null
+++ b/tools/testing/selftests/ptp/.gitignore
@@ -0,0 +1 @@
+testptp
diff --git a/tools/testing/selftests/ptp/Makefile b/tools/testing/selftests/ptp/Makefile
new file mode 100644
index 000000000..ef06de089
--- /dev/null
+++ b/tools/testing/selftests/ptp/Makefile
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS += -I../../../../usr/include/
+TEST_PROGS := testptp
+LDLIBS += -lrt
+all: $(TEST_PROGS)
+
+include ../lib.mk
+
+clean:
+ rm -fr $(TEST_PROGS)
diff --git a/tools/testing/selftests/ptp/testptp.c b/tools/testing/selftests/ptp/testptp.c
new file mode 100644
index 000000000..a5d8f0ab0
--- /dev/null
+++ b/tools/testing/selftests/ptp/testptp.c
@@ -0,0 +1,521 @@
+/*
+ * PTP 1588 clock support - User space test program
+ *
+ * Copyright (C) 2010 OMICRON electronics GmbH
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#define _GNU_SOURCE
+#define __SANE_USERSPACE_TYPES__ /* For PPC64, to get LL64 types */
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <math.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <linux/ptp_clock.h>
+
+#define DEVICE "/dev/ptp0"
+
+#ifndef ADJ_SETOFFSET
+#define ADJ_SETOFFSET 0x0100
+#endif
+
+#ifndef CLOCK_INVALID
+#define CLOCK_INVALID -1
+#endif
+
+/* clock_adjtime is not available in GLIBC < 2.14 */
+#if !__GLIBC_PREREQ(2, 14)
+#include <sys/syscall.h>
+static int clock_adjtime(clockid_t id, struct timex *tx)
+{
+ return syscall(__NR_clock_adjtime, id, tx);
+}
+#endif
+
+static clockid_t get_clockid(int fd)
+{
+#define CLOCKFD 3
+ return (((unsigned int) ~fd) << 3) | CLOCKFD;
+}
+
+static void handle_alarm(int s)
+{
+ printf("received signal %d\n", s);
+}
+
+static int install_handler(int signum, void (*handler)(int))
+{
+ struct sigaction action;
+ sigset_t mask;
+
+ /* Unblock the signal. */
+ sigemptyset(&mask);
+ sigaddset(&mask, signum);
+ sigprocmask(SIG_UNBLOCK, &mask, NULL);
+
+ /* Install the signal handler. */
+ action.sa_handler = handler;
+ action.sa_flags = 0;
+ sigemptyset(&action.sa_mask);
+ sigaction(signum, &action, NULL);
+
+ return 0;
+}
+
+static long ppb_to_scaled_ppm(int ppb)
+{
+ /*
+ * The 'freq' field in the 'struct timex' is in parts per
+ * million, but with a 16 bit binary fractional field.
+ * Instead of calculating either one of
+ *
+ * scaled_ppm = (ppb / 1000) << 16 [1]
+ * scaled_ppm = (ppb << 16) / 1000 [2]
+ *
+ * we simply use double precision math, in order to avoid the
+ * truncation in [1] and the possible overflow in [2].
+ */
+ return (long) (ppb * 65.536);
+}
+
+static int64_t pctns(struct ptp_clock_time *t)
+{
+ return t->sec * 1000000000LL + t->nsec;
+}
+
+static void usage(char *progname)
+{
+ fprintf(stderr,
+ "usage: %s [options]\n"
+ " -a val request a one-shot alarm after 'val' seconds\n"
+ " -A val request a periodic alarm every 'val' seconds\n"
+ " -c query the ptp clock's capabilities\n"
+ " -d name device to open\n"
+ " -e val read 'val' external time stamp events\n"
+ " -f val adjust the ptp clock frequency by 'val' ppb\n"
+ " -g get the ptp clock time\n"
+ " -h prints this message\n"
+ " -i val index for event/trigger\n"
+ " -k val measure the time offset between system and phc clock\n"
+ " for 'val' times (Maximum 25)\n"
+ " -l list the current pin configuration\n"
+ " -L pin,val configure pin index 'pin' with function 'val'\n"
+ " the channel index is taken from the '-i' option\n"
+ " 'val' specifies the auxiliary function:\n"
+ " 0 - none\n"
+ " 1 - external time stamp\n"
+ " 2 - periodic output\n"
+ " -p val enable output with a period of 'val' nanoseconds\n"
+ " -P val enable or disable (val=1|0) the system clock PPS\n"
+ " -s set the ptp clock time from the system time\n"
+ " -S set the system time from the ptp clock time\n"
+ " -t val shift the ptp clock time by 'val' seconds\n"
+ " -T val set the ptp clock time to 'val' seconds\n",
+ progname);
+}
+
+int main(int argc, char *argv[])
+{
+ struct ptp_clock_caps caps;
+ struct ptp_extts_event event;
+ struct ptp_extts_request extts_request;
+ struct ptp_perout_request perout_request;
+ struct ptp_pin_desc desc;
+ struct timespec ts;
+ struct timex tx;
+
+ static timer_t timerid;
+ struct itimerspec timeout;
+ struct sigevent sigevent;
+
+ struct ptp_clock_time *pct;
+ struct ptp_sys_offset *sysoff;
+
+
+ char *progname;
+ unsigned int i;
+ int c, cnt, fd;
+
+ char *device = DEVICE;
+ clockid_t clkid;
+ int adjfreq = 0x7fffffff;
+ int adjtime = 0;
+ int capabilities = 0;
+ int extts = 0;
+ int gettime = 0;
+ int index = 0;
+ int list_pins = 0;
+ int oneshot = 0;
+ int pct_offset = 0;
+ int n_samples = 0;
+ int periodic = 0;
+ int perout = -1;
+ int pin_index = -1, pin_func;
+ int pps = -1;
+ int seconds = 0;
+ int settime = 0;
+
+ int64_t t1, t2, tp;
+ int64_t interval, offset;
+
+ progname = strrchr(argv[0], '/');
+ progname = progname ? 1+progname : argv[0];
+ while (EOF != (c = getopt(argc, argv, "a:A:cd:e:f:ghi:k:lL:p:P:sSt:T:v"))) {
+ switch (c) {
+ case 'a':
+ oneshot = atoi(optarg);
+ break;
+ case 'A':
+ periodic = atoi(optarg);
+ break;
+ case 'c':
+ capabilities = 1;
+ break;
+ case 'd':
+ device = optarg;
+ break;
+ case 'e':
+ extts = atoi(optarg);
+ break;
+ case 'f':
+ adjfreq = atoi(optarg);
+ break;
+ case 'g':
+ gettime = 1;
+ break;
+ case 'i':
+ index = atoi(optarg);
+ break;
+ case 'k':
+ pct_offset = 1;
+ n_samples = atoi(optarg);
+ break;
+ case 'l':
+ list_pins = 1;
+ break;
+ case 'L':
+ cnt = sscanf(optarg, "%d,%d", &pin_index, &pin_func);
+ if (cnt != 2) {
+ usage(progname);
+ return -1;
+ }
+ break;
+ case 'p':
+ perout = atoi(optarg);
+ break;
+ case 'P':
+ pps = atoi(optarg);
+ break;
+ case 's':
+ settime = 1;
+ break;
+ case 'S':
+ settime = 2;
+ break;
+ case 't':
+ adjtime = atoi(optarg);
+ break;
+ case 'T':
+ settime = 3;
+ seconds = atoi(optarg);
+ break;
+ case 'h':
+ usage(progname);
+ return 0;
+ case '?':
+ default:
+ usage(progname);
+ return -1;
+ }
+ }
+
+ fd = open(device, O_RDWR);
+ if (fd < 0) {
+ fprintf(stderr, "opening %s: %s\n", device, strerror(errno));
+ return -1;
+ }
+
+ clkid = get_clockid(fd);
+ if (CLOCK_INVALID == clkid) {
+ fprintf(stderr, "failed to read clock id\n");
+ return -1;
+ }
+
+ if (capabilities) {
+ if (ioctl(fd, PTP_CLOCK_GETCAPS, &caps)) {
+ perror("PTP_CLOCK_GETCAPS");
+ } else {
+ printf("capabilities:\n"
+ " %d maximum frequency adjustment (ppb)\n"
+ " %d programmable alarms\n"
+ " %d external time stamp channels\n"
+ " %d programmable periodic signals\n"
+ " %d pulse per second\n"
+ " %d programmable pins\n"
+ " %d cross timestamping\n",
+ caps.max_adj,
+ caps.n_alarm,
+ caps.n_ext_ts,
+ caps.n_per_out,
+ caps.pps,
+ caps.n_pins,
+ caps.cross_timestamping);
+ }
+ }
+
+ if (0x7fffffff != adjfreq) {
+ memset(&tx, 0, sizeof(tx));
+ tx.modes = ADJ_FREQUENCY;
+ tx.freq = ppb_to_scaled_ppm(adjfreq);
+ if (clock_adjtime(clkid, &tx)) {
+ perror("clock_adjtime");
+ } else {
+ puts("frequency adjustment okay");
+ }
+ }
+
+ if (adjtime) {
+ memset(&tx, 0, sizeof(tx));
+ tx.modes = ADJ_SETOFFSET;
+ tx.time.tv_sec = adjtime;
+ tx.time.tv_usec = 0;
+ if (clock_adjtime(clkid, &tx) < 0) {
+ perror("clock_adjtime");
+ } else {
+ puts("time shift okay");
+ }
+ }
+
+ if (gettime) {
+ if (clock_gettime(clkid, &ts)) {
+ perror("clock_gettime");
+ } else {
+ printf("clock time: %ld.%09ld or %s",
+ ts.tv_sec, ts.tv_nsec, ctime(&ts.tv_sec));
+ }
+ }
+
+ if (settime == 1) {
+ clock_gettime(CLOCK_REALTIME, &ts);
+ if (clock_settime(clkid, &ts)) {
+ perror("clock_settime");
+ } else {
+ puts("set time okay");
+ }
+ }
+
+ if (settime == 2) {
+ clock_gettime(clkid, &ts);
+ if (clock_settime(CLOCK_REALTIME, &ts)) {
+ perror("clock_settime");
+ } else {
+ puts("set time okay");
+ }
+ }
+
+ if (settime == 3) {
+ ts.tv_sec = seconds;
+ ts.tv_nsec = 0;
+ if (clock_settime(clkid, &ts)) {
+ perror("clock_settime");
+ } else {
+ puts("set time okay");
+ }
+ }
+
+ if (extts) {
+ memset(&extts_request, 0, sizeof(extts_request));
+ extts_request.index = index;
+ extts_request.flags = PTP_ENABLE_FEATURE;
+ if (ioctl(fd, PTP_EXTTS_REQUEST, &extts_request)) {
+ perror("PTP_EXTTS_REQUEST");
+ extts = 0;
+ } else {
+ puts("external time stamp request okay");
+ }
+ for (; extts; extts--) {
+ cnt = read(fd, &event, sizeof(event));
+ if (cnt != sizeof(event)) {
+ perror("read");
+ break;
+ }
+ printf("event index %u at %lld.%09u\n", event.index,
+ event.t.sec, event.t.nsec);
+ fflush(stdout);
+ }
+ /* Disable the feature again. */
+ extts_request.flags = 0;
+ if (ioctl(fd, PTP_EXTTS_REQUEST, &extts_request)) {
+ perror("PTP_EXTTS_REQUEST");
+ }
+ }
+
+ if (list_pins) {
+ int n_pins = 0;
+ if (ioctl(fd, PTP_CLOCK_GETCAPS, &caps)) {
+ perror("PTP_CLOCK_GETCAPS");
+ } else {
+ n_pins = caps.n_pins;
+ }
+ for (i = 0; i < n_pins; i++) {
+ desc.index = i;
+ if (ioctl(fd, PTP_PIN_GETFUNC, &desc)) {
+ perror("PTP_PIN_GETFUNC");
+ break;
+ }
+ printf("name %s index %u func %u chan %u\n",
+ desc.name, desc.index, desc.func, desc.chan);
+ }
+ }
+
+ if (oneshot) {
+ install_handler(SIGALRM, handle_alarm);
+ /* Create a timer. */
+ sigevent.sigev_notify = SIGEV_SIGNAL;
+ sigevent.sigev_signo = SIGALRM;
+ if (timer_create(clkid, &sigevent, &timerid)) {
+ perror("timer_create");
+ return -1;
+ }
+ /* Start the timer. */
+ memset(&timeout, 0, sizeof(timeout));
+ timeout.it_value.tv_sec = oneshot;
+ if (timer_settime(timerid, 0, &timeout, NULL)) {
+ perror("timer_settime");
+ return -1;
+ }
+ pause();
+ timer_delete(timerid);
+ }
+
+ if (periodic) {
+ install_handler(SIGALRM, handle_alarm);
+ /* Create a timer. */
+ sigevent.sigev_notify = SIGEV_SIGNAL;
+ sigevent.sigev_signo = SIGALRM;
+ if (timer_create(clkid, &sigevent, &timerid)) {
+ perror("timer_create");
+ return -1;
+ }
+ /* Start the timer. */
+ memset(&timeout, 0, sizeof(timeout));
+ timeout.it_interval.tv_sec = periodic;
+ timeout.it_value.tv_sec = periodic;
+ if (timer_settime(timerid, 0, &timeout, NULL)) {
+ perror("timer_settime");
+ return -1;
+ }
+ while (1) {
+ pause();
+ }
+ timer_delete(timerid);
+ }
+
+ if (perout >= 0) {
+ if (clock_gettime(clkid, &ts)) {
+ perror("clock_gettime");
+ return -1;
+ }
+ memset(&perout_request, 0, sizeof(perout_request));
+ perout_request.index = index;
+ perout_request.start.sec = ts.tv_sec + 2;
+ perout_request.start.nsec = 0;
+ perout_request.period.sec = 0;
+ perout_request.period.nsec = perout;
+ if (ioctl(fd, PTP_PEROUT_REQUEST, &perout_request)) {
+ perror("PTP_PEROUT_REQUEST");
+ } else {
+ puts("periodic output request okay");
+ }
+ }
+
+ if (pin_index >= 0) {
+ memset(&desc, 0, sizeof(desc));
+ desc.index = pin_index;
+ desc.func = pin_func;
+ desc.chan = index;
+ if (ioctl(fd, PTP_PIN_SETFUNC, &desc)) {
+ perror("PTP_PIN_SETFUNC");
+ } else {
+ puts("set pin function okay");
+ }
+ }
+
+ if (pps != -1) {
+ int enable = pps ? 1 : 0;
+ if (ioctl(fd, PTP_ENABLE_PPS, enable)) {
+ perror("PTP_ENABLE_PPS");
+ } else {
+ puts("pps for system time request okay");
+ }
+ }
+
+ if (pct_offset) {
+ if (n_samples <= 0 || n_samples > 25) {
+ puts("n_samples should be between 1 and 25");
+ usage(progname);
+ return -1;
+ }
+
+ sysoff = calloc(1, sizeof(*sysoff));
+ if (!sysoff) {
+ perror("calloc");
+ return -1;
+ }
+ sysoff->n_samples = n_samples;
+
+ if (ioctl(fd, PTP_SYS_OFFSET, sysoff))
+ perror("PTP_SYS_OFFSET");
+ else
+ puts("system and phc clock time offset request okay");
+
+ pct = &sysoff->ts[0];
+ for (i = 0; i < sysoff->n_samples; i++) {
+ t1 = pctns(pct+2*i);
+ tp = pctns(pct+2*i+1);
+ t2 = pctns(pct+2*i+2);
+ interval = t2 - t1;
+ offset = (t2 + t1) / 2 - tp;
+
+ printf("system time: %lld.%u\n",
+ (pct+2*i)->sec, (pct+2*i)->nsec);
+ printf("phc time: %lld.%u\n",
+ (pct+2*i+1)->sec, (pct+2*i+1)->nsec);
+ printf("system time: %lld.%u\n",
+ (pct+2*i+2)->sec, (pct+2*i+2)->nsec);
+ printf("system/phc clock time offset is %" PRId64 " ns\n"
+ "system clock time delay is %" PRId64 " ns\n",
+ offset, interval);
+ }
+
+ free(sysoff);
+ }
+
+ close(fd);
+ return 0;
+}
diff --git a/tools/testing/selftests/ptp/testptp.mk b/tools/testing/selftests/ptp/testptp.mk
new file mode 100644
index 000000000..4ef2d9755
--- /dev/null
+++ b/tools/testing/selftests/ptp/testptp.mk
@@ -0,0 +1,33 @@
+# PTP 1588 clock support - User space test program
+#
+# Copyright (C) 2010 OMICRON electronics GmbH
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+CC = $(CROSS_COMPILE)gcc
+INC = -I$(KBUILD_OUTPUT)/usr/include
+CFLAGS = -Wall $(INC)
+LDLIBS = -lrt
+PROGS = testptp
+
+all: $(PROGS)
+
+testptp: testptp.o
+
+clean:
+ rm -f testptp.o
+
+distclean: clean
+ rm -f $(PROGS)
diff --git a/tools/testing/selftests/ptrace/.gitignore b/tools/testing/selftests/ptrace/.gitignore
new file mode 100644
index 000000000..b3e59d41f
--- /dev/null
+++ b/tools/testing/selftests/ptrace/.gitignore
@@ -0,0 +1 @@
+peeksiginfo
diff --git a/tools/testing/selftests/ptrace/Makefile b/tools/testing/selftests/ptrace/Makefile
new file mode 100644
index 000000000..8a2bc5562
--- /dev/null
+++ b/tools/testing/selftests/ptrace/Makefile
@@ -0,0 +1,5 @@
+CFLAGS += -iquote../../../../include/uapi -Wall
+
+TEST_GEN_PROGS := peeksiginfo
+
+include ../lib.mk
diff --git a/tools/testing/selftests/ptrace/peeksiginfo.c b/tools/testing/selftests/ptrace/peeksiginfo.c
new file mode 100644
index 000000000..54900657e
--- /dev/null
+++ b/tools/testing/selftests/ptrace/peeksiginfo.c
@@ -0,0 +1,219 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <signal.h>
+#include <unistd.h>
+#include <errno.h>
+#include <linux/types.h>
+#include <sys/wait.h>
+#include <sys/syscall.h>
+#include <sys/user.h>
+#include <sys/mman.h>
+
+#include "linux/ptrace.h"
+
+static int sys_rt_sigqueueinfo(pid_t tgid, int sig, siginfo_t *uinfo)
+{
+ return syscall(SYS_rt_sigqueueinfo, tgid, sig, uinfo);
+}
+
+static int sys_rt_tgsigqueueinfo(pid_t tgid, pid_t tid,
+ int sig, siginfo_t *uinfo)
+{
+ return syscall(SYS_rt_tgsigqueueinfo, tgid, tid, sig, uinfo);
+}
+
+static int sys_ptrace(int request, pid_t pid, void *addr, void *data)
+{
+ return syscall(SYS_ptrace, request, pid, addr, data);
+}
+
+#define SIGNR 10
+#define TEST_SICODE_PRIV -1
+#define TEST_SICODE_SHARE -2
+
+#ifndef PAGE_SIZE
+#define PAGE_SIZE sysconf(_SC_PAGESIZE)
+#endif
+
+#define err(fmt, ...) \
+ fprintf(stderr, \
+ "Error (%s:%d): " fmt, \
+ __FILE__, __LINE__, ##__VA_ARGS__)
+
+static int check_error_paths(pid_t child)
+{
+ struct ptrace_peeksiginfo_args arg;
+ int ret, exit_code = -1;
+ void *addr_rw, *addr_ro;
+
+ /*
+ * Allocate two contiguous pages. The first one is for read-write,
+ * another is for read-only.
+ */
+ addr_rw = mmap(NULL, 2 * PAGE_SIZE, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (addr_rw == MAP_FAILED) {
+ err("mmap() failed: %m\n");
+ return 1;
+ }
+
+ addr_ro = mmap(addr_rw + PAGE_SIZE, PAGE_SIZE, PROT_READ,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
+ if (addr_ro == MAP_FAILED) {
+ err("mmap() failed: %m\n");
+ goto out;
+ }
+
+ arg.nr = SIGNR;
+ arg.off = 0;
+
+ /* Unsupported flags */
+ arg.flags = ~0;
+ ret = sys_ptrace(PTRACE_PEEKSIGINFO, child, &arg, addr_rw);
+ if (ret != -1 || errno != EINVAL) {
+ err("sys_ptrace() returns %d (expected -1),"
+ " errno %d (expected %d): %m\n",
+ ret, errno, EINVAL);
+ goto out;
+ }
+ arg.flags = 0;
+
+ /* A part of the buffer is read-only */
+ ret = sys_ptrace(PTRACE_PEEKSIGINFO, child, &arg,
+ addr_ro - sizeof(siginfo_t) * 2);
+ if (ret != 2) {
+ err("sys_ptrace() returns %d (expected 2): %m\n", ret);
+ goto out;
+ }
+
+ /* Read-only buffer */
+ ret = sys_ptrace(PTRACE_PEEKSIGINFO, child, &arg, addr_ro);
+ if (ret != -1 && errno != EFAULT) {
+ err("sys_ptrace() returns %d (expected -1),"
+ " errno %d (expected %d): %m\n",
+ ret, errno, EFAULT);
+ goto out;
+ }
+
+ exit_code = 0;
+out:
+ munmap(addr_rw, 2 * PAGE_SIZE);
+ return exit_code;
+}
+
+int check_direct_path(pid_t child, int shared, int nr)
+{
+ struct ptrace_peeksiginfo_args arg = {.flags = 0, .nr = nr, .off = 0};
+ int i, j, ret, exit_code = -1;
+ siginfo_t siginfo[SIGNR];
+ int si_code;
+
+ if (shared == 1) {
+ arg.flags = PTRACE_PEEKSIGINFO_SHARED;
+ si_code = TEST_SICODE_SHARE;
+ } else {
+ arg.flags = 0;
+ si_code = TEST_SICODE_PRIV;
+ }
+
+ for (i = 0; i < SIGNR; ) {
+ arg.off = i;
+ ret = sys_ptrace(PTRACE_PEEKSIGINFO, child, &arg, siginfo);
+ if (ret == -1) {
+ err("ptrace() failed: %m\n");
+ goto out;
+ }
+
+ if (ret == 0)
+ break;
+
+ for (j = 0; j < ret; j++, i++) {
+ if (siginfo[j].si_code == si_code &&
+ siginfo[j].si_int == i)
+ continue;
+
+ err("%d: Wrong siginfo i=%d si_code=%d si_int=%d\n",
+ shared, i, siginfo[j].si_code, siginfo[j].si_int);
+ goto out;
+ }
+ }
+
+ if (i != SIGNR) {
+ err("Only %d signals were read\n", i);
+ goto out;
+ }
+
+ exit_code = 0;
+out:
+ return exit_code;
+}
+
+int main(int argc, char *argv[])
+{
+ siginfo_t siginfo[SIGNR];
+ int i, exit_code = 1;
+ sigset_t blockmask;
+ pid_t child;
+
+ sigemptyset(&blockmask);
+ sigaddset(&blockmask, SIGRTMIN);
+ sigprocmask(SIG_BLOCK, &blockmask, NULL);
+
+ child = fork();
+ if (child == -1) {
+ err("fork() failed: %m");
+ return 1;
+ } else if (child == 0) {
+ pid_t ppid = getppid();
+ while (1) {
+ if (ppid != getppid())
+ break;
+ sleep(1);
+ }
+ return 1;
+ }
+
+ /* Send signals in process-wide and per-thread queues */
+ for (i = 0; i < SIGNR; i++) {
+ siginfo->si_code = TEST_SICODE_SHARE;
+ siginfo->si_int = i;
+ sys_rt_sigqueueinfo(child, SIGRTMIN, siginfo);
+
+ siginfo->si_code = TEST_SICODE_PRIV;
+ siginfo->si_int = i;
+ sys_rt_tgsigqueueinfo(child, child, SIGRTMIN, siginfo);
+ }
+
+ if (sys_ptrace(PTRACE_ATTACH, child, NULL, NULL) == -1)
+ return 1;
+
+ waitpid(child, NULL, 0);
+
+ /* Dump signals one by one*/
+ if (check_direct_path(child, 0, 1))
+ goto out;
+ /* Dump all signals for one call */
+ if (check_direct_path(child, 0, SIGNR))
+ goto out;
+
+ /*
+ * Dump signal from the process-wide queue.
+ * The number of signals is not multible to the buffer size
+ */
+ if (check_direct_path(child, 1, 3))
+ goto out;
+
+ if (check_error_paths(child))
+ goto out;
+
+ printf("PASS\n");
+ exit_code = 0;
+out:
+ if (sys_ptrace(PTRACE_KILL, child, NULL, NULL) == -1)
+ return 1;
+
+ waitpid(child, NULL, 0);
+
+ return exit_code;
+}
diff --git a/tools/testing/selftests/rcutorture/.gitignore b/tools/testing/selftests/rcutorture/.gitignore
new file mode 100644
index 000000000..ccc240275
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/.gitignore
@@ -0,0 +1,4 @@
+initrd
+b[0-9]*
+res
+*.swp
diff --git a/tools/testing/selftests/rcutorture/bin/configNR_CPUS.sh b/tools/testing/selftests/rcutorture/bin/configNR_CPUS.sh
new file mode 100755
index 000000000..43540f182
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/configNR_CPUS.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+#
+# Extract the number of CPUs expected from the specified Kconfig-file
+# fragment by checking CONFIG_SMP and CONFIG_NR_CPUS. If the specified
+# file gives no clue, base the number on the number of idle CPUs on
+# the system.
+#
+# Usage: configNR_CPUS.sh config-frag
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2013
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+cf=$1
+if test ! -r $cf
+then
+ echo Unreadable config fragment $cf 1>&2
+ exit -1
+fi
+if grep -q '^CONFIG_SMP=n$' $cf
+then
+ echo 1
+ exit 0
+fi
+if grep -q '^CONFIG_NR_CPUS=' $cf
+then
+ grep '^CONFIG_NR_CPUS=' $cf |
+ sed -e 's/^CONFIG_NR_CPUS=\([0-9]*\).*$/\1/'
+ exit 0
+fi
+cpus2use.sh
diff --git a/tools/testing/selftests/rcutorture/bin/config_override.sh b/tools/testing/selftests/rcutorture/bin/config_override.sh
new file mode 100755
index 000000000..ef7fcbac3
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/config_override.sh
@@ -0,0 +1,61 @@
+#!/bin/bash
+#
+# config_override.sh base override
+#
+# Combines base and override, removing any Kconfig options from base
+# that conflict with any in override, concatenating what remains and
+# sending the result to standard output.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2017
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+base=$1
+if test -r $base
+then
+ :
+else
+ echo Base file $base unreadable!!!
+ exit 1
+fi
+
+override=$2
+if test -r $override
+then
+ :
+else
+ echo Override file $override unreadable!!!
+ exit 1
+fi
+
+T=${TMPDIR-/tmp}/config_override.sh.$$
+trap 'rm -rf $T' 0
+mkdir $T
+
+sed < $override -e 's/^/grep -v "/' -e 's/=.*$/="/' |
+ awk '
+ {
+ if (last)
+ print last " |";
+ last = $0;
+ }
+ END {
+ if (last)
+ print last;
+ }' > $T/script
+sh $T/script < $base
+cat $override
diff --git a/tools/testing/selftests/rcutorture/bin/configcheck.sh b/tools/testing/selftests/rcutorture/bin/configcheck.sh
new file mode 100755
index 000000000..197deece7
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/configcheck.sh
@@ -0,0 +1,54 @@
+#!/bin/bash
+# Usage: configcheck.sh .config .config-template
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2011
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+T=${TMPDIR-/tmp}/abat-chk-config.sh.$$
+trap 'rm -rf $T' 0
+mkdir $T
+
+cat $1 > $T/.config
+
+cat $2 | sed -e 's/\(.*\)=n/# \1 is not set/' -e 's/^#CHECK#//' |
+awk '
+{
+ print "if grep -q \"" $0 "\" < '"$T/.config"'";
+ print "then";
+ print "\t:";
+ print "else";
+ if ($1 == "#") {
+ print "\tif grep -q \"" $2 "\" < '"$T/.config"'";
+ print "\tthen";
+ print "\t\tif test \"$firsttime\" = \"\""
+ print "\t\tthen"
+ print "\t\t\tfirsttime=1"
+ print "\t\tfi"
+ print "\t\techo \":" $2 ": improperly set\"";
+ print "\telse";
+ print "\t\t:";
+ print "\tfi";
+ } else {
+ print "\tif test \"$firsttime\" = \"\""
+ print "\tthen"
+ print "\t\tfirsttime=1"
+ print "\tfi"
+ print "\techo \":" $0 ": improperly set\"";
+ }
+ print "fi";
+ }' | sh
diff --git a/tools/testing/selftests/rcutorture/bin/configinit.sh b/tools/testing/selftests/rcutorture/bin/configinit.sh
new file mode 100755
index 000000000..65541c21a
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/configinit.sh
@@ -0,0 +1,72 @@
+#!/bin/bash
+#
+# Usage: configinit.sh config-spec-file build-output-dir results-dir
+#
+# Create a .config file from the spec file. Run from the kernel source tree.
+# Exits with 0 if all went well, with 1 if all went well but the config
+# did not match, and some other number for other failures.
+#
+# The first argument is the .config specification file, which contains
+# desired settings, for example, "CONFIG_NO_HZ=y". For best results,
+# this should be a full pathname.
+#
+# The second argument is a optional path to a build output directory,
+# for example, "O=/tmp/foo". If this argument is omitted, the .config
+# file will be generated directly in the current directory.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2013
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+T=${TMPDIR-/tmp}/configinit.sh.$$
+trap 'rm -rf $T' 0
+mkdir $T
+
+# Capture config spec file.
+
+c=$1
+buildloc=$2
+resdir=$3
+builddir=
+if echo $buildloc | grep -q '^O='
+then
+ builddir=`echo $buildloc | sed -e 's/^O=//'`
+ if test ! -d $builddir
+ then
+ mkdir $builddir
+ fi
+else
+ echo Bad build directory: \"$buildloc\"
+ exit 2
+fi
+
+sed -e 's/^\(CONFIG[0-9A-Z_]*\)=.*$/grep -v "^# \1" |/' < $c > $T/u.sh
+sed -e 's/^\(CONFIG[0-9A-Z_]*=\).*$/grep -v \1 |/' < $c >> $T/u.sh
+grep '^grep' < $T/u.sh > $T/upd.sh
+echo "cat - $c" >> $T/upd.sh
+make mrproper
+make $buildloc distclean > $resdir/Make.distclean 2>&1
+make $buildloc $TORTURE_DEFCONFIG > $resdir/Make.defconfig.out 2>&1
+mv $builddir/.config $builddir/.config.sav
+sh $T/upd.sh < $builddir/.config.sav > $builddir/.config
+cp $builddir/.config $builddir/.config.new
+yes '' | make $buildloc oldconfig > $resdir/Make.oldconfig.out 2> $resdir/Make.oldconfig.err
+
+# verify new config matches specification.
+configcheck.sh $builddir/.config $c
+
+exit 0
diff --git a/tools/testing/selftests/rcutorture/bin/cpus2use.sh b/tools/testing/selftests/rcutorture/bin/cpus2use.sh
new file mode 100755
index 000000000..bb99cde3f
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/cpus2use.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+#
+# Get an estimate of how CPU-hoggy to be.
+#
+# Usage: cpus2use.sh
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2013
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+ncpus=`grep '^processor' /proc/cpuinfo | wc -l`
+idlecpus=`mpstat | tail -1 | \
+ awk -v ncpus=$ncpus '{ print ncpus * ($7 + $NF) / 100 }'`
+awk -v ncpus=$ncpus -v idlecpus=$idlecpus < /dev/null '
+BEGIN {
+ cpus2use = idlecpus;
+ if (cpus2use < 1)
+ cpus2use = 1;
+ if (cpus2use < ncpus / 10)
+ cpus2use = ncpus / 10;
+ if (cpus2use == int(cpus2use))
+ cpus2use = int(cpus2use)
+ else
+ cpus2use = int(cpus2use) + 1
+ print cpus2use;
+}'
+
diff --git a/tools/testing/selftests/rcutorture/bin/functions.sh b/tools/testing/selftests/rcutorture/bin/functions.sh
new file mode 100644
index 000000000..65f665502
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/functions.sh
@@ -0,0 +1,279 @@
+#!/bin/bash
+#
+# Shell functions for the rest of the scripts.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2013
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+# bootparam_hotplug_cpu bootparam-string
+#
+# Returns 1 if the specified boot-parameter string tells rcutorture to
+# test CPU-hotplug operations.
+bootparam_hotplug_cpu () {
+ echo "$1" | grep -q "rcutorture\.onoff_"
+}
+
+# checkarg --argname argtype $# arg mustmatch cannotmatch
+#
+# Checks the specified argument "arg" against the mustmatch and cannotmatch
+# patterns.
+checkarg () {
+ if test $3 -le 1
+ then
+ echo $1 needs argument $2 matching \"$5\"
+ usage
+ fi
+ if echo "$4" | grep -q -e "$5"
+ then
+ :
+ else
+ echo $1 $2 \"$4\" must match \"$5\"
+ usage
+ fi
+ if echo "$4" | grep -q -e "$6"
+ then
+ echo $1 $2 \"$4\" must not match \"$6\"
+ usage
+ fi
+}
+
+# configfrag_boot_params bootparam-string config-fragment-file
+#
+# Adds boot parameters from the .boot file, if any.
+configfrag_boot_params () {
+ if test -r "$2.boot"
+ then
+ echo $1 `grep -v '^#' "$2.boot" | tr '\012' ' '`
+ else
+ echo $1
+ fi
+}
+
+# configfrag_boot_cpus bootparam-string config-fragment-file config-cpus
+#
+# Decreases number of CPUs based on any nr_cpus= boot parameters specified.
+configfrag_boot_cpus () {
+ local bootargs="`configfrag_boot_params "$1" "$2"`"
+ local nr_cpus
+ if echo "${bootargs}" | grep -q 'nr_cpus=[0-9]'
+ then
+ nr_cpus="`echo "${bootargs}" | sed -e 's/^.*nr_cpus=\([0-9]*\).*$/\1/'`"
+ if test "$3" -gt "$nr_cpus"
+ then
+ echo $nr_cpus
+ else
+ echo $3
+ fi
+ else
+ echo $3
+ fi
+}
+
+# configfrag_boot_maxcpus bootparam-string config-fragment-file config-cpus
+#
+# Decreases number of CPUs based on any maxcpus= boot parameters specified.
+# This allows tests where additional CPUs come online later during the
+# test run. However, the torture parameters will be set based on the
+# number of CPUs initially present, so the scripting should schedule
+# test runs based on the maxcpus= boot parameter controlling the initial
+# number of CPUs instead of on the ultimate number of CPUs.
+configfrag_boot_maxcpus () {
+ local bootargs="`configfrag_boot_params "$1" "$2"`"
+ local maxcpus
+ if echo "${bootargs}" | grep -q 'maxcpus=[0-9]'
+ then
+ maxcpus="`echo "${bootargs}" | sed -e 's/^.*maxcpus=\([0-9]*\).*$/\1/'`"
+ if test "$3" -gt "$maxcpus"
+ then
+ echo $maxcpus
+ else
+ echo $3
+ fi
+ else
+ echo $3
+ fi
+}
+
+# configfrag_hotplug_cpu config-fragment-file
+#
+# Returns 1 if the config fragment specifies hotplug CPU.
+configfrag_hotplug_cpu () {
+ if test ! -r "$1"
+ then
+ echo Unreadable config fragment "$1" 1>&2
+ exit -1
+ fi
+ grep -q '^CONFIG_HOTPLUG_CPU=y$' "$1"
+}
+
+# identify_boot_image qemu-cmd
+#
+# Returns the relative path to the kernel build image. This will be
+# arch/<arch>/boot/bzImage or vmlinux if bzImage is not a target for the
+# architecture, unless overridden with the TORTURE_BOOT_IMAGE environment
+# variable.
+identify_boot_image () {
+ if test -n "$TORTURE_BOOT_IMAGE"
+ then
+ echo $TORTURE_BOOT_IMAGE
+ else
+ case "$1" in
+ qemu-system-x86_64|qemu-system-i386)
+ echo arch/x86/boot/bzImage
+ ;;
+ qemu-system-aarch64)
+ echo arch/arm64/boot/Image
+ ;;
+ *)
+ echo vmlinux
+ ;;
+ esac
+ fi
+}
+
+# identify_qemu builddir
+#
+# Returns our best guess as to which qemu command is appropriate for
+# the kernel at hand. Override with the TORTURE_QEMU_CMD environment variable.
+identify_qemu () {
+ local u="`file "$1"`"
+ if test -n "$TORTURE_QEMU_CMD"
+ then
+ echo $TORTURE_QEMU_CMD
+ elif echo $u | grep -q x86-64
+ then
+ echo qemu-system-x86_64
+ elif echo $u | grep -q "Intel 80386"
+ then
+ echo qemu-system-i386
+ elif echo $u | grep -q aarch64
+ then
+ echo qemu-system-aarch64
+ elif uname -a | grep -q ppc64
+ then
+ echo qemu-system-ppc64
+ else
+ echo Cannot figure out what qemu command to use! 1>&2
+ echo file $1 output: $u
+ # Usually this will be one of /usr/bin/qemu-system-*
+ # Use TORTURE_QEMU_CMD environment variable or appropriate
+ # argument to top-level script.
+ exit 1
+ fi
+}
+
+# identify_qemu_append qemu-cmd
+#
+# Output arguments for the qemu "-append" string based on CPU type
+# and the TORTURE_QEMU_INTERACTIVE environment variable.
+identify_qemu_append () {
+ local console=ttyS0
+ case "$1" in
+ qemu-system-x86_64|qemu-system-i386)
+ echo noapic selinux=0 initcall_debug debug
+ ;;
+ qemu-system-aarch64)
+ console=ttyAMA0
+ ;;
+ esac
+ if test -n "$TORTURE_QEMU_INTERACTIVE"
+ then
+ echo root=/dev/sda
+ else
+ echo console=$console
+ fi
+}
+
+# identify_qemu_args qemu-cmd serial-file
+#
+# Output arguments for qemu arguments based on the TORTURE_QEMU_MAC
+# and TORTURE_QEMU_INTERACTIVE environment variables.
+identify_qemu_args () {
+ case "$1" in
+ qemu-system-x86_64|qemu-system-i386)
+ ;;
+ qemu-system-aarch64)
+ echo -machine virt,gic-version=host -cpu host
+ ;;
+ qemu-system-ppc64)
+ echo -enable-kvm -M pseries -nodefaults
+ echo -device spapr-vscsi
+ if test -n "$TORTURE_QEMU_INTERACTIVE" -a -n "$TORTURE_QEMU_MAC"
+ then
+ echo -device spapr-vlan,netdev=net0,mac=$TORTURE_QEMU_MAC
+ echo -netdev bridge,br=br0,id=net0
+ elif test -n "$TORTURE_QEMU_INTERACTIVE"
+ then
+ echo -net nic -net user
+ fi
+ ;;
+ esac
+ if test -n "$TORTURE_QEMU_INTERACTIVE"
+ then
+ echo -monitor stdio -serial pty -S
+ else
+ echo -serial file:$2
+ fi
+}
+
+# identify_qemu_vcpus
+#
+# Returns the number of virtual CPUs available to the aggregate of the
+# guest OSes.
+identify_qemu_vcpus () {
+ lscpu | grep '^CPU(s):' | sed -e 's/CPU(s)://'
+}
+
+# print_bug
+#
+# Prints "BUG: " in red followed by remaining arguments
+print_bug () {
+ printf '\033[031mBUG: \033[m'
+ echo $*
+}
+
+# print_warning
+#
+# Prints "WARNING: " in yellow followed by remaining arguments
+print_warning () {
+ printf '\033[033mWARNING: \033[m'
+ echo $*
+}
+
+# specify_qemu_cpus qemu-cmd qemu-args #cpus
+#
+# Appends a string containing "-smp XXX" to qemu-args, unless the incoming
+# qemu-args already contains "-smp".
+specify_qemu_cpus () {
+ local nt;
+
+ if echo $2 | grep -q -e -smp
+ then
+ echo $2
+ else
+ case "$1" in
+ qemu-system-x86_64|qemu-system-i386|qemu-system-aarch64)
+ echo $2 -smp $3
+ ;;
+ qemu-system-ppc64)
+ nt="`lscpu | grep '^NUMA node0' | sed -e 's/^[^,]*,\([0-9]*\),.*$/\1/'`"
+ echo $2 -smp cores=`expr \( $3 + $nt - 1 \) / $nt`,threads=$nt
+ ;;
+ esac
+ fi
+}
diff --git a/tools/testing/selftests/rcutorture/bin/jitter.sh b/tools/testing/selftests/rcutorture/bin/jitter.sh
new file mode 100755
index 000000000..363382837
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/jitter.sh
@@ -0,0 +1,90 @@
+#!/bin/bash
+#
+# Alternate sleeping and spinning on randomly selected CPUs. The purpose
+# of this script is to inflict random OS jitter on a concurrently running
+# test.
+#
+# Usage: jitter.sh me duration [ sleepmax [ spinmax ] ]
+#
+# me: Random-number-generator seed salt.
+# duration: Time to run in seconds.
+# sleepmax: Maximum microseconds to sleep, defaults to one second.
+# spinmax: Maximum microseconds to spin, defaults to one millisecond.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2016
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+me=$(($1 * 1000))
+duration=$2
+sleepmax=${3-1000000}
+spinmax=${4-1000}
+
+n=1
+
+starttime=`awk 'BEGIN { print systime(); }' < /dev/null`
+
+while :
+do
+ # Check for done.
+ t=`awk -v s=$starttime 'BEGIN { print systime() - s; }' < /dev/null`
+ if test "$t" -gt "$duration"
+ then
+ exit 0;
+ fi
+
+ # Set affinity to randomly selected CPU
+ cpus=`ls /sys/devices/system/cpu/*/online |
+ sed -e 's,/[^/]*$,,' -e 's/^[^0-9]*//' |
+ grep -v '^0*$'`
+ cpumask=`awk -v cpus="$cpus" -v me=$me -v n=$n 'BEGIN {
+ srand(n + me + systime());
+ ncpus = split(cpus, ca);
+ curcpu = ca[int(rand() * ncpus + 1)];
+ mask = lshift(1, curcpu);
+ if (mask + 0 <= 0)
+ mask = 1;
+ printf("%#x\n", mask);
+ }' < /dev/null`
+ n=$(($n+1))
+ if ! taskset -p $cpumask $$ > /dev/null 2>&1
+ then
+ echo taskset failure: '"taskset -p ' $cpumask $$ '"'
+ exit 1
+ fi
+
+ # Sleep a random duration
+ sleeptime=`awk -v me=$me -v n=$n -v sleepmax=$sleepmax 'BEGIN {
+ srand(n + me + systime());
+ printf("%06d", int(rand() * sleepmax));
+ }' < /dev/null`
+ n=$(($n+1))
+ sleep .$sleeptime
+
+ # Spin a random duration
+ limit=`awk -v me=$me -v n=$n -v spinmax=$spinmax 'BEGIN {
+ srand(n + me + systime());
+ printf("%06d", int(rand() * spinmax));
+ }' < /dev/null`
+ n=$(($n+1))
+ for i in {1..$limit}
+ do
+ echo > /dev/null
+ done
+done
+
+exit 1
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-build.sh b/tools/testing/selftests/rcutorture/bin/kvm-build.sh
new file mode 100755
index 000000000..9115fcdb5
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-build.sh
@@ -0,0 +1,60 @@
+#!/bin/bash
+#
+# Build a kvm-ready Linux kernel from the tree in the current directory.
+#
+# Usage: kvm-build.sh config-template build-dir resdir
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2011
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+config_template=${1}
+if test -z "$config_template" -o ! -f "$config_template" -o ! -r "$config_template"
+then
+ echo "kvm-build.sh :$config_template: Not a readable file"
+ exit 1
+fi
+builddir=${2}
+resdir=${3}
+
+T=${TMPDIR-/tmp}/test-linux.sh.$$
+trap 'rm -rf $T' 0
+mkdir $T
+
+cp ${config_template} $T/config
+cat << ___EOF___ >> $T/config
+CONFIG_INITRAMFS_SOURCE="$TORTURE_INITRD"
+CONFIG_VIRTIO_PCI=y
+CONFIG_VIRTIO_CONSOLE=y
+___EOF___
+
+configinit.sh $T/config O=$builddir $resdir
+retval=$?
+if test $retval -gt 1
+then
+ exit 2
+fi
+ncpus=`cpus2use.sh`
+make O=$builddir -j$ncpus $TORTURE_KMAKE_ARG > $resdir/Make.out 2>&1
+retval=$?
+if test $retval -ne 0 || grep "rcu[^/]*": < $resdir/Make.out | egrep -q "Stop|Error|error:|warning:" || egrep -q "Stop|Error|error:" < $resdir/Make.out
+then
+ echo Kernel build error
+ egrep "Stop|Error|error:|warning:" < $resdir/Make.out
+ echo Run aborted.
+ exit 3
+fi
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
new file mode 100755
index 000000000..98f650c9b
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
@@ -0,0 +1,56 @@
+#!/bin/sh
+#
+# Invoke a text editor on all console.log files for all runs with diagnostics,
+# that is, on all such files having a console.log.diags counterpart.
+# Note that both console.log.diags and console.log are passed to the
+# editor (currently defaulting to "vi"), allowing the user to get an
+# idea of what to search for in the console.log file.
+#
+# Usage: kvm-find-errors.sh directory
+#
+# The "directory" above should end with the date/time directory, for example,
+# "tools/testing/selftests/rcutorture/res/2018.02.25-14:27:27".
+
+rundir="${1}"
+if test -z "$rundir" -o ! -d "$rundir"
+then
+ echo Usage: $0 directory
+fi
+editor=${EDITOR-vi}
+
+# Find builds with errors
+files=
+for i in ${rundir}/*/Make.out
+do
+ if egrep -q "error:|warning:" < $i
+ then
+ egrep "error:|warning:" < $i > $i.diags
+ files="$files $i.diags $i"
+ fi
+done
+if test -n "$files"
+then
+ $editor $files
+else
+ echo No build errors.
+fi
+if grep -q -e "--buildonly" < ${rundir}/log
+then
+ echo Build-only run, no console logs to check.
+fi
+
+# Find console logs with errors
+files=
+for i in ${rundir}/*/console.log
+do
+ if test -r $i.diags
+ then
+ files="$files $i.diags $i"
+ fi
+done
+if test -n "$files"
+then
+ $editor $files
+else
+ echo No errors in console logs.
+fi
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh
new file mode 100755
index 000000000..2de92f43e
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+#
+# Analyze a given results directory for locktorture progress.
+#
+# Usage: kvm-recheck-lock.sh resdir
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2014
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+i="$1"
+if test -d "$i" -a -r "$i"
+then
+ :
+else
+ echo Unreadable results directory: $i
+ exit 1
+fi
+
+configfile=`echo $i | sed -e 's/^.*\///'`
+ncs=`grep "Writes: Total:" $i/console.log 2> /dev/null | tail -1 | sed -e 's/^.* Total: //' -e 's/ .*$//'`
+if test -z "$ncs"
+then
+ echo "$configfile -------"
+else
+ title="$configfile ------- $ncs acquisitions/releases"
+ dur=`sed -e 's/^.* locktorture.shutdown_secs=//' -e 's/ .*$//' < $i/qemu-cmd 2> /dev/null`
+ if test -z "$dur"
+ then
+ :
+ else
+ ncsps=`awk -v ncs=$ncs -v dur=$dur '
+ BEGIN { print ncs / dur }' < /dev/null`
+ title="$title ($ncsps per second)"
+ fi
+ echo $title
+fi
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
new file mode 100755
index 000000000..0fa8a61cc
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
@@ -0,0 +1,74 @@
+#!/bin/bash
+#
+# Analyze a given results directory for rcutorture progress.
+#
+# Usage: kvm-recheck-rcu.sh resdir
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2014
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+i="$1"
+if test -d "$i" -a -r "$i"
+then
+ :
+else
+ echo Unreadable results directory: $i
+ exit 1
+fi
+. functions.sh
+
+configfile=`echo $i | sed -e 's/^.*\///'`
+ngps=`grep ver: $i/console.log 2> /dev/null | tail -1 | sed -e 's/^.* ver: //' -e 's/ .*$//'`
+stopstate="`grep 'End-test grace-period state: g' $i/console.log 2> /dev/null |
+ tail -1 | sed -e 's/^\[[ 0-9.]*] //' |
+ awk '{ print \"[\" $1 \" \" $5 \" \" $6 \" \" $7 \"]\"; }' |
+ tr -d '\012\015'`"
+if test -z "$ngps"
+then
+ echo "$configfile ------- " $stopstate
+else
+ title="$configfile ------- $ngps GPs"
+ dur=`sed -e 's/^.* rcutorture.shutdown_secs=//' -e 's/ .*$//' < $i/qemu-cmd 2> /dev/null`
+ if test -z "$dur"
+ then
+ :
+ else
+ ngpsps=`awk -v ngps=$ngps -v dur=$dur '
+ BEGIN { print ngps / dur }' < /dev/null`
+ title="$title ($ngpsps/s)"
+ fi
+ echo $title $stopstate
+ nclosecalls=`grep --binary-files=text 'torture: Reader Batch' $i/console.log | tail -1 | awk '{for (i=NF-8;i<=NF;i++) sum+=$i; } END {print sum}'`
+ if test -z "$nclosecalls"
+ then
+ exit 0
+ fi
+ if test "$nclosecalls" -eq 0
+ then
+ exit 0
+ fi
+ # Compute number of close calls per tenth of an hour
+ nclosecalls10=`awk -v nclosecalls=$nclosecalls -v dur=$dur 'BEGIN { print int(nclosecalls * 36000 / dur) }' < /dev/null`
+ if test $nclosecalls10 -gt 5 -a $nclosecalls -gt 1
+ then
+ print_bug $nclosecalls "Reader Batch close calls in" $(($dur/60)) minute run: $i
+ else
+ print_warning $nclosecalls "Reader Batch close calls in" $(($dur/60)) minute run: $i
+ fi
+ echo $nclosecalls "Reader Batch close calls in" $(($dur/60)) minute run: $i > $i/console.log.rcu.diags
+fi
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh
new file mode 100755
index 000000000..8948f7926
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh
@@ -0,0 +1,122 @@
+#!/bin/bash
+#
+# Analyze a given results directory for rcuperf performance measurements,
+# looking for ftrace data. Exits with 0 if data was found, analyzed, and
+# printed. Intended to be invoked from kvm-recheck-rcuperf.sh after
+# argument checking.
+#
+# Usage: kvm-recheck-rcuperf-ftrace.sh resdir
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2016
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+i="$1"
+. functions.sh
+
+if test "`grep -c 'rcu_exp_grace_period.*start' < $i/console.log`" -lt 100
+then
+ exit 10
+fi
+
+sed -e 's/^\[[^]]*]//' < $i/console.log |
+grep 'us : rcu_exp_grace_period' |
+sed -e 's/us : / : /' |
+tr -d '\015' |
+awk '
+$8 == "start" {
+ if (startseq != "")
+ nlost++;
+ starttask = $1;
+ starttime = $3;
+ startseq = $7;
+ seqtask[startseq] = starttask;
+}
+
+$8 == "end" {
+ if (startseq == $7) {
+ curgpdur = $3 - starttime;
+ gptimes[++n] = curgpdur;
+ gptaskcnt[starttask]++;
+ sum += curgpdur;
+ if (curgpdur > 1000)
+ print "Long GP " starttime "us to " $3 "us (" curgpdur "us)";
+ startseq = "";
+ } else {
+ # Lost a message or some such, reset.
+ startseq = "";
+ nlost++;
+ }
+}
+
+$8 == "done" && seqtask[$7] != $1 {
+ piggybackcnt[$1]++;
+}
+
+END {
+ newNR = asort(gptimes);
+ if (newNR <= 0) {
+ print "No ftrace records found???"
+ exit 10;
+ }
+ pct50 = int(newNR * 50 / 100);
+ if (pct50 < 1)
+ pct50 = 1;
+ pct90 = int(newNR * 90 / 100);
+ if (pct90 < 1)
+ pct90 = 1;
+ pct99 = int(newNR * 99 / 100);
+ if (pct99 < 1)
+ pct99 = 1;
+ div = 10 ** int(log(gptimes[pct90]) / log(10) + .5) / 100;
+ print "Histogram bucket size: " div;
+ last = gptimes[1] - 10;
+ count = 0;
+ for (i = 1; i <= newNR; i++) {
+ current = div * int(gptimes[i] / div);
+ if (last == current) {
+ count++;
+ } else {
+ if (count > 0)
+ print last, count;
+ count = 1;
+ last = current;
+ }
+ }
+ if (count > 0)
+ print last, count;
+ print "Distribution of grace periods across tasks:";
+ for (i in gptaskcnt) {
+ print "\t" i, gptaskcnt[i];
+ nbatches += gptaskcnt[i];
+ }
+ ngps = nbatches;
+ print "Distribution of piggybacking across tasks:";
+ for (i in piggybackcnt) {
+ print "\t" i, piggybackcnt[i];
+ ngps += piggybackcnt[i];
+ }
+ print "Average grace-period duration: " sum / newNR " microseconds";
+ print "Minimum grace-period duration: " gptimes[1];
+ print "50th percentile grace-period duration: " gptimes[pct50];
+ print "90th percentile grace-period duration: " gptimes[pct90];
+ print "99th percentile grace-period duration: " gptimes[pct99];
+ print "Maximum grace-period duration: " gptimes[newNR];
+ print "Grace periods: " ngps + 0 " Batches: " nbatches + 0 " Ratio: " ngps / nbatches " Lost: " nlost + 0;
+ print "Computed from ftrace data.";
+}'
+exit 0
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh
new file mode 100755
index 000000000..ccebf772f
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh
@@ -0,0 +1,96 @@
+#!/bin/bash
+#
+# Analyze a given results directory for rcuperf performance measurements.
+#
+# Usage: kvm-recheck-rcuperf.sh resdir
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2016
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+i="$1"
+if test -d "$i" -a -r "$i"
+then
+ :
+else
+ echo Unreadable results directory: $i
+ exit 1
+fi
+PATH=`pwd`/tools/testing/selftests/rcutorture/bin:$PATH; export PATH
+. functions.sh
+
+if kvm-recheck-rcuperf-ftrace.sh $i
+then
+ # ftrace data was successfully analyzed, call it good!
+ exit 0
+fi
+
+configfile=`echo $i | sed -e 's/^.*\///'`
+
+sed -e 's/^\[[^]]*]//' < $i/console.log |
+awk '
+/-perf: .* gps: .* batches:/ {
+ ngps = $9;
+ nbatches = $11;
+}
+
+/-perf: .*writer-duration/ {
+ gptimes[++n] = $5 / 1000.;
+ sum += $5 / 1000.;
+}
+
+END {
+ newNR = asort(gptimes);
+ if (newNR <= 0) {
+ print "No rcuperf records found???"
+ exit;
+ }
+ pct50 = int(newNR * 50 / 100);
+ if (pct50 < 1)
+ pct50 = 1;
+ pct90 = int(newNR * 90 / 100);
+ if (pct90 < 1)
+ pct90 = 1;
+ pct99 = int(newNR * 99 / 100);
+ if (pct99 < 1)
+ pct99 = 1;
+ div = 10 ** int(log(gptimes[pct90]) / log(10) + .5) / 100;
+ print "Histogram bucket size: " div;
+ last = gptimes[1] - 10;
+ count = 0;
+ for (i = 1; i <= newNR; i++) {
+ current = div * int(gptimes[i] / div);
+ if (last == current) {
+ count++;
+ } else {
+ if (count > 0)
+ print last, count;
+ count = 1;
+ last = current;
+ }
+ }
+ if (count > 0)
+ print last, count;
+ print "Average grace-period duration: " sum / newNR " microseconds";
+ print "Minimum grace-period duration: " gptimes[1];
+ print "50th percentile grace-period duration: " gptimes[pct50];
+ print "90th percentile grace-period duration: " gptimes[pct90];
+ print "99th percentile grace-period duration: " gptimes[pct99];
+ print "Maximum grace-period duration: " gptimes[newNR];
+ print "Grace periods: " ngps + 0 " Batches: " nbatches + 0 " Ratio: " ngps / nbatches;
+ print "Computed from rcuperf printk output.";
+}'
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
new file mode 100755
index 000000000..c9bab57a7
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
@@ -0,0 +1,73 @@
+#!/bin/bash
+#
+# Given the results directories for previous KVM-based torture runs,
+# check the build and console output for errors. Given a directory
+# containing results directories, this recursively checks them all.
+#
+# Usage: kvm-recheck.sh resdir ...
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2011
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+PATH=`pwd`/tools/testing/selftests/rcutorture/bin:$PATH; export PATH
+. functions.sh
+for rd in "$@"
+do
+ firsttime=1
+ dirs=`find $rd -name Make.defconfig.out -print | sort | sed -e 's,/[^/]*$,,' | sort -u`
+ for i in $dirs
+ do
+ if test -n "$firsttime"
+ then
+ firsttime=""
+ resdir=`echo $i | sed -e 's,/$,,' -e 's,/[^/]*$,,'`
+ head -1 $resdir/log
+ fi
+ TORTURE_SUITE="`cat $i/../TORTURE_SUITE`"
+ rm -f $i/console.log.*.diags
+ kvm-recheck-${TORTURE_SUITE}.sh $i
+ if test -f "$i/console.log"
+ then
+ configcheck.sh $i/.config $i/ConfigFragment
+ if test -r $i/Make.oldconfig.err
+ then
+ cat $i/Make.oldconfig.err
+ fi
+ parse-build.sh $i/Make.out $configfile
+ parse-console.sh $i/console.log $configfile
+ if test -r $i/Warnings
+ then
+ cat $i/Warnings
+ fi
+ else
+ if test -f "$i/qemu-cmd"
+ then
+ print_bug qemu failed
+ echo " $i"
+ elif test -f "$i/buildonly"
+ then
+ echo Build-only run, no boot/test
+ configcheck.sh $i/.config $i/ConfigFragment
+ parse-build.sh $i/Make.out $configfile
+ else
+ print_bug Build failed
+ echo " $i"
+ fi
+ fi
+ done
+done
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
new file mode 100755
index 000000000..f7247ee00
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
@@ -0,0 +1,271 @@
+#!/bin/bash
+#
+# Run a kvm-based test of the specified tree on the specified configs.
+# Fully automated run and error checking, no graphics console.
+#
+# Execute this in the source tree. Do not run it as a background task
+# because qemu does not seem to like that much.
+#
+# Usage: kvm-test-1-run.sh config builddir resdir seconds qemu-args boot_args
+#
+# qemu-args defaults to "-enable-kvm -nographic", along with arguments
+# specifying the number of CPUs and other options
+# generated from the underlying CPU architecture.
+# boot_args defaults to value returned by the per_version_boot_params
+# shell function.
+#
+# Anything you specify for either qemu-args or boot_args is appended to
+# the default values. The "-smp" value is deduced from the contents of
+# the config fragment.
+#
+# More sophisticated argument parsing is clearly needed.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2011
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+T=${TMPDIR-/tmp}/kvm-test-1-run.sh.$$
+trap 'rm -rf $T' 0
+mkdir $T
+
+. functions.sh
+. $CONFIGFRAG/ver_functions.sh
+
+config_template=${1}
+config_dir=`echo $config_template | sed -e 's,/[^/]*$,,'`
+title=`echo $config_template | sed -e 's/^.*\///'`
+builddir=${2}
+if test -z "$builddir" -o ! -d "$builddir" -o ! -w "$builddir"
+then
+ echo "kvm-test-1-run.sh :$builddir: Not a writable directory, cannot build into it"
+ exit 1
+fi
+resdir=${3}
+if test -z "$resdir" -o ! -d "$resdir" -o ! -w "$resdir"
+then
+ echo "kvm-test-1-run.sh :$resdir: Not a writable directory, cannot store results into it"
+ exit 1
+fi
+echo ' ---' `date`: Starting build
+echo ' ---' Kconfig fragment at: $config_template >> $resdir/log
+touch $resdir/ConfigFragment.input $resdir/ConfigFragment
+if test -r "$config_dir/CFcommon"
+then
+ echo " --- $config_dir/CFcommon" >> $resdir/ConfigFragment.input
+ cat < $config_dir/CFcommon >> $resdir/ConfigFragment.input
+ config_override.sh $config_dir/CFcommon $config_template > $T/Kc1
+ grep '#CHECK#' $config_dir/CFcommon >> $resdir/ConfigFragment
+else
+ cp $config_template $T/Kc1
+fi
+echo " --- $config_template" >> $resdir/ConfigFragment.input
+cat $config_template >> $resdir/ConfigFragment.input
+grep '#CHECK#' $config_template >> $resdir/ConfigFragment
+if test -n "$TORTURE_KCONFIG_ARG"
+then
+ echo $TORTURE_KCONFIG_ARG | tr -s " " "\012" > $T/cmdline
+ echo " --- --kconfig argument" >> $resdir/ConfigFragment.input
+ cat $T/cmdline >> $resdir/ConfigFragment.input
+ config_override.sh $T/Kc1 $T/cmdline > $T/Kc2
+ # Note that "#CHECK#" is not permitted on commandline.
+else
+ cp $T/Kc1 $T/Kc2
+fi
+cat $T/Kc2 >> $resdir/ConfigFragment
+
+base_resdir=`echo $resdir | sed -e 's/\.[0-9]\+$//'`
+if test "$base_resdir" != "$resdir" -a -f $base_resdir/bzImage -a -f $base_resdir/vmlinux
+then
+ # Rerunning previous test, so use that test's kernel.
+ QEMU="`identify_qemu $base_resdir/vmlinux`"
+ BOOT_IMAGE="`identify_boot_image $QEMU`"
+ KERNEL=$base_resdir/${BOOT_IMAGE##*/} # use the last component of ${BOOT_IMAGE}
+ ln -s $base_resdir/Make*.out $resdir # for kvm-recheck.sh
+ ln -s $base_resdir/.config $resdir # for kvm-recheck.sh
+ # Arch-independent indicator
+ touch $resdir/builtkernel
+elif kvm-build.sh $T/Kc2 $builddir $resdir
+then
+ # Had to build a kernel for this test.
+ QEMU="`identify_qemu $builddir/vmlinux`"
+ BOOT_IMAGE="`identify_boot_image $QEMU`"
+ cp $builddir/vmlinux $resdir
+ cp $builddir/.config $resdir
+ cp $builddir/Module.symvers $resdir > /dev/null || :
+ cp $builddir/System.map $resdir > /dev/null || :
+ if test -n "$BOOT_IMAGE"
+ then
+ cp $builddir/$BOOT_IMAGE $resdir
+ KERNEL=$resdir/${BOOT_IMAGE##*/}
+ # Arch-independent indicator
+ touch $resdir/builtkernel
+ else
+ echo No identifiable boot image, not running KVM, see $resdir.
+ echo Do the torture scripts know about your architecture?
+ fi
+ parse-build.sh $resdir/Make.out $title
+else
+ # Build failed.
+ cp $builddir/Make*.out $resdir
+ cp $builddir/.config $resdir || :
+ echo Build failed, not running KVM, see $resdir.
+ if test -f $builddir.wait
+ then
+ mv $builddir.wait $builddir.ready
+ fi
+ exit 1
+fi
+if test -f $builddir.wait
+then
+ mv $builddir.wait $builddir.ready
+fi
+while test -f $builddir.ready
+do
+ sleep 1
+done
+seconds=$4
+qemu_args=$5
+boot_args=$6
+
+cd $KVM
+kstarttime=`awk 'BEGIN { print systime() }' < /dev/null`
+if test -z "$TORTURE_BUILDONLY"
+then
+ echo ' ---' `date`: Starting kernel
+fi
+
+# Generate -smp qemu argument.
+qemu_args="-enable-kvm -nographic $qemu_args"
+cpu_count=`configNR_CPUS.sh $resdir/ConfigFragment`
+cpu_count=`configfrag_boot_cpus "$boot_args" "$config_template" "$cpu_count"`
+vcpus=`identify_qemu_vcpus`
+if test $cpu_count -gt $vcpus
+then
+ echo CPU count limited from $cpu_count to $vcpus | tee -a $resdir/Warnings
+ cpu_count=$vcpus
+fi
+qemu_args="`specify_qemu_cpus "$QEMU" "$qemu_args" "$cpu_count"`"
+
+# Generate architecture-specific and interaction-specific qemu arguments
+qemu_args="$qemu_args `identify_qemu_args "$QEMU" "$resdir/console.log"`"
+
+# Generate qemu -append arguments
+qemu_append="`identify_qemu_append "$QEMU"`"
+
+# Pull in Kconfig-fragment boot parameters
+boot_args="`configfrag_boot_params "$boot_args" "$config_template"`"
+# Generate kernel-version-specific boot parameters
+boot_args="`per_version_boot_params "$boot_args" $resdir/.config $seconds`"
+
+if test -n "$TORTURE_BUILDONLY"
+then
+ echo Build-only run specified, boot/test omitted.
+ touch $resdir/buildonly
+ exit 0
+fi
+echo "NOTE: $QEMU either did not run or was interactive" > $resdir/console.log
+echo $QEMU $qemu_args -m $TORTURE_QEMU_MEM -kernel $KERNEL -append \"$qemu_append $boot_args\" > $resdir/qemu-cmd
+( $QEMU $qemu_args -m $TORTURE_QEMU_MEM -kernel $KERNEL -append "$qemu_append $boot_args"& echo $! > $resdir/qemu_pid; wait `cat $resdir/qemu_pid`; echo $? > $resdir/qemu-retval ) &
+commandcompleted=0
+sleep 10 # Give qemu's pid a chance to reach the file
+if test -s "$resdir/qemu_pid"
+then
+ qemu_pid=`cat "$resdir/qemu_pid"`
+ echo Monitoring qemu job at pid $qemu_pid
+else
+ qemu_pid=""
+ echo Monitoring qemu job at yet-as-unknown pid
+fi
+while :
+do
+ if test -z "$qemu_pid" -a -s "$resdir/qemu_pid"
+ then
+ qemu_pid=`cat "$resdir/qemu_pid"`
+ fi
+ kruntime=`awk 'BEGIN { print systime() - '"$kstarttime"' }' < /dev/null`
+ if test -z "$qemu_pid" || kill -0 "$qemu_pid" > /dev/null 2>&1
+ then
+ if test $kruntime -ge $seconds
+ then
+ break;
+ fi
+ sleep 1
+ else
+ commandcompleted=1
+ if test $kruntime -lt $seconds
+ then
+ echo Completed in $kruntime vs. $seconds >> $resdir/Warnings 2>&1
+ grep "^(qemu) qemu:" $resdir/kvm-test-1-run.sh.out >> $resdir/Warnings 2>&1
+ killpid="`sed -n "s/^(qemu) qemu: terminating on signal [0-9]* from pid \([0-9]*\).*$/\1/p" $resdir/Warnings`"
+ if test -n "$killpid"
+ then
+ echo "ps -fp $killpid" >> $resdir/Warnings 2>&1
+ ps -fp $killpid >> $resdir/Warnings 2>&1
+ fi
+ else
+ echo ' ---' `date`: "Kernel done"
+ fi
+ break
+ fi
+done
+if test -z "$qemu_pid" -a -s "$resdir/qemu_pid"
+then
+ qemu_pid=`cat "$resdir/qemu_pid"`
+fi
+if test $commandcompleted -eq 0 -a -n "$qemu_pid"
+then
+ echo Grace period for qemu job at pid $qemu_pid
+ oldline="`tail $resdir/console.log`"
+ while :
+ do
+ kruntime=`awk 'BEGIN { print systime() - '"$kstarttime"' }' < /dev/null`
+ if kill -0 $qemu_pid > /dev/null 2>&1
+ then
+ :
+ else
+ break
+ fi
+ must_continue=no
+ newline="`tail $resdir/console.log`"
+ if test "$newline" != "$oldline" && echo $newline | grep -q ' [0-9]\+us : '
+ then
+ must_continue=yes
+ fi
+ last_ts="`tail $resdir/console.log | grep '^\[ *[0-9]\+\.[0-9]\+]' | tail -1 | sed -e 's/^\[ *//' -e 's/\..*$//'`"
+ if test -z "last_ts"
+ then
+ last_ts=0
+ fi
+ if test "$newline" != "$oldline" -a "$last_ts" -lt $((seconds + $TORTURE_SHUTDOWN_GRACE))
+ then
+ must_continue=yes
+ fi
+ if test $must_continue = no -a $kruntime -ge $((seconds + $TORTURE_SHUTDOWN_GRACE))
+ then
+ echo "!!! PID $qemu_pid hung at $kruntime vs. $seconds seconds" >> $resdir/Warnings 2>&1
+ kill -KILL $qemu_pid
+ break
+ fi
+ oldline=$newline
+ sleep 10
+ done
+elif test -z "$qemu_pid"
+then
+ echo Unknown PID, cannot kill qemu command
+fi
+
+parse-console.sh $resdir/console.log $title
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
new file mode 100755
index 000000000..5a7a62d76
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -0,0 +1,471 @@
+#!/bin/bash
+#
+# Run a series of tests under KVM. By default, this series is specified
+# by the relevant CFLIST file, but can be overridden by the --configs
+# command-line argument.
+#
+# Usage: kvm.sh [ options ]
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2011
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+scriptname=$0
+args="$*"
+
+T=${TMPDIR-/tmp}/kvm.sh.$$
+trap 'rm -rf $T' 0
+mkdir $T
+
+cd `dirname $scriptname`/../../../../../
+
+dur=$((30*60))
+dryrun=""
+KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM
+PATH=${KVM}/bin:$PATH; export PATH
+TORTURE_DEFCONFIG=defconfig
+TORTURE_BOOT_IMAGE=""
+TORTURE_INITRD="$KVM/initrd"; export TORTURE_INITRD
+TORTURE_KCONFIG_ARG=""
+TORTURE_KMAKE_ARG=""
+TORTURE_QEMU_MEM=512
+TORTURE_SHUTDOWN_GRACE=180
+TORTURE_SUITE=rcu
+resdir=""
+configs=""
+cpus=0
+ds=`date +%Y.%m.%d-%H:%M:%S`
+jitter="-1"
+
+. functions.sh
+
+usage () {
+ echo "Usage: $scriptname optional arguments:"
+ echo " --bootargs kernel-boot-arguments"
+ echo " --bootimage relative-path-to-kernel-boot-image"
+ echo " --buildonly"
+ echo " --configs \"config-file list w/ repeat factor (3*TINY01)\""
+ echo " --cpus N"
+ echo " --datestamp string"
+ echo " --defconfig string"
+ echo " --dryrun sched|script"
+ echo " --duration minutes"
+ echo " --interactive"
+ echo " --jitter N [ maxsleep (us) [ maxspin (us) ] ]"
+ echo " --kconfig Kconfig-options"
+ echo " --kmake-arg kernel-make-arguments"
+ echo " --mac nn:nn:nn:nn:nn:nn"
+ echo " --memory megabytes | nnnG"
+ echo " --no-initrd"
+ echo " --qemu-args qemu-arguments"
+ echo " --qemu-cmd qemu-system-..."
+ echo " --results absolute-pathname"
+ echo " --torture rcu"
+ exit 1
+}
+
+while test $# -gt 0
+do
+ case "$1" in
+ --bootargs|--bootarg)
+ checkarg --bootargs "(list of kernel boot arguments)" "$#" "$2" '.*' '^--'
+ TORTURE_BOOTARGS="$2"
+ shift
+ ;;
+ --bootimage)
+ checkarg --bootimage "(relative path to kernel boot image)" "$#" "$2" '[a-zA-Z0-9][a-zA-Z0-9_]*' '^--'
+ TORTURE_BOOT_IMAGE="$2"
+ shift
+ ;;
+ --buildonly)
+ TORTURE_BUILDONLY=1
+ ;;
+ --configs|--config)
+ checkarg --configs "(list of config files)" "$#" "$2" '^[^/]*$' '^--'
+ configs="$2"
+ shift
+ ;;
+ --cpus)
+ checkarg --cpus "(number)" "$#" "$2" '^[0-9]*$' '^--'
+ cpus=$2
+ shift
+ ;;
+ --datestamp)
+ checkarg --datestamp "(relative pathname)" "$#" "$2" '^[^/]*$' '^--'
+ ds=$2
+ shift
+ ;;
+ --defconfig)
+ checkarg --defconfig "defconfigtype" "$#" "$2" '^[^/][^/]*$' '^--'
+ TORTURE_DEFCONFIG=$2
+ shift
+ ;;
+ --dryrun)
+ checkarg --dryrun "sched|script" $# "$2" 'sched\|script' '^--'
+ dryrun=$2
+ shift
+ ;;
+ --duration)
+ checkarg --duration "(minutes)" $# "$2" '^[0-9]*$' '^error'
+ dur=$(($2*60))
+ shift
+ ;;
+ --interactive)
+ TORTURE_QEMU_INTERACTIVE=1; export TORTURE_QEMU_INTERACTIVE
+ ;;
+ --jitter)
+ checkarg --jitter "(# threads [ sleep [ spin ] ])" $# "$2" '^-\{,1\}[0-9]\+\( \+[0-9]\+\)\{,2\} *$' '^error$'
+ jitter="$2"
+ shift
+ ;;
+ --kconfig)
+ checkarg --kconfig "(Kconfig options)" $# "$2" '^CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\)\( CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\)\)*$' '^error$'
+ TORTURE_KCONFIG_ARG="$2"
+ shift
+ ;;
+ --kmake-arg)
+ checkarg --kmake-arg "(kernel make arguments)" $# "$2" '.*' '^error$'
+ TORTURE_KMAKE_ARG="$2"
+ shift
+ ;;
+ --mac)
+ checkarg --mac "(MAC address)" $# "$2" '^\([0-9a-fA-F]\{2\}:\)\{5\}[0-9a-fA-F]\{2\}$' error
+ TORTURE_QEMU_MAC=$2
+ shift
+ ;;
+ --memory)
+ checkarg --memory "(memory size)" $# "$2" '^[0-9]\+[MG]\?$' error
+ TORTURE_QEMU_MEM=$2
+ shift
+ ;;
+ --no-initrd)
+ TORTURE_INITRD=""; export TORTURE_INITRD
+ ;;
+ --qemu-args|--qemu-arg)
+ checkarg --qemu-args "(qemu arguments)" $# "$2" '^-' '^error'
+ TORTURE_QEMU_ARG="$2"
+ shift
+ ;;
+ --qemu-cmd)
+ checkarg --qemu-cmd "(qemu-system-...)" $# "$2" 'qemu-system-' '^--'
+ TORTURE_QEMU_CMD="$2"
+ shift
+ ;;
+ --results)
+ checkarg --results "(absolute pathname)" "$#" "$2" '^/' '^error'
+ resdir=$2
+ shift
+ ;;
+ --shutdown-grace)
+ checkarg --shutdown-grace "(seconds)" "$#" "$2" '^[0-9]*$' '^error'
+ TORTURE_SHUTDOWN_GRACE=$2
+ shift
+ ;;
+ --torture)
+ checkarg --torture "(suite name)" "$#" "$2" '^\(lock\|rcu\|rcuperf\)$' '^--'
+ TORTURE_SUITE=$2
+ shift
+ if test "$TORTURE_SUITE" = rcuperf
+ then
+ # If you really want jitter for rcuperf, specify
+ # it after specifying rcuperf. (But why?)
+ jitter=0
+ fi
+ ;;
+ *)
+ echo Unknown argument $1
+ usage
+ ;;
+ esac
+ shift
+done
+
+CONFIGFRAG=${KVM}/configs/${TORTURE_SUITE}; export CONFIGFRAG
+
+if test -z "$configs"
+then
+ configs="`cat $CONFIGFRAG/CFLIST`"
+fi
+
+if test -z "$resdir"
+then
+ resdir=$KVM/res
+fi
+
+# Create a file of test-name/#cpus pairs, sorted by decreasing #cpus.
+touch $T/cfgcpu
+for CF in $configs
+do
+ case $CF in
+ [0-9]\**|[0-9][0-9]\**|[0-9][0-9][0-9]\**)
+ config_reps=`echo $CF | sed -e 's/\*.*$//'`
+ CF1=`echo $CF | sed -e 's/^[^*]*\*//'`
+ ;;
+ *)
+ config_reps=1
+ CF1=$CF
+ ;;
+ esac
+ if test -f "$CONFIGFRAG/$CF1"
+ then
+ cpu_count=`configNR_CPUS.sh $CONFIGFRAG/$CF1`
+ cpu_count=`configfrag_boot_cpus "$TORTURE_BOOTARGS" "$CONFIGFRAG/$CF1" "$cpu_count"`
+ cpu_count=`configfrag_boot_maxcpus "$TORTURE_BOOTARGS" "$CONFIGFRAG/$CF1" "$cpu_count"`
+ for ((cur_rep=0;cur_rep<$config_reps;cur_rep++))
+ do
+ echo $CF1 $cpu_count >> $T/cfgcpu
+ done
+ else
+ echo "The --configs file $CF1 does not exist, terminating."
+ exit 1
+ fi
+done
+sort -k2nr $T/cfgcpu -T="$T" > $T/cfgcpu.sort
+
+# Use a greedy bin-packing algorithm, sorting the list accordingly.
+awk < $T/cfgcpu.sort > $T/cfgcpu.pack -v ncpus=$cpus '
+BEGIN {
+ njobs = 0;
+}
+
+{
+ # Read file of tests and corresponding required numbers of CPUs.
+ cf[njobs] = $1;
+ cpus[njobs] = $2;
+ njobs++;
+}
+
+END {
+ batch = 0;
+ nc = -1;
+
+ # Each pass through the following loop creates on test batch
+ # that can be executed concurrently given ncpus. Note that a
+ # given test that requires more than the available CPUs will run in
+ # their own batch. Such tests just have to make do with what
+ # is available.
+ while (nc != ncpus) {
+ batch++;
+ nc = ncpus;
+
+ # Each pass through the following loop considers one
+ # test for inclusion in the current batch.
+ for (i = 0; i < njobs; i++) {
+ if (done[i])
+ continue; # Already part of a batch.
+ if (nc >= cpus[i] || nc == ncpus) {
+
+ # This test fits into the current batch.
+ done[i] = batch;
+ nc -= cpus[i];
+ if (nc <= 0)
+ break; # Too-big test in its own batch.
+ }
+ }
+ }
+
+ # Dump out the tests in batch order.
+ for (b = 1; b <= batch; b++)
+ for (i = 0; i < njobs; i++)
+ if (done[i] == b)
+ print cf[i], cpus[i];
+}'
+
+# Generate a script to execute the tests in appropriate batches.
+cat << ___EOF___ > $T/script
+CONFIGFRAG="$CONFIGFRAG"; export CONFIGFRAG
+KVM="$KVM"; export KVM
+PATH="$PATH"; export PATH
+TORTURE_BOOT_IMAGE="$TORTURE_BOOT_IMAGE"; export TORTURE_BOOT_IMAGE
+TORTURE_BUILDONLY="$TORTURE_BUILDONLY"; export TORTURE_BUILDONLY
+TORTURE_DEFCONFIG="$TORTURE_DEFCONFIG"; export TORTURE_DEFCONFIG
+TORTURE_INITRD="$TORTURE_INITRD"; export TORTURE_INITRD
+TORTURE_KCONFIG_ARG="$TORTURE_KCONFIG_ARG"; export TORTURE_KCONFIG_ARG
+TORTURE_KMAKE_ARG="$TORTURE_KMAKE_ARG"; export TORTURE_KMAKE_ARG
+TORTURE_QEMU_CMD="$TORTURE_QEMU_CMD"; export TORTURE_QEMU_CMD
+TORTURE_QEMU_INTERACTIVE="$TORTURE_QEMU_INTERACTIVE"; export TORTURE_QEMU_INTERACTIVE
+TORTURE_QEMU_MAC="$TORTURE_QEMU_MAC"; export TORTURE_QEMU_MAC
+TORTURE_QEMU_MEM="$TORTURE_QEMU_MEM"; export TORTURE_QEMU_MEM
+TORTURE_SHUTDOWN_GRACE="$TORTURE_SHUTDOWN_GRACE"; export TORTURE_SHUTDOWN_GRACE
+TORTURE_SUITE="$TORTURE_SUITE"; export TORTURE_SUITE
+if ! test -e $resdir
+then
+ mkdir -p "$resdir" || :
+fi
+mkdir $resdir/$ds
+echo Results directory: $resdir/$ds
+echo $scriptname $args
+touch $resdir/$ds/log
+echo $scriptname $args >> $resdir/$ds/log
+echo ${TORTURE_SUITE} > $resdir/$ds/TORTURE_SUITE
+pwd > $resdir/$ds/testid.txt
+if test -d .git
+then
+ git status >> $resdir/$ds/testid.txt
+ git rev-parse HEAD >> $resdir/$ds/testid.txt
+ git diff HEAD >> $resdir/$ds/testid.txt
+fi
+___EOF___
+awk < $T/cfgcpu.pack \
+ -v TORTURE_BUILDONLY="$TORTURE_BUILDONLY" \
+ -v CONFIGDIR="$CONFIGFRAG/" \
+ -v KVM="$KVM" \
+ -v ncpus=$cpus \
+ -v jitter="$jitter" \
+ -v rd=$resdir/$ds/ \
+ -v dur=$dur \
+ -v TORTURE_QEMU_ARG="$TORTURE_QEMU_ARG" \
+ -v TORTURE_BOOTARGS="$TORTURE_BOOTARGS" \
+'BEGIN {
+ i = 0;
+}
+
+{
+ cf[i] = $1;
+ cpus[i] = $2;
+ i++;
+}
+
+# Dump out the scripting required to run one test batch.
+function dump(first, pastlast, batchnum)
+{
+ print "echo ----Start batch " batchnum ": `date` | tee -a " rd "log";
+ print "needqemurun="
+ jn=1
+ for (j = first; j < pastlast; j++) {
+ builddir=KVM "/b1"
+ cpusr[jn] = cpus[j];
+ if (cfrep[cf[j]] == "") {
+ cfr[jn] = cf[j];
+ cfrep[cf[j]] = 1;
+ } else {
+ cfrep[cf[j]]++;
+ cfr[jn] = cf[j] "." cfrep[cf[j]];
+ }
+ if (cpusr[jn] > ncpus && ncpus != 0)
+ ovf = "-ovf";
+ else
+ ovf = "";
+ print "echo ", cfr[jn], cpusr[jn] ovf ": Starting build. `date` | tee -a " rd "log";
+ print "rm -f " builddir ".*";
+ print "touch " builddir ".wait";
+ print "mkdir " builddir " > /dev/null 2>&1 || :";
+ print "mkdir " rd cfr[jn] " || :";
+ print "kvm-test-1-run.sh " CONFIGDIR cf[j], builddir, rd cfr[jn], dur " \"" TORTURE_QEMU_ARG "\" \"" TORTURE_BOOTARGS "\" > " rd cfr[jn] "/kvm-test-1-run.sh.out 2>&1 &"
+ print "echo ", cfr[jn], cpusr[jn] ovf ": Waiting for build to complete. `date` | tee -a " rd "log";
+ print "while test -f " builddir ".wait"
+ print "do"
+ print "\tsleep 1"
+ print "done"
+ print "echo ", cfr[jn], cpusr[jn] ovf ": Build complete. `date` | tee -a " rd "log";
+ jn++;
+ }
+ for (j = 1; j < jn; j++) {
+ builddir=KVM "/b" j
+ print "rm -f " builddir ".ready"
+ print "if test -f \"" rd cfr[j] "/builtkernel\""
+ print "then"
+ print "\techo ----", cfr[j], cpusr[j] ovf ": Kernel present. `date` | tee -a " rd "log";
+ print "\tneedqemurun=1"
+ print "fi"
+ }
+ njitter = 0;
+ split(jitter, ja);
+ if (ja[1] == -1 && ncpus == 0)
+ njitter = 1;
+ else if (ja[1] == -1)
+ njitter = ncpus;
+ else
+ njitter = ja[1];
+ if (TORTURE_BUILDONLY && njitter != 0) {
+ njitter = 0;
+ print "echo Build-only run, so suppressing jitter | tee -a " rd "log"
+ }
+ if (TORTURE_BUILDONLY) {
+ print "needqemurun="
+ }
+ print "if test -n \"$needqemurun\""
+ print "then"
+ print "\techo ---- Starting kernels. `date` | tee -a " rd "log";
+ for (j = 0; j < njitter; j++)
+ print "\tjitter.sh " j " " dur " " ja[2] " " ja[3] "&"
+ print "\twait"
+ print "\techo ---- All kernel runs complete. `date` | tee -a " rd "log";
+ print "else"
+ print "\twait"
+ print "\techo ---- No kernel runs. `date` | tee -a " rd "log";
+ print "fi"
+ for (j = 1; j < jn; j++) {
+ builddir=KVM "/b" j
+ print "echo ----", cfr[j], cpusr[j] ovf ": Build/run results: | tee -a " rd "log";
+ print "cat " rd cfr[j] "/kvm-test-1-run.sh.out | tee -a " rd "log";
+ }
+}
+
+END {
+ njobs = i;
+ nc = ncpus;
+ first = 0;
+ batchnum = 1;
+
+ # Each pass through the following loop considers one test.
+ for (i = 0; i < njobs; i++) {
+ if (ncpus == 0) {
+ # Sequential test specified, each test its own batch.
+ dump(i, i + 1, batchnum);
+ first = i;
+ batchnum++;
+ } else if (nc < cpus[i] && i != 0) {
+ # Out of CPUs, dump out a batch.
+ dump(first, i, batchnum);
+ first = i;
+ nc = ncpus;
+ batchnum++;
+ }
+ # Account for the CPUs needed by the current test.
+ nc -= cpus[i];
+ }
+ # Dump the last batch.
+ if (ncpus != 0)
+ dump(first, i, batchnum);
+}' >> $T/script
+
+cat << ___EOF___ >> $T/script
+echo
+echo
+echo " --- `date` Test summary:"
+echo Results directory: $resdir/$ds
+kvm-recheck.sh $resdir/$ds
+___EOF___
+
+if test "$dryrun" = script
+then
+ cat $T/script
+ exit 0
+elif test "$dryrun" = sched
+then
+ # Extract the test run schedule from the script.
+ egrep 'Start batch|Starting build\.' $T/script |
+ grep -v ">>" |
+ sed -e 's/:.*$//' -e 's/^echo //'
+ exit 0
+else
+ # Not a dryrun, so run the script.
+ sh $T/script
+fi
+
+# Tracing: trace_event=rcu:rcu_grace_period,rcu:rcu_future_grace_period,rcu:rcu_grace_period_init,rcu:rcu_nocb_wake,rcu:rcu_preempt_task,rcu:rcu_unlock_preempted_task,rcu:rcu_quiescent_state_report,rcu:rcu_fqs,rcu:rcu_callback,rcu:rcu_kfree_callback,rcu:rcu_batch_start,rcu:rcu_invoke_callback,rcu:rcu_invoke_kfree_callback,rcu:rcu_batch_end,rcu:rcu_torture_read,rcu:rcu_barrier
diff --git a/tools/testing/selftests/rcutorture/bin/parse-build.sh b/tools/testing/selftests/rcutorture/bin/parse-build.sh
new file mode 100755
index 000000000..24fe5f822
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/parse-build.sh
@@ -0,0 +1,62 @@
+#!/bin/bash
+#
+# Check the build output from an rcutorture run for goodness.
+# The "file" is a pathname on the local system, and "title" is
+# a text string for error-message purposes.
+#
+# The file must contain kernel build output.
+#
+# Usage: parse-build.sh file title
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2011
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+F=$1
+title=$2
+T=${TMPDIR-/tmp}/parse-build.sh.$$
+trap 'rm -rf $T' 0
+mkdir $T
+
+. functions.sh
+
+if grep -q CC < $F
+then
+ :
+else
+ print_bug $title no build
+ exit 1
+fi
+
+if grep -q "error:" < $F
+then
+ print_bug $title build errors:
+ grep "error:" < $F
+ exit 2
+fi
+
+grep warning: < $F > $T/warnings
+grep "include/linux/*rcu*\.h:" $T/warnings > $T/hwarnings
+grep "kernel/rcu/[^/]*:" $T/warnings > $T/cwarnings
+cat $T/hwarnings $T/cwarnings > $T/rcuwarnings
+if test -s $T/rcuwarnings
+then
+ print_warning $title build errors:
+ cat $T/rcuwarnings
+ exit 2
+fi
+exit 0
diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh
new file mode 100755
index 000000000..84933f6ae
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh
@@ -0,0 +1,176 @@
+#!/bin/bash
+#
+# Check the console output from an rcutorture run for oopses.
+# The "file" is a pathname on the local system, and "title" is
+# a text string for error-message purposes.
+#
+# Usage: parse-console.sh file title
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2011
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+T=${TMPDIR-/tmp}/parse-console.sh.$$
+file="$1"
+title="$2"
+
+trap 'rm -f $T.seq $T.diags' 0
+
+. functions.sh
+
+# Check for presence and readability of console output file
+if test -f "$file" -a -r "$file"
+then
+ :
+else
+ echo $title unreadable console output file: $file
+ exit 1
+fi
+if grep -Pq '\x00' < $file
+then
+ print_warning Console output contains nul bytes, old qemu still running?
+fi
+cat /dev/null > $file.diags
+
+# Check for proper termination, except that rcuperf runs don't indicate this.
+if test "$TORTURE_SUITE" != rcuperf
+then
+ # check for abject failure
+
+ if grep -q FAILURE $file || grep -q -e '-torture.*!!!' $file
+ then
+ nerrs=`grep --binary-files=text '!!!' $file |
+ tail -1 |
+ awk '
+ {
+ for (i=NF-8;i<=NF;i++)
+ sum+=$i;
+ }
+ END { print sum }'`
+ print_bug $title FAILURE, $nerrs instances
+ exit
+ fi
+
+ grep --binary-files=text 'torture:.*ver:' $file |
+ egrep --binary-files=text -v '\(null\)|rtc: 000000000* ' |
+ sed -e 's/^(initramfs)[^]]*] //' -e 's/^\[[^]]*] //' |
+ awk '
+ BEGIN {
+ ver = 0;
+ badseq = 0;
+ }
+
+ {
+ if (!badseq && ($5 + 0 != $5 || $5 <= ver)) {
+ badseqno1 = ver;
+ badseqno2 = $5;
+ badseqnr = NR;
+ badseq = 1;
+ }
+ ver = $5
+ }
+
+ END {
+ if (badseq) {
+ if (badseqno1 == badseqno2 && badseqno2 == ver)
+ print "GP HANG at " ver " torture stat " badseqnr;
+ else
+ print "BAD SEQ " badseqno1 ":" badseqno2 " last:" ver " version " badseqnr;
+ }
+ }' > $T.seq
+
+ if grep -q SUCCESS $file
+ then
+ if test -s $T.seq
+ then
+ print_warning $title `cat $T.seq`
+ echo " " $file
+ exit 2
+ fi
+ else
+ if grep -q "_HOTPLUG:" $file
+ then
+ print_warning HOTPLUG FAILURES $title `cat $T.seq`
+ echo " " $file
+ exit 3
+ fi
+ echo $title no success message, `grep --binary-files=text 'ver:' $file | wc -l` successful version messages
+ if test -s $T.seq
+ then
+ print_warning $title `cat $T.seq`
+ fi
+ exit 2
+ fi
+fi | tee -a $file.diags
+
+egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for' < $file |
+grep -v 'ODEBUG: ' |
+grep -v 'Warning: unable to open an initial console' > $T.diags
+if test -s $T.diags
+then
+ print_warning "Assertion failure in $file $title"
+ # cat $T.diags
+ summary=""
+ n_badness=`grep -c Badness $file`
+ if test "$n_badness" -ne 0
+ then
+ summary="$summary Badness: $n_badness"
+ fi
+ n_warn=`grep -v 'Warning: unable to open an initial console' $file | egrep -c 'WARNING:|Warn'`
+ if test "$n_warn" -ne 0
+ then
+ summary="$summary Warnings: $n_warn"
+ fi
+ n_bugs=`egrep -c 'BUG|Oops:' $file`
+ if test "$n_bugs" -ne 0
+ then
+ summary="$summary Bugs: $n_bugs"
+ fi
+ n_calltrace=`grep -c 'Call Trace:' $file`
+ if test "$n_calltrace" -ne 0
+ then
+ summary="$summary Call Traces: $n_calltrace"
+ fi
+ n_lockdep=`grep -c =========== $file`
+ if test "$n_badness" -ne 0
+ then
+ summary="$summary lockdep: $n_badness"
+ fi
+ n_stalls=`egrep -c 'detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state' $file`
+ if test "$n_stalls" -ne 0
+ then
+ summary="$summary Stalls: $n_stalls"
+ fi
+ n_starves=`grep -c 'rcu_.*kthread starved for' $file`
+ if test "$n_starves" -ne 0
+ then
+ summary="$summary Starves: $n_starves"
+ fi
+ print_warning Summary: $summary
+ cat $T.diags >> $file.diags
+fi
+for i in $file.*.diags
+do
+ if test -f "$i"
+ then
+ cat $i >> $file.diags
+ fi
+done
+if ! test -s $file.diags
+then
+ rm -f $file.diags
+fi
diff --git a/tools/testing/selftests/rcutorture/configs/lock/BUSTED b/tools/testing/selftests/rcutorture/configs/lock/BUSTED
new file mode 100644
index 000000000..1d1da1477
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/BUSTED
@@ -0,0 +1,6 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=4
+CONFIG_HOTPLUG_CPU=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
diff --git a/tools/testing/selftests/rcutorture/configs/lock/BUSTED.boot b/tools/testing/selftests/rcutorture/configs/lock/BUSTED.boot
new file mode 100644
index 000000000..6386c15e9
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/BUSTED.boot
@@ -0,0 +1 @@
+locktorture.torture_type=lock_busted
diff --git a/tools/testing/selftests/rcutorture/configs/lock/CFLIST b/tools/testing/selftests/rcutorture/configs/lock/CFLIST
new file mode 100644
index 000000000..41bae5824
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/CFLIST
@@ -0,0 +1,7 @@
+LOCK01
+LOCK02
+LOCK03
+LOCK04
+LOCK05
+LOCK06
+LOCK07
diff --git a/tools/testing/selftests/rcutorture/configs/lock/CFcommon b/tools/testing/selftests/rcutorture/configs/lock/CFcommon
new file mode 100644
index 000000000..e372dc269
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/CFcommon
@@ -0,0 +1,2 @@
+CONFIG_LOCK_TORTURE_TEST=y
+CONFIG_PRINTK_TIME=y
diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK01 b/tools/testing/selftests/rcutorture/configs/lock/LOCK01
new file mode 100644
index 000000000..a9625e3d6
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/LOCK01
@@ -0,0 +1,6 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=8
+CONFIG_HOTPLUG_CPU=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK02 b/tools/testing/selftests/rcutorture/configs/lock/LOCK02
new file mode 100644
index 000000000..1d1da1477
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/LOCK02
@@ -0,0 +1,6 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=4
+CONFIG_HOTPLUG_CPU=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK02.boot b/tools/testing/selftests/rcutorture/configs/lock/LOCK02.boot
new file mode 100644
index 000000000..5aa44b4f1
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/LOCK02.boot
@@ -0,0 +1 @@
+locktorture.torture_type=mutex_lock
diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK03 b/tools/testing/selftests/rcutorture/configs/lock/LOCK03
new file mode 100644
index 000000000..1d1da1477
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/LOCK03
@@ -0,0 +1,6 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=4
+CONFIG_HOTPLUG_CPU=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK03.boot b/tools/testing/selftests/rcutorture/configs/lock/LOCK03.boot
new file mode 100644
index 000000000..a67bbe024
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/LOCK03.boot
@@ -0,0 +1 @@
+locktorture.torture_type=rwsem_lock
diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK04 b/tools/testing/selftests/rcutorture/configs/lock/LOCK04
new file mode 100644
index 000000000..1d1da1477
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/LOCK04
@@ -0,0 +1,6 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=4
+CONFIG_HOTPLUG_CPU=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK04.boot b/tools/testing/selftests/rcutorture/configs/lock/LOCK04.boot
new file mode 100644
index 000000000..48c04fe47
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/LOCK04.boot
@@ -0,0 +1 @@
+locktorture.torture_type=rw_lock
diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK05 b/tools/testing/selftests/rcutorture/configs/lock/LOCK05
new file mode 100644
index 000000000..1d1da1477
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/LOCK05
@@ -0,0 +1,6 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=4
+CONFIG_HOTPLUG_CPU=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK05.boot b/tools/testing/selftests/rcutorture/configs/lock/LOCK05.boot
new file mode 100644
index 000000000..8ac37307c
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/LOCK05.boot
@@ -0,0 +1 @@
+locktorture.torture_type=rtmutex_lock
diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK06 b/tools/testing/selftests/rcutorture/configs/lock/LOCK06
new file mode 100644
index 000000000..1d1da1477
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/LOCK06
@@ -0,0 +1,6 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=4
+CONFIG_HOTPLUG_CPU=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK06.boot b/tools/testing/selftests/rcutorture/configs/lock/LOCK06.boot
new file mode 100644
index 000000000..f92219cd4
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/LOCK06.boot
@@ -0,0 +1 @@
+locktorture.torture_type=percpu_rwsem_lock
diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK07 b/tools/testing/selftests/rcutorture/configs/lock/LOCK07
new file mode 100644
index 000000000..1d1da1477
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/LOCK07
@@ -0,0 +1,6 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=4
+CONFIG_HOTPLUG_CPU=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK07.boot b/tools/testing/selftests/rcutorture/configs/lock/LOCK07.boot
new file mode 100644
index 000000000..97dadd1a9
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/LOCK07.boot
@@ -0,0 +1 @@
+locktorture.torture_type=ww_mutex_lock
diff --git a/tools/testing/selftests/rcutorture/configs/lock/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/lock/ver_functions.sh
new file mode 100644
index 000000000..80eb646e1
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/ver_functions.sh
@@ -0,0 +1,42 @@
+#!/bin/bash
+#
+# Kernel-version-dependent shell functions for the rest of the scripts.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2014
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+# locktorture_param_onoff bootparam-string config-file
+#
+# Adds onoff locktorture module parameters to kernels having it.
+locktorture_param_onoff () {
+ if ! bootparam_hotplug_cpu "$1" && configfrag_hotplug_cpu "$2"
+ then
+ echo CPU-hotplug kernel, adding locktorture onoff. 1>&2
+ echo locktorture.onoff_interval=3 locktorture.onoff_holdoff=30
+ fi
+}
+
+# per_version_boot_params bootparam-string config-file seconds
+#
+# Adds per-version torture-module parameters to kernels supporting them.
+per_version_boot_params () {
+ echo $1 `locktorture_param_onoff "$1" "$2"` \
+ locktorture.stat_interval=15 \
+ locktorture.shutdown_secs=$3 \
+ locktorture.verbose=1
+}
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/BUSTED b/tools/testing/selftests/rcutorture/configs/rcu/BUSTED
new file mode 100644
index 000000000..48d8a245c
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/BUSTED
@@ -0,0 +1,7 @@
+CONFIG_RCU_TRACE=n
+CONFIG_SMP=y
+CONFIG_NR_CPUS=4
+CONFIG_HOTPLUG_CPU=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/BUSTED.boot b/tools/testing/selftests/rcutorture/configs/rcu/BUSTED.boot
new file mode 100644
index 000000000..be7728db4
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/BUSTED.boot
@@ -0,0 +1 @@
+rcutorture.torture_type=busted
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/CFLIST b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST
new file mode 100644
index 000000000..6a0b9f69f
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST
@@ -0,0 +1,18 @@
+TREE01
+TREE02
+TREE03
+TREE04
+TREE05
+TREE06
+TREE07
+TREE08
+TREE09
+SRCU-N
+SRCU-P
+SRCU-t
+SRCU-u
+TINY01
+TINY02
+TASKS01
+TASKS02
+TASKS03
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/CFcommon b/tools/testing/selftests/rcutorture/configs/rcu/CFcommon
new file mode 100644
index 000000000..d2d2a8613
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/CFcommon
@@ -0,0 +1,2 @@
+CONFIG_RCU_TORTURE_TEST=y
+CONFIG_PRINTK_TIME=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N
new file mode 100644
index 000000000..2da8b4958
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N
@@ -0,0 +1,8 @@
+CONFIG_RCU_TRACE=n
+CONFIG_SMP=y
+CONFIG_NR_CPUS=4
+CONFIG_HOTPLUG_CPU=y
+CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=n
+#CHECK#CONFIG_RCU_EXPERT=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N.boot
new file mode 100644
index 000000000..238bfe3bd
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N.boot
@@ -0,0 +1 @@
+rcutorture.torture_type=srcu
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P
new file mode 100644
index 000000000..ab7ccd382
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P
@@ -0,0 +1,12 @@
+CONFIG_RCU_TRACE=n
+CONFIG_SMP=y
+CONFIG_NR_CPUS=8
+CONFIG_HOTPLUG_CPU=y
+CONFIG_RCU_EXPERT=y
+CONFIG_RCU_FANOUT=2
+CONFIG_RCU_FANOUT_LEAF=2
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
+CONFIG_DEBUG_LOCK_ALLOC=y
+CONFIG_PROVE_LOCKING=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot
new file mode 100644
index 000000000..84a7d51b7
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot
@@ -0,0 +1 @@
+rcutorture.torture_type=srcud
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t
new file mode 100644
index 000000000..6c78022c8
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t
@@ -0,0 +1,10 @@
+CONFIG_SMP=n
+CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=n
+#CHECK#CONFIG_TINY_SRCU=y
+CONFIG_RCU_TRACE=n
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_DEBUG_ATOMIC_SLEEP=y
+#CHECK#CONFIG_PREEMPT_COUNT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t.boot
new file mode 100644
index 000000000..238bfe3bd
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t.boot
@@ -0,0 +1 @@
+rcutorture.torture_type=srcu
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u
new file mode 100644
index 000000000..c15ada821
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u
@@ -0,0 +1,10 @@
+CONFIG_SMP=n
+CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=n
+#CHECK#CONFIG_TINY_SRCU=y
+CONFIG_RCU_TRACE=n
+CONFIG_DEBUG_LOCK_ALLOC=y
+CONFIG_PROVE_LOCKING=y
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_PREEMPT_COUNT=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u.boot
new file mode 100644
index 000000000..84a7d51b7
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u.boot
@@ -0,0 +1 @@
+rcutorture.torture_type=srcud
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS01 b/tools/testing/selftests/rcutorture/configs/rcu/TASKS01
new file mode 100644
index 000000000..bafe94cbd
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS01
@@ -0,0 +1,10 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=2
+CONFIG_HOTPLUG_CPU=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
+CONFIG_DEBUG_LOCK_ALLOC=y
+CONFIG_PROVE_LOCKING=y
+#CHECK#CONFIG_PROVE_RCU=y
+CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS01.boot b/tools/testing/selftests/rcutorture/configs/rcu/TASKS01.boot
new file mode 100644
index 000000000..cd2a188ee
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS01.boot
@@ -0,0 +1 @@
+rcutorture.torture_type=tasks
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS02 b/tools/testing/selftests/rcutorture/configs/rcu/TASKS02
new file mode 100644
index 000000000..ad2be91e5
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS02
@@ -0,0 +1,4 @@
+CONFIG_SMP=n
+CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS02.boot b/tools/testing/selftests/rcutorture/configs/rcu/TASKS02.boot
new file mode 100644
index 000000000..cd2a188ee
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS02.boot
@@ -0,0 +1 @@
+rcutorture.torture_type=tasks
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS03 b/tools/testing/selftests/rcutorture/configs/rcu/TASKS03
new file mode 100644
index 000000000..28568b72a
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS03
@@ -0,0 +1,12 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=2
+CONFIG_HOTPLUG_CPU=n
+CONFIG_SUSPEND=n
+CONFIG_HIBERNATION=n
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=n
+CONFIG_NO_HZ_FULL=y
+#CHECK#CONFIG_RCU_EXPERT=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS03.boot b/tools/testing/selftests/rcutorture/configs/rcu/TASKS03.boot
new file mode 100644
index 000000000..838297c58
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS03.boot
@@ -0,0 +1 @@
+rcutorture.torture_type=tasks nohz_full=1
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TINY01 b/tools/testing/selftests/rcutorture/configs/rcu/TINY01
new file mode 100644
index 000000000..6db705e55
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TINY01
@@ -0,0 +1,13 @@
+CONFIG_SMP=n
+CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=n
+#CHECK#CONFIG_TINY_RCU=y
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NO_HZ_FULL=n
+CONFIG_RCU_TRACE=n
+#CHECK#CONFIG_RCU_STALL_COMMON=n
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_PREEMPT_COUNT=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TINY02 b/tools/testing/selftests/rcutorture/configs/rcu/TINY02
new file mode 100644
index 000000000..d86742643
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TINY02
@@ -0,0 +1,14 @@
+CONFIG_SMP=n
+CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=n
+#CHECK#CONFIG_TINY_RCU=y
+CONFIG_HZ_PERIODIC=y
+CONFIG_NO_HZ_IDLE=n
+CONFIG_NO_HZ_FULL=n
+CONFIG_PROVE_LOCKING=y
+#CHECK#CONFIG_PROVE_RCU=y
+CONFIG_DEBUG_LOCK_ALLOC=y
+CONFIG_DEBUG_OBJECTS=y
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=y
+CONFIG_DEBUG_ATOMIC_SLEEP=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TINY02.boot b/tools/testing/selftests/rcutorture/configs/rcu/TINY02.boot
new file mode 100644
index 000000000..6c1a292a6
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TINY02.boot
@@ -0,0 +1,3 @@
+rcupdate.rcu_self_test=1
+rcupdate.rcu_self_test_bh=1
+rcutorture.torture_type=rcu_bh
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE01 b/tools/testing/selftests/rcutorture/configs/rcu/TREE01
new file mode 100644
index 000000000..b5b53973c
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE01
@@ -0,0 +1,18 @@
+CONFIG_SMP=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
+#CHECK#CONFIG_PREEMPT_RCU=y
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NO_HZ_FULL=n
+CONFIG_RCU_FAST_NO_HZ=y
+CONFIG_RCU_TRACE=y
+CONFIG_HOTPLUG_CPU=y
+CONFIG_MAXSMP=y
+CONFIG_CPUMASK_OFFSTACK=y
+CONFIG_RCU_NOCB_CPU=y
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_RCU_BOOST=n
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot
new file mode 100644
index 000000000..9f3a4d28e
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot
@@ -0,0 +1,5 @@
+rcutorture.torture_type=rcu_bh maxcpus=8 nr_cpus=43
+rcutree.gp_preinit_delay=3
+rcutree.gp_init_delay=3
+rcutree.gp_cleanup_delay=3
+rcu_nocbs=0
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE02 b/tools/testing/selftests/rcutorture/configs/rcu/TREE02
new file mode 100644
index 000000000..35e639e39
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE02
@@ -0,0 +1,23 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=8
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
+#CHECK#CONFIG_PREEMPT_RCU=y
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NO_HZ_FULL=n
+CONFIG_RCU_FAST_NO_HZ=n
+CONFIG_RCU_TRACE=n
+CONFIG_HOTPLUG_CPU=n
+CONFIG_SUSPEND=n
+CONFIG_HIBERNATION=n
+CONFIG_RCU_FANOUT=3
+CONFIG_RCU_FANOUT_LEAF=3
+CONFIG_RCU_NOCB_CPU=n
+CONFIG_DEBUG_LOCK_ALLOC=y
+CONFIG_PROVE_LOCKING=n
+CONFIG_RCU_BOOST=n
+CONFIG_RCU_EXPERT=y
+CONFIG_DEBUG_OBJECTS=y
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE03 b/tools/testing/selftests/rcutorture/configs/rcu/TREE03
new file mode 100644
index 000000000..2dc31b16e
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE03
@@ -0,0 +1,18 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=16
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
+#CHECK#CONFIG_PREEMPT_RCU=y
+CONFIG_HZ_PERIODIC=y
+CONFIG_NO_HZ_IDLE=n
+CONFIG_NO_HZ_FULL=n
+CONFIG_RCU_TRACE=y
+CONFIG_HOTPLUG_CPU=y
+CONFIG_RCU_FANOUT=2
+CONFIG_RCU_FANOUT_LEAF=2
+CONFIG_RCU_NOCB_CPU=n
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_RCU_BOOST=y
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot
new file mode 100644
index 000000000..5c3213cc3
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot
@@ -0,0 +1,5 @@
+rcutorture.onoff_interval=200 rcutorture.onoff_holdoff=30
+rcutree.gp_preinit_delay=12
+rcutree.gp_init_delay=3
+rcutree.gp_cleanup_delay=3
+rcutree.kthread_prio=2
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE04 b/tools/testing/selftests/rcutorture/configs/rcu/TREE04
new file mode 100644
index 000000000..24c9f6012
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE04
@@ -0,0 +1,20 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=8
+CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=n
+#CHECK#CONFIG_TREE_RCU=y
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=n
+CONFIG_NO_HZ_FULL=y
+CONFIG_RCU_FAST_NO_HZ=y
+CONFIG_RCU_TRACE=y
+CONFIG_HOTPLUG_CPU=n
+CONFIG_SUSPEND=n
+CONFIG_HIBERNATION=n
+CONFIG_RCU_FANOUT=4
+CONFIG_RCU_FANOUT_LEAF=3
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
+CONFIG_RCU_EQS_DEBUG=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot
new file mode 100644
index 000000000..e6071bb96
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot
@@ -0,0 +1 @@
+rcutorture.torture_type=rcu_bh rcutree.rcu_fanout_leaf=4 nohz_full=1-7
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE05 b/tools/testing/selftests/rcutorture/configs/rcu/TREE05
new file mode 100644
index 000000000..2dde0d996
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE05
@@ -0,0 +1,20 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=8
+CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=n
+#CHECK#CONFIG_TREE_RCU=y
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NO_HZ_FULL=n
+CONFIG_RCU_FAST_NO_HZ=n
+CONFIG_RCU_TRACE=n
+CONFIG_HOTPLUG_CPU=y
+CONFIG_RCU_FANOUT=6
+CONFIG_RCU_FANOUT_LEAF=6
+CONFIG_RCU_NOCB_CPU=y
+CONFIG_DEBUG_LOCK_ALLOC=y
+CONFIG_PROVE_LOCKING=y
+#CHECK#CONFIG_PROVE_RCU=y
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot
new file mode 100644
index 000000000..c7fd050df
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot
@@ -0,0 +1,5 @@
+rcutorture.torture_type=sched
+rcupdate.rcu_self_test_sched=1
+rcutree.gp_preinit_delay=3
+rcutree.gp_init_delay=3
+rcutree.gp_cleanup_delay=3
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE06 b/tools/testing/selftests/rcutorture/configs/rcu/TREE06
new file mode 100644
index 000000000..05a4eec3f
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE06
@@ -0,0 +1,23 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=8
+CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=n
+#CHECK#CONFIG_TREE_RCU=y
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NO_HZ_FULL=n
+CONFIG_RCU_FAST_NO_HZ=n
+CONFIG_RCU_TRACE=n
+CONFIG_HOTPLUG_CPU=n
+CONFIG_SUSPEND=n
+CONFIG_HIBERNATION=n
+CONFIG_RCU_FANOUT=6
+CONFIG_RCU_FANOUT_LEAF=6
+CONFIG_RCU_NOCB_CPU=n
+CONFIG_DEBUG_LOCK_ALLOC=y
+CONFIG_PROVE_LOCKING=y
+#CHECK#CONFIG_PROVE_RCU=y
+CONFIG_DEBUG_OBJECTS=y
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=y
+CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot
new file mode 100644
index 000000000..ad18b52a2
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot
@@ -0,0 +1,7 @@
+rcupdate.rcu_self_test=1
+rcupdate.rcu_self_test_bh=1
+rcupdate.rcu_self_test_sched=1
+rcutree.rcu_fanout_exact=1
+rcutree.gp_preinit_delay=3
+rcutree.gp_init_delay=3
+rcutree.gp_cleanup_delay=3
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE07 b/tools/testing/selftests/rcutorture/configs/rcu/TREE07
new file mode 100644
index 000000000..d7afb271a
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE07
@@ -0,0 +1,17 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=16
+CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=n
+#CHECK#CONFIG_TREE_RCU=y
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=n
+CONFIG_NO_HZ_FULL=y
+CONFIG_RCU_FAST_NO_HZ=n
+CONFIG_RCU_TRACE=y
+CONFIG_HOTPLUG_CPU=y
+CONFIG_RCU_FANOUT=2
+CONFIG_RCU_FANOUT_LEAF=2
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE07.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE07.boot
new file mode 100644
index 000000000..d44609937
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE07.boot
@@ -0,0 +1 @@
+nohz_full=2-9
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE08 b/tools/testing/selftests/rcutorture/configs/rcu/TREE08
new file mode 100644
index 000000000..fb1c763c1
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE08
@@ -0,0 +1,23 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=8
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
+#CHECK#CONFIG_PREEMPT_RCU=y
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NO_HZ_FULL=n
+CONFIG_RCU_FAST_NO_HZ=n
+CONFIG_RCU_TRACE=n
+CONFIG_HOTPLUG_CPU=n
+CONFIG_SUSPEND=n
+CONFIG_HIBERNATION=n
+CONFIG_RCU_FANOUT=3
+CONFIG_RCU_FANOUT_LEAF=2
+CONFIG_RCU_NOCB_CPU=y
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_PROVE_LOCKING=n
+CONFIG_RCU_BOOST=n
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
+CONFIG_RCU_EQS_DEBUG=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot
new file mode 100644
index 000000000..1bd8efc41
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot
@@ -0,0 +1,5 @@
+rcutorture.torture_type=sched
+rcupdate.rcu_self_test=1
+rcupdate.rcu_self_test_sched=1
+rcutree.rcu_fanout_exact=1
+rcu_nocbs=0-7
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE09 b/tools/testing/selftests/rcutorture/configs/rcu/TREE09
new file mode 100644
index 000000000..6710e749d
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE09
@@ -0,0 +1,18 @@
+CONFIG_SMP=n
+CONFIG_NR_CPUS=1
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
+#CHECK#CONFIG_PREEMPT_RCU=y
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NO_HZ_FULL=n
+CONFIG_RCU_TRACE=n
+CONFIG_HOTPLUG_CPU=n
+CONFIG_SUSPEND=n
+CONFIG_HIBERNATION=n
+CONFIG_RCU_NOCB_CPU=n
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_RCU_BOOST=n
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+#CHECK#CONFIG_RCU_EXPERT=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh
new file mode 100644
index 000000000..7bab82463
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh
@@ -0,0 +1,56 @@
+#!/bin/bash
+#
+# Kernel-version-dependent shell functions for the rest of the scripts.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2013
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+# rcutorture_param_n_barrier_cbs bootparam-string
+#
+# Adds n_barrier_cbs rcutorture module parameter to kernels having it.
+rcutorture_param_n_barrier_cbs () {
+ if echo $1 | grep -q "rcutorture\.n_barrier_cbs"
+ then
+ :
+ else
+ echo rcutorture.n_barrier_cbs=4
+ fi
+}
+
+# rcutorture_param_onoff bootparam-string config-file
+#
+# Adds onoff rcutorture module parameters to kernels having it.
+rcutorture_param_onoff () {
+ if ! bootparam_hotplug_cpu "$1" && configfrag_hotplug_cpu "$2"
+ then
+ echo CPU-hotplug kernel, adding rcutorture onoff. 1>&2
+ echo rcutorture.onoff_interval=1000 rcutorture.onoff_holdoff=30
+ fi
+}
+
+# per_version_boot_params bootparam-string config-file seconds
+#
+# Adds per-version torture-module parameters to kernels supporting them.
+per_version_boot_params () {
+ echo $1 `rcutorture_param_onoff "$1" "$2"` \
+ `rcutorture_param_n_barrier_cbs "$1"` \
+ rcutorture.stat_interval=15 \
+ rcutorture.shutdown_secs=$3 \
+ rcutorture.test_no_idle_hz=1 \
+ rcutorture.verbose=1
+}
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/CFLIST b/tools/testing/selftests/rcutorture/configs/rcuperf/CFLIST
new file mode 100644
index 000000000..c9f56cf20
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcuperf/CFLIST
@@ -0,0 +1 @@
+TREE
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/CFcommon b/tools/testing/selftests/rcutorture/configs/rcuperf/CFcommon
new file mode 100644
index 000000000..a09816b8c
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcuperf/CFcommon
@@ -0,0 +1,2 @@
+CONFIG_RCU_PERF_TEST=y
+CONFIG_PRINTK_TIME=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/TINY b/tools/testing/selftests/rcutorture/configs/rcuperf/TINY
new file mode 100644
index 000000000..fb05ef527
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcuperf/TINY
@@ -0,0 +1,16 @@
+CONFIG_SMP=n
+CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=n
+#CHECK#CONFIG_TINY_RCU=y
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NO_HZ_FULL=n
+CONFIG_RCU_FAST_NO_HZ=n
+CONFIG_RCU_NOCB_CPU=n
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_PROVE_LOCKING=n
+CONFIG_RCU_BOOST=n
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
+CONFIG_RCU_TRACE=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/TREE b/tools/testing/selftests/rcutorture/configs/rcuperf/TREE
new file mode 100644
index 000000000..721cfda76
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcuperf/TREE
@@ -0,0 +1,19 @@
+CONFIG_SMP=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
+#CHECK#CONFIG_PREEMPT_RCU=y
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NO_HZ_FULL=n
+CONFIG_RCU_FAST_NO_HZ=n
+CONFIG_HOTPLUG_CPU=n
+CONFIG_SUSPEND=n
+CONFIG_HIBERNATION=n
+CONFIG_RCU_NOCB_CPU=n
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_PROVE_LOCKING=n
+CONFIG_RCU_BOOST=n
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
+CONFIG_RCU_TRACE=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/TREE54 b/tools/testing/selftests/rcutorture/configs/rcuperf/TREE54
new file mode 100644
index 000000000..7629f5dd7
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcuperf/TREE54
@@ -0,0 +1,22 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=54
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
+#CHECK#CONFIG_PREEMPT_RCU=y
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NO_HZ_FULL=n
+CONFIG_RCU_FAST_NO_HZ=n
+CONFIG_HOTPLUG_CPU=n
+CONFIG_SUSPEND=n
+CONFIG_HIBERNATION=n
+CONFIG_RCU_FANOUT=3
+CONFIG_RCU_FANOUT_LEAF=2
+CONFIG_RCU_NOCB_CPU=n
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_PROVE_LOCKING=n
+CONFIG_RCU_BOOST=n
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
+CONFIG_RCU_TRACE=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh
new file mode 100644
index 000000000..d36b8fd6f
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+#
+# Torture-suite-dependent shell functions for the rest of the scripts.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2015
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+# per_version_boot_params bootparam-string config-file seconds
+#
+# Adds per-version torture-module parameters to kernels supporting them.
+per_version_boot_params () {
+ echo $1 rcuperf.shutdown=1 \
+ rcuperf.verbose=1
+}
diff --git a/tools/testing/selftests/rcutorture/doc/TINY_RCU.txt b/tools/testing/selftests/rcutorture/doc/TINY_RCU.txt
new file mode 100644
index 000000000..a75b16991
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/doc/TINY_RCU.txt
@@ -0,0 +1,38 @@
+This document gives a brief rationale for the TINY_RCU test cases.
+
+
+Kconfig Parameters:
+
+CONFIG_DEBUG_LOCK_ALLOC -- Do all three and none of the three.
+CONFIG_PREEMPT_COUNT
+CONFIG_RCU_TRACE
+
+The theory here is that randconfig testing will hit the other six possible
+combinations of these parameters.
+
+
+Kconfig Parameters Ignored:
+
+CONFIG_DEBUG_OBJECTS_RCU_HEAD
+CONFIG_PROVE_RCU
+
+ In common code tested by TREE_RCU test cases.
+
+CONFIG_RCU_NOCB_CPU
+
+ Meaningless for TINY_RCU.
+
+CONFIG_RCU_STALL_COMMON
+CONFIG_RCU_TORTURE_TEST
+
+ Redundant with CONFIG_RCU_TRACE.
+
+CONFIG_HOTPLUG_CPU
+CONFIG_PREEMPT
+CONFIG_PREEMPT_RCU
+CONFIG_SMP
+CONFIG_TINY_RCU
+CONFIG_PREEMPT_RCU
+CONFIG_TREE_RCU
+
+ All forced by CONFIG_TINY_RCU.
diff --git a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt
new file mode 100644
index 000000000..af6fca036
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt
@@ -0,0 +1,81 @@
+This document gives a brief rationale for the TREE_RCU-related test
+cases, a group that includes PREEMPT_RCU.
+
+
+Kconfig Parameters:
+
+CONFIG_DEBUG_LOCK_ALLOC -- Do three, covering CONFIG_PROVE_LOCKING & not.
+CONFIG_DEBUG_OBJECTS_RCU_HEAD -- Do one.
+CONFIG_HOTPLUG_CPU -- Do half. (Every second.)
+CONFIG_HZ_PERIODIC -- Do one.
+CONFIG_NO_HZ_IDLE -- Do those not otherwise specified. (Groups of two.)
+CONFIG_NO_HZ_FULL -- Do two, one with partial CPU enablement.
+CONFIG_PREEMPT -- Do half. (First three and #8.)
+CONFIG_PROVE_LOCKING -- Do several, covering CONFIG_DEBUG_LOCK_ALLOC=y and not.
+CONFIG_PROVE_RCU -- Hardwired to CONFIG_PROVE_LOCKING.
+CONFIG_RCU_BOOST -- one of PREEMPT_RCU.
+CONFIG_RCU_FANOUT -- Cover hierarchy, but overlap with others.
+CONFIG_RCU_FANOUT_LEAF -- Do one non-default.
+CONFIG_RCU_FAST_NO_HZ -- Do one, but not with all nohz_full CPUs.
+CONFIG_RCU_NOCB_CPU -- Do three, one with no rcu_nocbs CPUs, one with
+ rcu_nocbs=0, and one with all rcu_nocbs CPUs.
+CONFIG_RCU_TRACE -- Do half.
+CONFIG_SMP -- Need one !SMP for PREEMPT_RCU.
+CONFIG_RCU_EXPERT=n -- Do a few, but these have to be vanilla configurations.
+CONFIG_RCU_EQS_DEBUG -- Do at least one for CONFIG_NO_HZ_FULL and not.
+
+RCU-bh: Do one with PREEMPT and one with !PREEMPT.
+RCU-sched: Do one with PREEMPT but not BOOST.
+
+
+Boot parameters:
+
+nohz_full - do at least one.
+maxcpu -- do at least one.
+rcupdate.rcu_self_test_bh -- Do at least one each, offloaded and not.
+rcupdate.rcu_self_test_sched -- Do at least one each, offloaded and not.
+rcupdate.rcu_self_test -- Do at least one each, offloaded and not.
+rcutree.rcu_fanout_exact -- Do at least one.
+
+
+Kconfig Parameters Ignored:
+
+CONFIG_64BIT
+
+ Used only to check CONFIG_RCU_FANOUT value, inspection suffices.
+
+CONFIG_PREEMPT_COUNT
+CONFIG_PREEMPT_RCU
+
+ Redundant with CONFIG_PREEMPT, ignore.
+
+CONFIG_RCU_BOOST_DELAY
+
+ Inspection suffices, ignore.
+
+CONFIG_RCU_CPU_STALL_TIMEOUT
+
+ Inspection suffices, ignore.
+
+CONFIG_RCU_STALL_COMMON
+
+ Implied by TREE_RCU and PREEMPT_RCU.
+
+CONFIG_RCU_TORTURE_TEST
+CONFIG_RCU_TORTURE_TEST_RUNNABLE
+
+ Always used in KVM testing.
+
+CONFIG_PREEMPT_RCU
+CONFIG_TREE_RCU
+CONFIG_TINY_RCU
+CONFIG_TASKS_RCU
+
+ These are controlled by CONFIG_PREEMPT and/or CONFIG_SMP.
+
+CONFIG_SRCU
+
+ Selected by CONFIG_RCU_TORTURE_TEST, so cannot disable.
+
+
+boot parameters ignored: TBD
diff --git a/tools/testing/selftests/rcutorture/doc/initrd.txt b/tools/testing/selftests/rcutorture/doc/initrd.txt
new file mode 100644
index 000000000..833f826d6
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/doc/initrd.txt
@@ -0,0 +1,113 @@
+This document describes one way to create the initrd directory hierarchy
+in order to allow an initrd to be built into your kernel. The trick
+here is to steal the initrd file used on your Linux laptop, Ubuntu in
+this case. There are probably much better ways of doing this.
+
+That said, here are the commands:
+
+------------------------------------------------------------------------
+cd tools/testing/selftests/rcutorture
+zcat /initrd.img > /tmp/initrd.img.zcat
+mkdir initrd
+cd initrd
+cpio -id < /tmp/initrd.img.zcat
+------------------------------------------------------------------------
+
+Another way to create an initramfs image is using "dracut"[1], which is
+available on many distros, however the initramfs dracut generates is a cpio
+archive with another cpio archive in it, so an extra step is needed to create
+the initrd directory hierarchy.
+
+Here are the commands to create a initrd directory for rcutorture using
+dracut:
+
+------------------------------------------------------------------------
+dracut --no-hostonly --no-hostonly-cmdline --module "base bash shutdown" /tmp/initramfs.img
+cd tools/testing/selftests/rcutorture
+mkdir initrd
+cd initrd
+/usr/lib/dracut/skipcpio /tmp/initramfs.img | zcat | cpio -id < /tmp/initramfs.img
+------------------------------------------------------------------------
+
+Interestingly enough, if you are running rcutorture, you don't really
+need userspace in many cases. Running without userspace has the
+advantage of allowing you to test your kernel independently of the
+distro in place, the root-filesystem layout, and so on. To make this
+happen, put the following script in the initrd's tree's "/init" file,
+with 0755 mode.
+
+------------------------------------------------------------------------
+#!/bin/sh
+
+[ -d /dev ] || mkdir -m 0755 /dev
+[ -d /root ] || mkdir -m 0700 /root
+[ -d /sys ] || mkdir /sys
+[ -d /proc ] || mkdir /proc
+[ -d /tmp ] || mkdir /tmp
+mkdir -p /var/lock
+mount -t sysfs -o nodev,noexec,nosuid sysfs /sys
+mount -t proc -o nodev,noexec,nosuid proc /proc
+# Some things don't work properly without /etc/mtab.
+ln -sf /proc/mounts /etc/mtab
+
+# Note that this only becomes /dev on the real filesystem if udev's scripts
+# are used; which they will be, but it's worth pointing out
+if ! mount -t devtmpfs -o mode=0755 udev /dev; then
+ echo "W: devtmpfs not available, falling back to tmpfs for /dev"
+ mount -t tmpfs -o mode=0755 udev /dev
+ [ -e /dev/console ] || mknod --mode=600 /dev/console c 5 1
+ [ -e /dev/kmsg ] || mknod --mode=644 /dev/kmsg c 1 11
+ [ -e /dev/null ] || mknod --mode=666 /dev/null c 1 3
+fi
+
+mkdir /dev/pts
+mount -t devpts -o noexec,nosuid,gid=5,mode=0620 devpts /dev/pts || true
+mount -t tmpfs -o "nosuid,size=20%,mode=0755" tmpfs /run
+mkdir /run/initramfs
+# compatibility symlink for the pre-oneiric locations
+ln -s /run/initramfs /dev/.initramfs
+
+# Export relevant variables
+export ROOT=
+export ROOTDELAY=
+export ROOTFLAGS=
+export ROOTFSTYPE=
+export IP=
+export BOOT=
+export BOOTIF=
+export UBIMTD=
+export break=
+export init=/sbin/init
+export quiet=n
+export readonly=y
+export rootmnt=/root
+export debug=
+export panic=
+export blacklist=
+export resume=
+export resume_offset=
+export recovery=
+
+for i in /sys/devices/system/cpu/cpu*/online
+do
+ case $i in
+ '/sys/devices/system/cpu/cpu0/online')
+ ;;
+ '/sys/devices/system/cpu/cpu*/online')
+ ;;
+ *)
+ echo 1 > $i
+ ;;
+ esac
+done
+
+while :
+do
+ sleep 10
+done
+------------------------------------------------------------------------
+
+References:
+[1]: https://dracut.wiki.kernel.org/index.php/Main_Page
+[2]: http://blog.elastocloud.org/2015/06/rapid-linux-kernel-devtest-with-qemu.html
+[3]: https://www.centos.org/forums/viewtopic.php?t=51621
diff --git a/tools/testing/selftests/rcutorture/doc/rcu-test-image.txt b/tools/testing/selftests/rcutorture/doc/rcu-test-image.txt
new file mode 100644
index 000000000..449cf579d
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/doc/rcu-test-image.txt
@@ -0,0 +1,42 @@
+This document describes one way to create the rcu-test-image file
+that contains the filesystem used by the guest-OS kernel. There are
+probably much better ways of doing this, and this filesystem could no
+doubt be smaller. It is probably also possible to simply download
+an appropriate image from any number of places.
+
+That said, here are the commands:
+
+------------------------------------------------------------------------
+dd if=/dev/zero of=rcu-test-image bs=400M count=1
+mkfs.ext3 ./rcu-test-image
+sudo mount -o loop ./rcu-test-image /mnt
+
+# Replace "precise" below with your favorite Ubuntu release.
+# Empirical evidence says this image will work for 64-bit, but...
+# Note that debootstrap does take a few minutes to run. Or longer.
+sudo debootstrap --verbose --arch i386 precise /mnt http://archive.ubuntu.com/ubuntu
+cat << '___EOF___' | sudo dd of=/mnt/etc/fstab
+# UNCONFIGURED FSTAB FOR BASE SYSTEM
+#
+/dev/vda / ext3 defaults 1 1
+dev /dev tmpfs rw 0 0
+tmpfs /dev/shm tmpfs defaults 0 0
+devpts /dev/pts devpts gid=5,mode=620 0 0
+sysfs /sys sysfs defaults 0 0
+proc /proc proc defaults 0 0
+___EOF___
+sudo umount /mnt
+------------------------------------------------------------------------
+
+
+References:
+
+ http://sripathikodi.blogspot.com/2010/02/creating-kvm-bootable-fedora-system.html
+ https://help.ubuntu.com/community/KVM/CreateGuests
+ https://help.ubuntu.com/community/JeOSVMBuilder
+ http://wiki.libvirt.org/page/UbuntuKVMWalkthrough
+ http://www.moe.co.uk/2011/01/07/pci_add_option_rom-failed-to-find-romfile-pxe-rtl8139-bin/ -- "apt-get install kvm-pxe"
+ http://www.landley.net/writing/rootfs-howto.html
+ http://en.wikipedia.org/wiki/Initrd
+ http://en.wikipedia.org/wiki/Cpio
+ http://wiki.libvirt.org/page/UbuntuKVMWalkthrough
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/.gitignore b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/.gitignore
new file mode 100644
index 000000000..712a3d41a
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/.gitignore
@@ -0,0 +1 @@
+srcu.c
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/Makefile b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/Makefile
new file mode 100644
index 000000000..4bed0b678
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/Makefile
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: GPL-2.0
+all: srcu.c store_buffering
+
+LINUX_SOURCE = ../../../../../..
+
+modified_srcu_input = $(LINUX_SOURCE)/include/linux/srcu.h \
+ $(LINUX_SOURCE)/kernel/rcu/srcu.c
+
+modified_srcu_output = include/linux/srcu.h srcu.c
+
+include/linux/srcu.h: srcu.c
+
+srcu.c: modify_srcu.awk Makefile $(modified_srcu_input)
+ awk -f modify_srcu.awk $(modified_srcu_input) $(modified_srcu_output)
+
+store_buffering:
+ @cd tests/store_buffering; make
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/delay.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/delay.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/delay.h
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/export.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/export.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/export.h
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/mutex.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/mutex.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/mutex.h
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/percpu.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/percpu.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/percpu.h
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/preempt.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/preempt.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/preempt.h
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/rcupdate.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/rcupdate.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/rcupdate.h
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/sched.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/sched.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/sched.h
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/smp.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/smp.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/smp.h
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/workqueue.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/workqueue.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/workqueue.h
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/uapi/linux/types.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/uapi/linux/types.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/uapi/linux/types.h
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/.gitignore b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/.gitignore
new file mode 100644
index 000000000..1d016e669
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/.gitignore
@@ -0,0 +1 @@
+srcu.h
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/kconfig.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/kconfig.h
new file mode 100644
index 000000000..f2860dd1b
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/kconfig.h
@@ -0,0 +1 @@
+#include <LINUX_SOURCE/linux/kconfig.h>
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/types.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/types.h
new file mode 100644
index 000000000..891ad13e9
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/types.h
@@ -0,0 +1,156 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * This header has been modifies to remove definitions of types that
+ * are defined in standard userspace headers or are problematic for some
+ * other reason.
+ */
+
+#ifndef _LINUX_TYPES_H
+#define _LINUX_TYPES_H
+
+#define __EXPORTED_HEADERS__
+#include <uapi/linux/types.h>
+
+#ifndef __ASSEMBLY__
+
+#define DECLARE_BITMAP(name, bits) \
+ unsigned long name[BITS_TO_LONGS(bits)]
+
+typedef __u32 __kernel_dev_t;
+
+/* bsd */
+typedef unsigned char u_char;
+typedef unsigned short u_short;
+typedef unsigned int u_int;
+typedef unsigned long u_long;
+
+/* sysv */
+typedef unsigned char unchar;
+typedef unsigned short ushort;
+typedef unsigned int uint;
+typedef unsigned long ulong;
+
+#ifndef __BIT_TYPES_DEFINED__
+#define __BIT_TYPES_DEFINED__
+
+typedef __u8 u_int8_t;
+typedef __s8 int8_t;
+typedef __u16 u_int16_t;
+typedef __s16 int16_t;
+typedef __u32 u_int32_t;
+typedef __s32 int32_t;
+
+#endif /* !(__BIT_TYPES_DEFINED__) */
+
+typedef __u8 uint8_t;
+typedef __u16 uint16_t;
+typedef __u32 uint32_t;
+
+/* this is a special 64bit data type that is 8-byte aligned */
+#define aligned_u64 __u64 __attribute__((aligned(8)))
+#define aligned_be64 __be64 __attribute__((aligned(8)))
+#define aligned_le64 __le64 __attribute__((aligned(8)))
+
+/**
+ * The type used for indexing onto a disc or disc partition.
+ *
+ * Linux always considers sectors to be 512 bytes long independently
+ * of the devices real block size.
+ *
+ * blkcnt_t is the type of the inode's block count.
+ */
+#ifdef CONFIG_LBDAF
+typedef u64 sector_t;
+#else
+typedef unsigned long sector_t;
+#endif
+
+/*
+ * The type of an index into the pagecache.
+ */
+#define pgoff_t unsigned long
+
+/*
+ * A dma_addr_t can hold any valid DMA address, i.e., any address returned
+ * by the DMA API.
+ *
+ * If the DMA API only uses 32-bit addresses, dma_addr_t need only be 32
+ * bits wide. Bus addresses, e.g., PCI BARs, may be wider than 32 bits,
+ * but drivers do memory-mapped I/O to ioremapped kernel virtual addresses,
+ * so they don't care about the size of the actual bus addresses.
+ */
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+typedef u64 dma_addr_t;
+#else
+typedef u32 dma_addr_t;
+#endif
+
+#ifdef CONFIG_PHYS_ADDR_T_64BIT
+typedef u64 phys_addr_t;
+#else
+typedef u32 phys_addr_t;
+#endif
+
+typedef phys_addr_t resource_size_t;
+
+/*
+ * This type is the placeholder for a hardware interrupt number. It has to be
+ * big enough to enclose whatever representation is used by a given platform.
+ */
+typedef unsigned long irq_hw_number_t;
+
+typedef struct {
+ int counter;
+} atomic_t;
+
+#ifdef CONFIG_64BIT
+typedef struct {
+ long counter;
+} atomic64_t;
+#endif
+
+struct list_head {
+ struct list_head *next, *prev;
+};
+
+struct hlist_head {
+ struct hlist_node *first;
+};
+
+struct hlist_node {
+ struct hlist_node *next, **pprev;
+};
+
+/**
+ * struct callback_head - callback structure for use with RCU and task_work
+ * @next: next update requests in a list
+ * @func: actual update function to call after the grace period.
+ *
+ * The struct is aligned to size of pointer. On most architectures it happens
+ * naturally due ABI requirements, but some architectures (like CRIS) have
+ * weird ABI and we need to ask it explicitly.
+ *
+ * The alignment is required to guarantee that bits 0 and 1 of @next will be
+ * clear under normal conditions -- as long as we use call_rcu(),
+ * call_rcu_bh(), call_rcu_sched(), or call_srcu() to queue callback.
+ *
+ * This guarantee is important for few reasons:
+ * - future call_rcu_lazy() will make use of lower bits in the pointer;
+ * - the structure shares storage spacer in struct page with @compound_head,
+ * which encode PageTail() in bit 0. The guarantee is needed to avoid
+ * false-positive PageTail().
+ */
+struct callback_head {
+ struct callback_head *next;
+ void (*func)(struct callback_head *head);
+} __attribute__((aligned(sizeof(void *))));
+#define rcu_head callback_head
+
+typedef void (*rcu_callback_t)(struct rcu_head *head);
+typedef void (*call_rcu_func_t)(struct rcu_head *head, rcu_callback_t func);
+
+/* clocksource cycle base type */
+typedef u64 cycle_t;
+
+#endif /* __ASSEMBLY__ */
+#endif /* _LINUX_TYPES_H */
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/modify_srcu.awk b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/modify_srcu.awk
new file mode 100755
index 000000000..e05182d3e
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/modify_srcu.awk
@@ -0,0 +1,376 @@
+#!/usr/bin/awk -f
+# SPDX-License-Identifier: GPL-2.0
+
+# Modify SRCU for formal verification. The first argument should be srcu.h and
+# the second should be srcu.c. Outputs modified srcu.h and srcu.c into the
+# current directory.
+
+BEGIN {
+ if (ARGC != 5) {
+ print "Usange: input.h input.c output.h output.c" > "/dev/stderr";
+ exit 1;
+ }
+ h_output = ARGV[3];
+ c_output = ARGV[4];
+ ARGC = 3;
+
+ # Tokenize using FS and not RS as FS supports regular expressions. Each
+ # record is one line of source, except that backslashed lines are
+ # combined. Comments are treated as field separators, as are quotes.
+ quote_regexp="\"([^\\\\\"]|\\\\.)*\"";
+ comment_regexp="\\/\\*([^*]|\\*+[^*/])*\\*\\/|\\/\\/.*(\n|$)";
+ FS="([ \\\\\t\n\v\f;,.=(){}+*/<>&|^-]|\\[|\\]|" comment_regexp "|" quote_regexp ")+";
+
+ inside_srcu_struct = 0;
+ inside_srcu_init_def = 0;
+ srcu_init_param_name = "";
+ in_macro = 0;
+ brace_nesting = 0;
+ paren_nesting = 0;
+
+ # Allow the manipulation of the last field separator after has been
+ # seen.
+ last_fs = "";
+ # Whether the last field separator was intended to be output.
+ last_fs_print = 0;
+
+ # rcu_batches stores the initialization for each instance of struct
+ # rcu_batch
+
+ in_comment = 0;
+
+ outputfile = "";
+}
+
+{
+ prev_outputfile = outputfile;
+ if (FILENAME ~ /\.h$/) {
+ outputfile = h_output;
+ if (FNR != NR) {
+ print "Incorrect file order" > "/dev/stderr";
+ exit 1;
+ }
+ }
+ else
+ outputfile = c_output;
+
+ if (prev_outputfile && outputfile != prev_outputfile) {
+ new_outputfile = outputfile;
+ outputfile = prev_outputfile;
+ update_fieldsep("", 0);
+ outputfile = new_outputfile;
+ }
+}
+
+# Combine the next line into $0.
+function combine_line() {
+ ret = getline next_line;
+ if (ret == 0) {
+ # Don't allow two consecutive getlines at the end of the file
+ if (eof_found) {
+ print "Error: expected more input." > "/dev/stderr";
+ exit 1;
+ } else {
+ eof_found = 1;
+ }
+ } else if (ret == -1) {
+ print "Error reading next line of file" FILENAME > "/dev/stderr";
+ exit 1;
+ }
+ $0 = $0 "\n" next_line;
+}
+
+# Combine backslashed lines and multiline comments.
+function combine_backslashes() {
+ while (/\\$|\/\*([^*]|\*+[^*\/])*\**$/) {
+ combine_line();
+ }
+}
+
+function read_line() {
+ combine_line();
+ combine_backslashes();
+}
+
+# Print out field separators and update variables that depend on them. Only
+# print if p is true. Call with sep="" and p=0 to print out the last field
+# separator.
+function update_fieldsep(sep, p) {
+ # Count braces
+ sep_tmp = sep;
+ gsub(quote_regexp "|" comment_regexp, "", sep_tmp);
+ while (1)
+ {
+ if (sub("[^{}()]*\\{", "", sep_tmp)) {
+ brace_nesting++;
+ continue;
+ }
+ if (sub("[^{}()]*\\}", "", sep_tmp)) {
+ brace_nesting--;
+ if (brace_nesting < 0) {
+ print "Unbalanced braces!" > "/dev/stderr";
+ exit 1;
+ }
+ continue;
+ }
+ if (sub("[^{}()]*\\(", "", sep_tmp)) {
+ paren_nesting++;
+ continue;
+ }
+ if (sub("[^{}()]*\\)", "", sep_tmp)) {
+ paren_nesting--;
+ if (paren_nesting < 0) {
+ print "Unbalanced parenthesis!" > "/dev/stderr";
+ exit 1;
+ }
+ continue;
+ }
+
+ break;
+ }
+
+ if (last_fs_print)
+ printf("%s", last_fs) > outputfile;
+ last_fs = sep;
+ last_fs_print = p;
+}
+
+# Shifts the fields down by n positions. Calls next if there are no more. If p
+# is true then print out field separators.
+function shift_fields(n, p) {
+ do {
+ if (match($0, FS) > 0) {
+ update_fieldsep(substr($0, RSTART, RLENGTH), p);
+ if (RSTART + RLENGTH <= length())
+ $0 = substr($0, RSTART + RLENGTH);
+ else
+ $0 = "";
+ } else {
+ update_fieldsep("", 0);
+ print "" > outputfile;
+ next;
+ }
+ } while (--n > 0);
+}
+
+# Shifts and prints the first n fields.
+function print_fields(n) {
+ do {
+ update_fieldsep("", 0);
+ printf("%s", $1) > outputfile;
+ shift_fields(1, 1);
+ } while (--n > 0);
+}
+
+{
+ combine_backslashes();
+}
+
+# Print leading FS
+{
+ if (match($0, "^(" FS ")+") > 0) {
+ update_fieldsep(substr($0, RSTART, RLENGTH), 1);
+ if (RSTART + RLENGTH <= length())
+ $0 = substr($0, RSTART + RLENGTH);
+ else
+ $0 = "";
+ }
+}
+
+# Parse the line.
+{
+ while (NF > 0) {
+ if ($1 == "struct" && NF < 3) {
+ read_line();
+ continue;
+ }
+
+ if (FILENAME ~ /\.h$/ && !inside_srcu_struct &&
+ brace_nesting == 0 && paren_nesting == 0 &&
+ $1 == "struct" && $2 == "srcu_struct" &&
+ $0 ~ "^struct(" FS ")+srcu_struct(" FS ")+\\{") {
+ inside_srcu_struct = 1;
+ print_fields(2);
+ continue;
+ }
+ if (inside_srcu_struct && brace_nesting == 0 &&
+ paren_nesting == 0) {
+ inside_srcu_struct = 0;
+ update_fieldsep("", 0);
+ for (name in rcu_batches)
+ print "extern struct rcu_batch " name ";" > outputfile;
+ }
+
+ if (inside_srcu_struct && $1 == "struct" && $2 == "rcu_batch") {
+ # Move rcu_batches outside of the struct.
+ rcu_batches[$3] = "";
+ shift_fields(3, 1);
+ sub(/;[[:space:]]*$/, "", last_fs);
+ continue;
+ }
+
+ if (FILENAME ~ /\.h$/ && !inside_srcu_init_def &&
+ $1 == "#define" && $2 == "__SRCU_STRUCT_INIT") {
+ inside_srcu_init_def = 1;
+ srcu_init_param_name = $3;
+ in_macro = 1;
+ print_fields(3);
+ continue;
+ }
+ if (inside_srcu_init_def && brace_nesting == 0 &&
+ paren_nesting == 0) {
+ inside_srcu_init_def = 0;
+ in_macro = 0;
+ continue;
+ }
+
+ if (inside_srcu_init_def && brace_nesting == 1 &&
+ paren_nesting == 0 && last_fs ~ /\.[[:space:]]*$/ &&
+ $1 ~ /^[[:alnum:]_]+$/) {
+ name = $1;
+ if (name in rcu_batches) {
+ # Remove the dot.
+ sub(/\.[[:space:]]*$/, "", last_fs);
+
+ old_record = $0;
+ do
+ shift_fields(1, 0);
+ while (last_fs !~ /,/ || paren_nesting > 0);
+ end_loc = length(old_record) - length($0);
+ end_loc += index(last_fs, ",") - length(last_fs);
+
+ last_fs = substr(last_fs, index(last_fs, ",") + 1);
+ last_fs_print = 1;
+
+ match(old_record, "^"name"("FS")+=");
+ start_loc = RSTART + RLENGTH;
+
+ len = end_loc - start_loc;
+ initializer = substr(old_record, start_loc, len);
+ gsub(srcu_init_param_name "\\.", "", initializer);
+ rcu_batches[name] = initializer;
+ continue;
+ }
+ }
+
+ # Don't include a nonexistent file
+ if (!in_macro && $1 == "#include" && /^#include[[:space:]]+"rcu\.h"/) {
+ update_fieldsep("", 0);
+ next;
+ }
+
+ # Ignore most preprocessor stuff.
+ if (!in_macro && $1 ~ /#/) {
+ break;
+ }
+
+ if (brace_nesting > 0 && $1 ~ "^[[:alnum:]_]+$" && NF < 2) {
+ read_line();
+ continue;
+ }
+ if (brace_nesting > 0 &&
+ $0 ~ "^[[:alnum:]_]+[[:space:]]*(\\.|->)[[:space:]]*[[:alnum:]_]+" &&
+ $2 in rcu_batches) {
+ # Make uses of rcu_batches global. Somewhat unreliable.
+ shift_fields(1, 0);
+ print_fields(1);
+ continue;
+ }
+
+ if ($1 == "static" && NF < 3) {
+ read_line();
+ continue;
+ }
+ if ($1 == "static" && ($2 == "bool" && $3 == "try_check_zero" ||
+ $2 == "void" && $3 == "srcu_flip")) {
+ shift_fields(1, 1);
+ print_fields(2);
+ continue;
+ }
+
+ # Distinguish between read-side and write-side memory barriers.
+ if ($1 == "smp_mb" && NF < 2) {
+ read_line();
+ continue;
+ }
+ if (match($0, /^smp_mb[[:space:]();\/*]*[[:alnum:]]/)) {
+ barrier_letter = substr($0, RLENGTH, 1);
+ if (barrier_letter ~ /A|D/)
+ new_barrier_name = "sync_smp_mb";
+ else if (barrier_letter ~ /B|C/)
+ new_barrier_name = "rs_smp_mb";
+ else {
+ print "Unrecognized memory barrier." > "/dev/null";
+ exit 1;
+ }
+
+ shift_fields(1, 1);
+ printf("%s", new_barrier_name) > outputfile;
+ continue;
+ }
+
+ # Skip definition of rcu_synchronize, since it is already
+ # defined in misc.h. Only present in old versions of srcu.
+ if (brace_nesting == 0 && paren_nesting == 0 &&
+ $1 == "struct" && $2 == "rcu_synchronize" &&
+ $0 ~ "^struct(" FS ")+rcu_synchronize(" FS ")+\\{") {
+ shift_fields(2, 0);
+ while (brace_nesting) {
+ if (NF < 2)
+ read_line();
+ shift_fields(1, 0);
+ }
+ }
+
+ # Skip definition of wakeme_after_rcu for the same reason
+ if (brace_nesting == 0 && $1 == "static" && $2 == "void" &&
+ $3 == "wakeme_after_rcu") {
+ while (NF < 5)
+ read_line();
+ shift_fields(3, 0);
+ do {
+ while (NF < 3)
+ read_line();
+ shift_fields(1, 0);
+ } while (paren_nesting || brace_nesting);
+ }
+
+ if ($1 ~ /^(unsigned|long)$/ && NF < 3) {
+ read_line();
+ continue;
+ }
+
+ # Give srcu_batches_completed the correct type for old SRCU.
+ if (brace_nesting == 0 && $1 == "long" &&
+ $2 == "srcu_batches_completed") {
+ update_fieldsep("", 0);
+ printf("unsigned ") > outputfile;
+ print_fields(2);
+ continue;
+ }
+ if (brace_nesting == 0 && $1 == "unsigned" && $2 == "long" &&
+ $3 == "srcu_batches_completed") {
+ print_fields(3);
+ continue;
+ }
+
+ # Just print out the input code by default.
+ print_fields(1);
+ }
+ update_fieldsep("", 0);
+ print > outputfile;
+ next;
+}
+
+END {
+ update_fieldsep("", 0);
+
+ if (brace_nesting != 0) {
+ print "Unbalanced braces!" > "/dev/stderr";
+ exit 1;
+ }
+
+ # Define the rcu_batches
+ for (name in rcu_batches)
+ print "struct rcu_batch " name " = " rcu_batches[name] ";" > c_output;
+}
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/assume.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/assume.h
new file mode 100644
index 000000000..570a49d9d
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/assume.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ASSUME_H
+#define ASSUME_H
+
+/* Provide an assumption macro that can be disabled for gcc. */
+#ifdef RUN
+#define assume(x) \
+ do { \
+ /* Evaluate x to suppress warnings. */ \
+ (void) (x); \
+ } while (0)
+
+#else
+#define assume(x) __CPROVER_assume(x)
+#endif
+
+#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/barriers.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/barriers.h
new file mode 100644
index 000000000..3f95a768a
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/barriers.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef BARRIERS_H
+#define BARRIERS_H
+
+#define barrier() __asm__ __volatile__("" : : : "memory")
+
+#ifdef RUN
+#define smp_mb() __sync_synchronize()
+#define smp_mb__after_unlock_lock() __sync_synchronize()
+#else
+/*
+ * Copied from CBMC's implementation of __sync_synchronize(), which
+ * seems to be disabled by default.
+ */
+#define smp_mb() __CPROVER_fence("WWfence", "RRfence", "RWfence", "WRfence", \
+ "WWcumul", "RRcumul", "RWcumul", "WRcumul")
+#define smp_mb__after_unlock_lock() __CPROVER_fence("WWfence", "RRfence", "RWfence", "WRfence", \
+ "WWcumul", "RRcumul", "RWcumul", "WRcumul")
+#endif
+
+/*
+ * Allow memory barriers to be disabled in either the read or write side
+ * of SRCU individually.
+ */
+
+#ifndef NO_SYNC_SMP_MB
+#define sync_smp_mb() smp_mb()
+#else
+#define sync_smp_mb() do {} while (0)
+#endif
+
+#ifndef NO_READ_SIDE_SMP_MB
+#define rs_smp_mb() smp_mb()
+#else
+#define rs_smp_mb() do {} while (0)
+#endif
+
+#define READ_ONCE(x) (*(volatile typeof(x) *) &(x))
+#define WRITE_ONCE(x) ((*(volatile typeof(x) *) &(x)) = (val))
+
+#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/bug_on.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/bug_on.h
new file mode 100644
index 000000000..5e7912c6a
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/bug_on.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef BUG_ON_H
+#define BUG_ON_H
+
+#include <assert.h>
+
+#define BUG() assert(0)
+#define BUG_ON(x) assert(!(x))
+
+/* Does it make sense to treat warnings as errors? */
+#define WARN() BUG()
+#define WARN_ON(x) (BUG_ON(x), false)
+
+#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/combined_source.c b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/combined_source.c
new file mode 100644
index 000000000..e67ee5b3d
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/combined_source.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <config.h>
+
+/* Include all source files. */
+
+#include "include_srcu.c"
+
+#include "preempt.c"
+#include "misc.c"
+
+/* Used by test.c files */
+#include <pthread.h>
+#include <stdlib.h>
+#include <linux/srcu.h>
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/config.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/config.h
new file mode 100644
index 000000000..283d71033
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/config.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* "Cheater" definitions based on restricted Kconfig choices. */
+
+#undef CONFIG_TINY_RCU
+#undef __CHECKER__
+#undef CONFIG_DEBUG_LOCK_ALLOC
+#undef CONFIG_DEBUG_OBJECTS_RCU_HEAD
+#undef CONFIG_HOTPLUG_CPU
+#undef CONFIG_MODULES
+#undef CONFIG_NO_HZ_FULL_SYSIDLE
+#undef CONFIG_PREEMPT_COUNT
+#undef CONFIG_PREEMPT_RCU
+#undef CONFIG_PROVE_RCU
+#undef CONFIG_RCU_NOCB_CPU
+#undef CONFIG_RCU_NOCB_CPU_ALL
+#undef CONFIG_RCU_STALL_COMMON
+#undef CONFIG_RCU_TRACE
+#undef CONFIG_RCU_USER_QS
+#undef CONFIG_TASKS_RCU
+#define CONFIG_TREE_RCU
+
+#define CONFIG_GENERIC_ATOMIC64
+
+#if NR_CPUS > 1
+#define CONFIG_SMP
+#else
+#undef CONFIG_SMP
+#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/include_srcu.c b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/include_srcu.c
new file mode 100644
index 000000000..e5202d4cf
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/include_srcu.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <config.h>
+
+#include <assert.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <pthread.h>
+#include <stddef.h>
+#include <string.h>
+#include <sys/types.h>
+
+#include "int_typedefs.h"
+
+#include "barriers.h"
+#include "bug_on.h"
+#include "locks.h"
+#include "misc.h"
+#include "preempt.h"
+#include "percpu.h"
+#include "workqueues.h"
+
+#ifdef USE_SIMPLE_SYNC_SRCU
+#define synchronize_srcu(sp) synchronize_srcu_original(sp)
+#endif
+
+#include <srcu.c>
+
+#ifdef USE_SIMPLE_SYNC_SRCU
+#undef synchronize_srcu
+
+#include "simple_sync_srcu.c"
+#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/int_typedefs.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/int_typedefs.h
new file mode 100644
index 000000000..0dd27aa51
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/int_typedefs.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef INT_TYPEDEFS_H
+#define INT_TYPEDEFS_H
+
+#include <inttypes.h>
+
+typedef int8_t s8;
+typedef uint8_t u8;
+typedef int16_t s16;
+typedef uint16_t u16;
+typedef int32_t s32;
+typedef uint32_t u32;
+typedef int64_t s64;
+typedef uint64_t u64;
+
+typedef int8_t __s8;
+typedef uint8_t __u8;
+typedef int16_t __s16;
+typedef uint16_t __u16;
+typedef int32_t __s32;
+typedef uint32_t __u32;
+typedef int64_t __s64;
+typedef uint64_t __u64;
+
+#define S8_C(x) INT8_C(x)
+#define U8_C(x) UINT8_C(x)
+#define S16_C(x) INT16_C(x)
+#define U16_C(x) UINT16_C(x)
+#define S32_C(x) INT32_C(x)
+#define U32_C(x) UINT32_C(x)
+#define S64_C(x) INT64_C(x)
+#define U64_C(x) UINT64_C(x)
+
+#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/locks.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/locks.h
new file mode 100644
index 000000000..cf6938d67
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/locks.h
@@ -0,0 +1,221 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef LOCKS_H
+#define LOCKS_H
+
+#include <limits.h>
+#include <pthread.h>
+#include <stdbool.h>
+
+#include "assume.h"
+#include "bug_on.h"
+#include "preempt.h"
+
+int nondet_int(void);
+
+#define __acquire(x)
+#define __acquires(x)
+#define __release(x)
+#define __releases(x)
+
+/* Only use one lock mechanism. Select which one. */
+#ifdef PTHREAD_LOCK
+struct lock_impl {
+ pthread_mutex_t mutex;
+};
+
+static inline void lock_impl_lock(struct lock_impl *lock)
+{
+ BUG_ON(pthread_mutex_lock(&lock->mutex));
+}
+
+static inline void lock_impl_unlock(struct lock_impl *lock)
+{
+ BUG_ON(pthread_mutex_unlock(&lock->mutex));
+}
+
+static inline bool lock_impl_trylock(struct lock_impl *lock)
+{
+ int err = pthread_mutex_trylock(&lock->mutex);
+
+ if (!err)
+ return true;
+ else if (err == EBUSY)
+ return false;
+ BUG();
+}
+
+static inline void lock_impl_init(struct lock_impl *lock)
+{
+ pthread_mutex_init(&lock->mutex, NULL);
+}
+
+#define LOCK_IMPL_INITIALIZER {.mutex = PTHREAD_MUTEX_INITIALIZER}
+
+#else /* !defined(PTHREAD_LOCK) */
+/* Spinlock that assumes that it always gets the lock immediately. */
+
+struct lock_impl {
+ bool locked;
+};
+
+static inline bool lock_impl_trylock(struct lock_impl *lock)
+{
+#ifdef RUN
+ /* TODO: Should this be a test and set? */
+ return __sync_bool_compare_and_swap(&lock->locked, false, true);
+#else
+ __CPROVER_atomic_begin();
+ bool old_locked = lock->locked;
+ lock->locked = true;
+ __CPROVER_atomic_end();
+
+ /* Minimal barrier to prevent accesses leaking out of lock. */
+ __CPROVER_fence("RRfence", "RWfence");
+
+ return !old_locked;
+#endif
+}
+
+static inline void lock_impl_lock(struct lock_impl *lock)
+{
+ /*
+ * CBMC doesn't support busy waiting, so just assume that the
+ * lock is available.
+ */
+ assume(lock_impl_trylock(lock));
+
+ /*
+ * If the lock was already held by this thread then the assumption
+ * is unsatisfiable (deadlock).
+ */
+}
+
+static inline void lock_impl_unlock(struct lock_impl *lock)
+{
+#ifdef RUN
+ BUG_ON(!__sync_bool_compare_and_swap(&lock->locked, true, false));
+#else
+ /* Minimal barrier to prevent accesses leaking out of lock. */
+ __CPROVER_fence("RWfence", "WWfence");
+
+ __CPROVER_atomic_begin();
+ bool old_locked = lock->locked;
+ lock->locked = false;
+ __CPROVER_atomic_end();
+
+ BUG_ON(!old_locked);
+#endif
+}
+
+static inline void lock_impl_init(struct lock_impl *lock)
+{
+ lock->locked = false;
+}
+
+#define LOCK_IMPL_INITIALIZER {.locked = false}
+
+#endif /* !defined(PTHREAD_LOCK) */
+
+/*
+ * Implement spinlocks using the lock mechanism. Wrap the lock to prevent mixing
+ * locks of different types.
+ */
+typedef struct {
+ struct lock_impl internal_lock;
+} spinlock_t;
+
+#define SPIN_LOCK_UNLOCKED {.internal_lock = LOCK_IMPL_INITIALIZER}
+#define __SPIN_LOCK_UNLOCKED(x) SPIN_LOCK_UNLOCKED
+#define DEFINE_SPINLOCK(x) spinlock_t x = SPIN_LOCK_UNLOCKED
+
+static inline void spin_lock_init(spinlock_t *lock)
+{
+ lock_impl_init(&lock->internal_lock);
+}
+
+static inline void spin_lock(spinlock_t *lock)
+{
+ /*
+ * Spin locks also need to be removed in order to eliminate all
+ * memory barriers. They are only used by the write side anyway.
+ */
+#ifndef NO_SYNC_SMP_MB
+ preempt_disable();
+ lock_impl_lock(&lock->internal_lock);
+#endif
+}
+
+static inline void spin_unlock(spinlock_t *lock)
+{
+#ifndef NO_SYNC_SMP_MB
+ lock_impl_unlock(&lock->internal_lock);
+ preempt_enable();
+#endif
+}
+
+/* Don't bother with interrupts */
+#define spin_lock_irq(lock) spin_lock(lock)
+#define spin_unlock_irq(lock) spin_unlock(lock)
+#define spin_lock_irqsave(lock, flags) spin_lock(lock)
+#define spin_unlock_irqrestore(lock, flags) spin_unlock(lock)
+
+/*
+ * This is supposed to return an int, but I think that a bool should work as
+ * well.
+ */
+static inline bool spin_trylock(spinlock_t *lock)
+{
+#ifndef NO_SYNC_SMP_MB
+ preempt_disable();
+ return lock_impl_trylock(&lock->internal_lock);
+#else
+ return true;
+#endif
+}
+
+struct completion {
+ /* Hopefuly this won't overflow. */
+ unsigned int count;
+};
+
+#define COMPLETION_INITIALIZER(x) {.count = 0}
+#define DECLARE_COMPLETION(x) struct completion x = COMPLETION_INITIALIZER(x)
+#define DECLARE_COMPLETION_ONSTACK(x) DECLARE_COMPLETION(x)
+
+static inline void init_completion(struct completion *c)
+{
+ c->count = 0;
+}
+
+static inline void wait_for_completion(struct completion *c)
+{
+ unsigned int prev_count = __sync_fetch_and_sub(&c->count, 1);
+
+ assume(prev_count);
+}
+
+static inline void complete(struct completion *c)
+{
+ unsigned int prev_count = __sync_fetch_and_add(&c->count, 1);
+
+ BUG_ON(prev_count == UINT_MAX);
+}
+
+/* This function probably isn't very useful for CBMC. */
+static inline bool try_wait_for_completion(struct completion *c)
+{
+ BUG();
+}
+
+static inline bool completion_done(struct completion *c)
+{
+ return c->count;
+}
+
+/* TODO: Implement complete_all */
+static inline void complete_all(struct completion *c)
+{
+ BUG();
+}
+
+#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/misc.c b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/misc.c
new file mode 100644
index 000000000..9440cc39e
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/misc.c
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <config.h>
+
+#include "misc.h"
+#include "bug_on.h"
+
+struct rcu_head;
+
+void wakeme_after_rcu(struct rcu_head *head)
+{
+ BUG();
+}
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/misc.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/misc.h
new file mode 100644
index 000000000..aca50030f
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/misc.h
@@ -0,0 +1,58 @@
+#ifndef MISC_H
+#define MISC_H
+
+#include "assume.h"
+#include "int_typedefs.h"
+#include "locks.h"
+
+#include <linux/types.h>
+
+/* Probably won't need to deal with bottom halves. */
+static inline void local_bh_disable(void) {}
+static inline void local_bh_enable(void) {}
+
+#define MODULE_ALIAS(X)
+#define module_param(...)
+#define EXPORT_SYMBOL_GPL(x)
+
+#define container_of(ptr, type, member) ({ \
+ const typeof(((type *)0)->member) *__mptr = (ptr); \
+ (type *)((char *)__mptr - offsetof(type, member)); \
+})
+
+#ifndef USE_SIMPLE_SYNC_SRCU
+/* Abuse udelay to make sure that busy loops terminate. */
+#define udelay(x) assume(0)
+
+#else
+
+/* The simple custom synchronize_srcu is ok with try_check_zero failing. */
+#define udelay(x) do { } while (0)
+#endif
+
+#define trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \
+ do { } while (0)
+
+#define notrace
+
+/* Avoid including rcupdate.h */
+struct rcu_synchronize {
+ struct rcu_head head;
+ struct completion completion;
+};
+
+void wakeme_after_rcu(struct rcu_head *head);
+
+#define rcu_lock_acquire(a) do { } while (0)
+#define rcu_lock_release(a) do { } while (0)
+#define rcu_lockdep_assert(c, s) do { } while (0)
+#define RCU_LOCKDEP_WARN(c, s) do { } while (0)
+
+/* Let CBMC non-deterministically choose switch between normal and expedited. */
+bool rcu_gp_is_normal(void);
+bool rcu_gp_is_expedited(void);
+
+/* Do the same for old versions of rcu. */
+#define rcu_expedited (rcu_gp_is_expedited())
+
+#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/percpu.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/percpu.h
new file mode 100644
index 000000000..27e67a3f2
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/percpu.h
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef PERCPU_H
+#define PERCPU_H
+
+#include <stddef.h>
+#include "bug_on.h"
+#include "preempt.h"
+
+#define __percpu
+
+/* Maximum size of any percpu data. */
+#define PERCPU_OFFSET (4 * sizeof(long))
+
+/* Ignore alignment, as CBMC doesn't care about false sharing. */
+#define alloc_percpu(type) __alloc_percpu(sizeof(type), 1)
+
+static inline void *__alloc_percpu(size_t size, size_t align)
+{
+ BUG();
+ return NULL;
+}
+
+static inline void free_percpu(void *ptr)
+{
+ BUG();
+}
+
+#define per_cpu_ptr(ptr, cpu) \
+ ((typeof(ptr)) ((char *) (ptr) + PERCPU_OFFSET * cpu))
+
+#define __this_cpu_inc(pcp) __this_cpu_add(pcp, 1)
+#define __this_cpu_dec(pcp) __this_cpu_sub(pcp, 1)
+#define __this_cpu_sub(pcp, n) __this_cpu_add(pcp, -(typeof(pcp)) (n))
+
+#define this_cpu_inc(pcp) this_cpu_add(pcp, 1)
+#define this_cpu_dec(pcp) this_cpu_sub(pcp, 1)
+#define this_cpu_sub(pcp, n) this_cpu_add(pcp, -(typeof(pcp)) (n))
+
+/* Make CBMC use atomics to work around bug. */
+#ifdef RUN
+#define THIS_CPU_ADD_HELPER(ptr, x) (*(ptr) += (x))
+#else
+/*
+ * Split the atomic into a read and a write so that it has the least
+ * possible ordering.
+ */
+#define THIS_CPU_ADD_HELPER(ptr, x) \
+ do { \
+ typeof(ptr) this_cpu_add_helper_ptr = (ptr); \
+ typeof(ptr) this_cpu_add_helper_x = (x); \
+ typeof(*ptr) this_cpu_add_helper_temp; \
+ __CPROVER_atomic_begin(); \
+ this_cpu_add_helper_temp = *(this_cpu_add_helper_ptr); \
+ __CPROVER_atomic_end(); \
+ this_cpu_add_helper_temp += this_cpu_add_helper_x; \
+ __CPROVER_atomic_begin(); \
+ *(this_cpu_add_helper_ptr) = this_cpu_add_helper_temp; \
+ __CPROVER_atomic_end(); \
+ } while (0)
+#endif
+
+/*
+ * For some reason CBMC needs an atomic operation even though this is percpu
+ * data.
+ */
+#define __this_cpu_add(pcp, n) \
+ do { \
+ BUG_ON(preemptible()); \
+ THIS_CPU_ADD_HELPER(per_cpu_ptr(&(pcp), thread_cpu_id), \
+ (typeof(pcp)) (n)); \
+ } while (0)
+
+#define this_cpu_add(pcp, n) \
+ do { \
+ int this_cpu_add_impl_cpu = get_cpu(); \
+ THIS_CPU_ADD_HELPER(per_cpu_ptr(&(pcp), this_cpu_add_impl_cpu), \
+ (typeof(pcp)) (n)); \
+ put_cpu(); \
+ } while (0)
+
+/*
+ * This will cause a compiler warning because of the cast from char[][] to
+ * type*. This will cause a compile time error if type is too big.
+ */
+#define DEFINE_PER_CPU(type, name) \
+ char name[NR_CPUS][PERCPU_OFFSET]; \
+ typedef char percpu_too_big_##name \
+ [sizeof(type) > PERCPU_OFFSET ? -1 : 1]
+
+#define for_each_possible_cpu(cpu) \
+ for ((cpu) = 0; (cpu) < NR_CPUS; ++(cpu))
+
+#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/preempt.c b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/preempt.c
new file mode 100644
index 000000000..b4083ae34
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/preempt.c
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <config.h>
+
+#include "preempt.h"
+
+#include "assume.h"
+#include "locks.h"
+
+/* Support NR_CPUS of at most 64 */
+#define CPU_PREEMPTION_LOCKS_INIT0 LOCK_IMPL_INITIALIZER
+#define CPU_PREEMPTION_LOCKS_INIT1 \
+ CPU_PREEMPTION_LOCKS_INIT0, CPU_PREEMPTION_LOCKS_INIT0
+#define CPU_PREEMPTION_LOCKS_INIT2 \
+ CPU_PREEMPTION_LOCKS_INIT1, CPU_PREEMPTION_LOCKS_INIT1
+#define CPU_PREEMPTION_LOCKS_INIT3 \
+ CPU_PREEMPTION_LOCKS_INIT2, CPU_PREEMPTION_LOCKS_INIT2
+#define CPU_PREEMPTION_LOCKS_INIT4 \
+ CPU_PREEMPTION_LOCKS_INIT3, CPU_PREEMPTION_LOCKS_INIT3
+#define CPU_PREEMPTION_LOCKS_INIT5 \
+ CPU_PREEMPTION_LOCKS_INIT4, CPU_PREEMPTION_LOCKS_INIT4
+
+/*
+ * Simulate disabling preemption by locking a particular cpu. NR_CPUS
+ * should be the actual number of cpus, not just the maximum.
+ */
+struct lock_impl cpu_preemption_locks[NR_CPUS] = {
+ CPU_PREEMPTION_LOCKS_INIT0
+#if (NR_CPUS - 1) & 1
+ , CPU_PREEMPTION_LOCKS_INIT0
+#endif
+#if (NR_CPUS - 1) & 2
+ , CPU_PREEMPTION_LOCKS_INIT1
+#endif
+#if (NR_CPUS - 1) & 4
+ , CPU_PREEMPTION_LOCKS_INIT2
+#endif
+#if (NR_CPUS - 1) & 8
+ , CPU_PREEMPTION_LOCKS_INIT3
+#endif
+#if (NR_CPUS - 1) & 16
+ , CPU_PREEMPTION_LOCKS_INIT4
+#endif
+#if (NR_CPUS - 1) & 32
+ , CPU_PREEMPTION_LOCKS_INIT5
+#endif
+};
+
+#undef CPU_PREEMPTION_LOCKS_INIT0
+#undef CPU_PREEMPTION_LOCKS_INIT1
+#undef CPU_PREEMPTION_LOCKS_INIT2
+#undef CPU_PREEMPTION_LOCKS_INIT3
+#undef CPU_PREEMPTION_LOCKS_INIT4
+#undef CPU_PREEMPTION_LOCKS_INIT5
+
+__thread int thread_cpu_id;
+__thread int preempt_disable_count;
+
+void preempt_disable(void)
+{
+ BUG_ON(preempt_disable_count < 0 || preempt_disable_count == INT_MAX);
+
+ if (preempt_disable_count++)
+ return;
+
+ thread_cpu_id = nondet_int();
+ assume(thread_cpu_id >= 0);
+ assume(thread_cpu_id < NR_CPUS);
+ lock_impl_lock(&cpu_preemption_locks[thread_cpu_id]);
+}
+
+void preempt_enable(void)
+{
+ BUG_ON(preempt_disable_count < 1);
+
+ if (--preempt_disable_count)
+ return;
+
+ lock_impl_unlock(&cpu_preemption_locks[thread_cpu_id]);
+}
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/preempt.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/preempt.h
new file mode 100644
index 000000000..f8b762cd2
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/preempt.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef PREEMPT_H
+#define PREEMPT_H
+
+#include <stdbool.h>
+
+#include "bug_on.h"
+
+/* This flag contains garbage if preempt_disable_count is 0. */
+extern __thread int thread_cpu_id;
+
+/* Support recursive preemption disabling. */
+extern __thread int preempt_disable_count;
+
+void preempt_disable(void);
+void preempt_enable(void);
+
+static inline void preempt_disable_notrace(void)
+{
+ preempt_disable();
+}
+
+static inline void preempt_enable_no_resched(void)
+{
+ preempt_enable();
+}
+
+static inline void preempt_enable_notrace(void)
+{
+ preempt_enable();
+}
+
+static inline int preempt_count(void)
+{
+ return preempt_disable_count;
+}
+
+static inline bool preemptible(void)
+{
+ return !preempt_count();
+}
+
+static inline int get_cpu(void)
+{
+ preempt_disable();
+ return thread_cpu_id;
+}
+
+static inline void put_cpu(void)
+{
+ preempt_enable();
+}
+
+static inline void might_sleep(void)
+{
+ BUG_ON(preempt_disable_count);
+}
+
+#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/simple_sync_srcu.c b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/simple_sync_srcu.c
new file mode 100644
index 000000000..97f592048
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/simple_sync_srcu.c
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <config.h>
+
+#include <assert.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <pthread.h>
+#include <stddef.h>
+#include <string.h>
+#include <sys/types.h>
+
+#include "int_typedefs.h"
+
+#include "barriers.h"
+#include "bug_on.h"
+#include "locks.h"
+#include "misc.h"
+#include "preempt.h"
+#include "percpu.h"
+#include "workqueues.h"
+
+#include <linux/srcu.h>
+
+/* Functions needed from modify_srcu.c */
+bool try_check_zero(struct srcu_struct *sp, int idx, int trycount);
+void srcu_flip(struct srcu_struct *sp);
+
+/* Simpler implementation of synchronize_srcu that ignores batching. */
+void synchronize_srcu(struct srcu_struct *sp)
+{
+ int idx;
+ /*
+ * This code assumes that try_check_zero will succeed anyway,
+ * so there is no point in multiple tries.
+ */
+ const int trycount = 1;
+
+ might_sleep();
+
+ /* Ignore the lock, as multiple writers aren't working yet anyway. */
+
+ idx = 1 ^ (sp->completed & 1);
+
+ /* For comments see srcu_advance_batches. */
+
+ assume(try_check_zero(sp, idx, trycount));
+
+ srcu_flip(sp);
+
+ assume(try_check_zero(sp, idx^1, trycount));
+}
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/workqueues.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/workqueues.h
new file mode 100644
index 000000000..28b960300
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/workqueues.h
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef WORKQUEUES_H
+#define WORKQUEUES_H
+
+#include <stdbool.h>
+
+#include "barriers.h"
+#include "bug_on.h"
+#include "int_typedefs.h"
+
+#include <linux/types.h>
+
+/* Stub workqueue implementation. */
+
+struct work_struct;
+typedef void (*work_func_t)(struct work_struct *work);
+void delayed_work_timer_fn(unsigned long __data);
+
+struct work_struct {
+/* atomic_long_t data; */
+ unsigned long data;
+
+ struct list_head entry;
+ work_func_t func;
+#ifdef CONFIG_LOCKDEP
+ struct lockdep_map lockdep_map;
+#endif
+};
+
+struct timer_list {
+ struct hlist_node entry;
+ unsigned long expires;
+ void (*function)(unsigned long);
+ unsigned long data;
+ u32 flags;
+ int slack;
+};
+
+struct delayed_work {
+ struct work_struct work;
+ struct timer_list timer;
+
+ /* target workqueue and CPU ->timer uses to queue ->work */
+ struct workqueue_struct *wq;
+ int cpu;
+};
+
+
+static inline bool schedule_work(struct work_struct *work)
+{
+ BUG();
+ return true;
+}
+
+static inline bool schedule_work_on(int cpu, struct work_struct *work)
+{
+ BUG();
+ return true;
+}
+
+static inline bool queue_work(struct workqueue_struct *wq,
+ struct work_struct *work)
+{
+ BUG();
+ return true;
+}
+
+static inline bool queue_delayed_work(struct workqueue_struct *wq,
+ struct delayed_work *dwork,
+ unsigned long delay)
+{
+ BUG();
+ return true;
+}
+
+#define INIT_WORK(w, f) \
+ do { \
+ (w)->data = 0; \
+ (w)->func = (f); \
+ } while (0)
+
+#define INIT_DELAYED_WORK(w, f) INIT_WORK(&(w)->work, (f))
+
+#define __WORK_INITIALIZER(n, f) { \
+ .data = 0, \
+ .entry = { &(n).entry, &(n).entry }, \
+ .func = f \
+ }
+
+/* Don't bother initializing timer. */
+#define __DELAYED_WORK_INITIALIZER(n, f, tflags) { \
+ .work = __WORK_INITIALIZER((n).work, (f)), \
+ }
+
+#define DECLARE_WORK(n, f) \
+ struct workqueue_struct n = __WORK_INITIALIZER
+
+#define DECLARE_DELAYED_WORK(n, f) \
+ struct delayed_work n = __DELAYED_WORK_INITIALIZER(n, f, 0)
+
+#define system_power_efficient_wq ((struct workqueue_struct *) NULL)
+
+#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/.gitignore b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/.gitignore
new file mode 100644
index 000000000..f47cb2045
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/.gitignore
@@ -0,0 +1 @@
+*.out
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/Makefile b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/Makefile
new file mode 100644
index 000000000..ad21b925f
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+CBMC_FLAGS = -I../.. -I../../src -I../../include -I../../empty_includes -32 -pointer-check -mm pso
+
+all:
+ for i in ./*.pass; do \
+ echo $$i ; \
+ CBMC_FLAGS="$(CBMC_FLAGS)" sh ../test_script.sh --should-pass $$i > $$i.out 2>&1 ; \
+ done
+ for i in ./*.fail; do \
+ echo $$i ; \
+ CBMC_FLAGS="$(CBMC_FLAGS)" sh ../test_script.sh --should-fail $$i > $$i.out 2>&1 ; \
+ done
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/assert_end.fail b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/assert_end.fail
new file mode 100644
index 000000000..40c807591
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/assert_end.fail
@@ -0,0 +1 @@
+test_cbmc_options="-DASSERT_END"
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force.fail b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force.fail
new file mode 100644
index 000000000..ada5baf0b
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force.fail
@@ -0,0 +1 @@
+test_cbmc_options="-DFORCE_FAILURE"
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force2.fail b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force2.fail
new file mode 100644
index 000000000..8fe00c8db
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force2.fail
@@ -0,0 +1 @@
+test_cbmc_options="-DFORCE_FAILURE_2"
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force3.fail b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force3.fail
new file mode 100644
index 000000000..612ed6772
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force3.fail
@@ -0,0 +1 @@
+test_cbmc_options="-DFORCE_FAILURE_3"
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/main.pass b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/main.pass
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/main.pass
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/test.c b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/test.c
new file mode 100644
index 000000000..2ce2016f7
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/test.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <src/combined_source.c>
+
+int x;
+int y;
+
+int __unbuffered_tpr_x;
+int __unbuffered_tpr_y;
+
+DEFINE_SRCU(ss);
+
+void rcu_reader(void)
+{
+ int idx;
+
+#ifndef FORCE_FAILURE_3
+ idx = srcu_read_lock(&ss);
+#endif
+ might_sleep();
+
+ __unbuffered_tpr_y = READ_ONCE(y);
+#ifdef FORCE_FAILURE
+ srcu_read_unlock(&ss, idx);
+ idx = srcu_read_lock(&ss);
+#endif
+ WRITE_ONCE(x, 1);
+
+#ifndef FORCE_FAILURE_3
+ srcu_read_unlock(&ss, idx);
+#endif
+ might_sleep();
+}
+
+void *thread_update(void *arg)
+{
+ WRITE_ONCE(y, 1);
+#ifndef FORCE_FAILURE_2
+ synchronize_srcu(&ss);
+#endif
+ might_sleep();
+ __unbuffered_tpr_x = READ_ONCE(x);
+
+ return NULL;
+}
+
+void *thread_process_reader(void *arg)
+{
+ rcu_reader();
+
+ return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+ pthread_t tu;
+ pthread_t tpr;
+
+ if (pthread_create(&tu, NULL, thread_update, NULL))
+ abort();
+ if (pthread_create(&tpr, NULL, thread_process_reader, NULL))
+ abort();
+ if (pthread_join(tu, NULL))
+ abort();
+ if (pthread_join(tpr, NULL))
+ abort();
+ assert(__unbuffered_tpr_y != 0 || __unbuffered_tpr_x != 0);
+
+#ifdef ASSERT_END
+ assert(0);
+#endif
+
+ return 0;
+}
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/test_script.sh b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/test_script.sh
new file mode 100755
index 000000000..2fe1f0339
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/test_script.sh
@@ -0,0 +1,103 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+# This script expects a mode (either --should-pass or --should-fail) followed by
+# an input file. The script uses the following environment variables. The test C
+# source file is expected to be named test.c in the directory containing the
+# input file.
+#
+# CBMC: The command to run CBMC. Default: cbmc
+# CBMC_FLAGS: Additional flags to pass to CBMC
+# NR_CPUS: Number of cpus to run tests with. Default specified by the test
+# SYNC_SRCU_MODE: Choose implementation of synchronize_srcu. Defaults to simple.
+# kernel: Version included in the linux kernel source.
+# simple: Use try_check_zero directly.
+#
+# The input file is a script that is sourced by this file. It can define any of
+# the following variables to configure the test.
+#
+# test_cbmc_options: Extra options to pass to CBMC.
+# min_cpus_fail: Minimum number of CPUs (NR_CPUS) for verification to fail.
+# The test is expected to pass if it is run with fewer. (Only
+# useful for .fail files)
+# default_cpus: Quantity of CPUs to use for the test, if not specified on the
+# command line. Default: Larger of 2 and MIN_CPUS_FAIL.
+
+set -e
+
+if test "$#" -ne 2; then
+ echo "Expected one option followed by an input file" 1>&2
+ exit 99
+fi
+
+if test "x$1" = "x--should-pass"; then
+ should_pass="yes"
+elif test "x$1" = "x--should-fail"; then
+ should_pass="no"
+else
+ echo "Unrecognized argument '$1'" 1>&2
+
+ # Exit code 99 indicates a hard error.
+ exit 99
+fi
+
+CBMC=${CBMC:-cbmc}
+
+SYNC_SRCU_MODE=${SYNC_SRCU_MODE:-simple}
+
+case ${SYNC_SRCU_MODE} in
+kernel) sync_srcu_mode_flags="" ;;
+simple) sync_srcu_mode_flags="-DUSE_SIMPLE_SYNC_SRCU" ;;
+
+*)
+ echo "Unrecognized argument '${SYNC_SRCU_MODE}'" 1>&2
+ exit 99
+ ;;
+esac
+
+min_cpus_fail=1
+
+c_file=`dirname "$2"`/test.c
+
+# Source the input file.
+. $2
+
+if test ${min_cpus_fail} -gt 2; then
+ default_default_cpus=${min_cpus_fail}
+else
+ default_default_cpus=2
+fi
+default_cpus=${default_cpus:-${default_default_cpus}}
+cpus=${NR_CPUS:-${default_cpus}}
+
+# Check if there are two few cpus to make the test fail.
+if test $cpus -lt ${min_cpus_fail:-0}; then
+ should_pass="yes"
+fi
+
+cbmc_opts="-DNR_CPUS=${cpus} ${sync_srcu_mode_flags} ${test_cbmc_options} ${CBMC_FLAGS}"
+
+echo "Running CBMC: ${CBMC} ${cbmc_opts} ${c_file}"
+if ${CBMC} ${cbmc_opts} "${c_file}"; then
+ # Verification successful. Make sure that it was supposed to verify.
+ test "x${should_pass}" = xyes
+else
+ cbmc_exit_status=$?
+
+ # An exit status of 10 indicates a failed verification.
+ # (see cbmc_parse_optionst::do_bmc in the CBMC source code)
+ if test ${cbmc_exit_status} -eq 10 && test "x${should_pass}" = xno; then
+ :
+ else
+ echo "CBMC returned ${cbmc_exit_status} exit status" 1>&2
+
+ # Parse errors have exit status 6. Any other type of error
+ # should be considered a hard error.
+ if test ${cbmc_exit_status} -ne 6 && \
+ test ${cbmc_exit_status} -ne 10; then
+ exit 99
+ else
+ exit 1
+ fi
+ fi
+fi
diff --git a/tools/testing/selftests/rseq/.gitignore b/tools/testing/selftests/rseq/.gitignore
new file mode 100644
index 000000000..cc610da7e
--- /dev/null
+++ b/tools/testing/selftests/rseq/.gitignore
@@ -0,0 +1,6 @@
+basic_percpu_ops_test
+basic_test
+basic_rseq_op_test
+param_test
+param_test_benchmark
+param_test_compare_twice
diff --git a/tools/testing/selftests/rseq/Makefile b/tools/testing/selftests/rseq/Makefile
new file mode 100644
index 000000000..c30c52e1d
--- /dev/null
+++ b/tools/testing/selftests/rseq/Makefile
@@ -0,0 +1,30 @@
+# SPDX-License-Identifier: GPL-2.0+ OR MIT
+CFLAGS += -O2 -Wall -g -I./ -I../../../../usr/include/ -L./ -Wl,-rpath=./
+LDLIBS += -lpthread
+
+# Own dependencies because we only want to build against 1st prerequisite, but
+# still track changes to header files and depend on shared object.
+OVERRIDE_TARGETS = 1
+
+TEST_GEN_PROGS = basic_test basic_percpu_ops_test param_test \
+ param_test_benchmark param_test_compare_twice
+
+TEST_GEN_PROGS_EXTENDED = librseq.so
+
+TEST_PROGS = run_param_test.sh
+
+include ../lib.mk
+
+$(OUTPUT)/librseq.so: rseq.c rseq.h rseq-*.h
+ $(CC) $(CFLAGS) -shared -fPIC $< $(LDLIBS) -o $@
+
+$(OUTPUT)/%: %.c $(TEST_GEN_PROGS_EXTENDED) rseq.h rseq-*.h
+ $(CC) $(CFLAGS) $< $(LDLIBS) -lrseq -o $@
+
+$(OUTPUT)/param_test_benchmark: param_test.c $(TEST_GEN_PROGS_EXTENDED) \
+ rseq.h rseq-*.h
+ $(CC) $(CFLAGS) -DBENCHMARK $< $(LDLIBS) -lrseq -o $@
+
+$(OUTPUT)/param_test_compare_twice: param_test.c $(TEST_GEN_PROGS_EXTENDED) \
+ rseq.h rseq-*.h
+ $(CC) $(CFLAGS) -DRSEQ_COMPARE_TWICE $< $(LDLIBS) -lrseq -o $@
diff --git a/tools/testing/selftests/rseq/basic_percpu_ops_test.c b/tools/testing/selftests/rseq/basic_percpu_ops_test.c
new file mode 100644
index 000000000..eb3f6db36
--- /dev/null
+++ b/tools/testing/selftests/rseq/basic_percpu_ops_test.c
@@ -0,0 +1,312 @@
+// SPDX-License-Identifier: LGPL-2.1
+#define _GNU_SOURCE
+#include <assert.h>
+#include <pthread.h>
+#include <sched.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stddef.h>
+
+#include "rseq.h"
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+struct percpu_lock_entry {
+ intptr_t v;
+} __attribute__((aligned(128)));
+
+struct percpu_lock {
+ struct percpu_lock_entry c[CPU_SETSIZE];
+};
+
+struct test_data_entry {
+ intptr_t count;
+} __attribute__((aligned(128)));
+
+struct spinlock_test_data {
+ struct percpu_lock lock;
+ struct test_data_entry c[CPU_SETSIZE];
+ int reps;
+};
+
+struct percpu_list_node {
+ intptr_t data;
+ struct percpu_list_node *next;
+};
+
+struct percpu_list_entry {
+ struct percpu_list_node *head;
+} __attribute__((aligned(128)));
+
+struct percpu_list {
+ struct percpu_list_entry c[CPU_SETSIZE];
+};
+
+/* A simple percpu spinlock. Returns the cpu lock was acquired on. */
+int rseq_this_cpu_lock(struct percpu_lock *lock)
+{
+ int cpu;
+
+ for (;;) {
+ int ret;
+
+ cpu = rseq_cpu_start();
+ ret = rseq_cmpeqv_storev(&lock->c[cpu].v,
+ 0, 1, cpu);
+ if (rseq_likely(!ret))
+ break;
+ /* Retry if comparison fails or rseq aborts. */
+ }
+ /*
+ * Acquire semantic when taking lock after control dependency.
+ * Matches rseq_smp_store_release().
+ */
+ rseq_smp_acquire__after_ctrl_dep();
+ return cpu;
+}
+
+void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
+{
+ assert(lock->c[cpu].v == 1);
+ /*
+ * Release lock, with release semantic. Matches
+ * rseq_smp_acquire__after_ctrl_dep().
+ */
+ rseq_smp_store_release(&lock->c[cpu].v, 0);
+}
+
+void *test_percpu_spinlock_thread(void *arg)
+{
+ struct spinlock_test_data *data = arg;
+ int i, cpu;
+
+ if (rseq_register_current_thread()) {
+ fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
+ errno, strerror(errno));
+ abort();
+ }
+ for (i = 0; i < data->reps; i++) {
+ cpu = rseq_this_cpu_lock(&data->lock);
+ data->c[cpu].count++;
+ rseq_percpu_unlock(&data->lock, cpu);
+ }
+ if (rseq_unregister_current_thread()) {
+ fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
+ errno, strerror(errno));
+ abort();
+ }
+
+ return NULL;
+}
+
+/*
+ * A simple test which implements a sharded counter using a per-cpu
+ * lock. Obviously real applications might prefer to simply use a
+ * per-cpu increment; however, this is reasonable for a test and the
+ * lock can be extended to synchronize more complicated operations.
+ */
+void test_percpu_spinlock(void)
+{
+ const int num_threads = 200;
+ int i;
+ uint64_t sum;
+ pthread_t test_threads[num_threads];
+ struct spinlock_test_data data;
+
+ memset(&data, 0, sizeof(data));
+ data.reps = 5000;
+
+ for (i = 0; i < num_threads; i++)
+ pthread_create(&test_threads[i], NULL,
+ test_percpu_spinlock_thread, &data);
+
+ for (i = 0; i < num_threads; i++)
+ pthread_join(test_threads[i], NULL);
+
+ sum = 0;
+ for (i = 0; i < CPU_SETSIZE; i++)
+ sum += data.c[i].count;
+
+ assert(sum == (uint64_t)data.reps * num_threads);
+}
+
+void this_cpu_list_push(struct percpu_list *list,
+ struct percpu_list_node *node,
+ int *_cpu)
+{
+ int cpu;
+
+ for (;;) {
+ intptr_t *targetptr, newval, expect;
+ int ret;
+
+ cpu = rseq_cpu_start();
+ /* Load list->c[cpu].head with single-copy atomicity. */
+ expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
+ newval = (intptr_t)node;
+ targetptr = (intptr_t *)&list->c[cpu].head;
+ node->next = (struct percpu_list_node *)expect;
+ ret = rseq_cmpeqv_storev(targetptr, expect, newval, cpu);
+ if (rseq_likely(!ret))
+ break;
+ /* Retry if comparison fails or rseq aborts. */
+ }
+ if (_cpu)
+ *_cpu = cpu;
+}
+
+/*
+ * Unlike a traditional lock-less linked list; the availability of a
+ * rseq primitive allows us to implement pop without concerns over
+ * ABA-type races.
+ */
+struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
+ int *_cpu)
+{
+ for (;;) {
+ struct percpu_list_node *head;
+ intptr_t *targetptr, expectnot, *load;
+ off_t offset;
+ int ret, cpu;
+
+ cpu = rseq_cpu_start();
+ targetptr = (intptr_t *)&list->c[cpu].head;
+ expectnot = (intptr_t)NULL;
+ offset = offsetof(struct percpu_list_node, next);
+ load = (intptr_t *)&head;
+ ret = rseq_cmpnev_storeoffp_load(targetptr, expectnot,
+ offset, load, cpu);
+ if (rseq_likely(!ret)) {
+ if (_cpu)
+ *_cpu = cpu;
+ return head;
+ }
+ if (ret > 0)
+ return NULL;
+ /* Retry if rseq aborts. */
+ }
+}
+
+/*
+ * __percpu_list_pop is not safe against concurrent accesses. Should
+ * only be used on lists that are not concurrently modified.
+ */
+struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu)
+{
+ struct percpu_list_node *node;
+
+ node = list->c[cpu].head;
+ if (!node)
+ return NULL;
+ list->c[cpu].head = node->next;
+ return node;
+}
+
+void *test_percpu_list_thread(void *arg)
+{
+ int i;
+ struct percpu_list *list = (struct percpu_list *)arg;
+
+ if (rseq_register_current_thread()) {
+ fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
+ errno, strerror(errno));
+ abort();
+ }
+
+ for (i = 0; i < 100000; i++) {
+ struct percpu_list_node *node;
+
+ node = this_cpu_list_pop(list, NULL);
+ sched_yield(); /* encourage shuffling */
+ if (node)
+ this_cpu_list_push(list, node, NULL);
+ }
+
+ if (rseq_unregister_current_thread()) {
+ fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
+ errno, strerror(errno));
+ abort();
+ }
+
+ return NULL;
+}
+
+/* Simultaneous modification to a per-cpu linked list from many threads. */
+void test_percpu_list(void)
+{
+ int i, j;
+ uint64_t sum = 0, expected_sum = 0;
+ struct percpu_list list;
+ pthread_t test_threads[200];
+ cpu_set_t allowed_cpus;
+
+ memset(&list, 0, sizeof(list));
+
+ /* Generate list entries for every usable cpu. */
+ sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
+ for (i = 0; i < CPU_SETSIZE; i++) {
+ if (!CPU_ISSET(i, &allowed_cpus))
+ continue;
+ for (j = 1; j <= 100; j++) {
+ struct percpu_list_node *node;
+
+ expected_sum += j;
+
+ node = malloc(sizeof(*node));
+ assert(node);
+ node->data = j;
+ node->next = list.c[i].head;
+ list.c[i].head = node;
+ }
+ }
+
+ for (i = 0; i < 200; i++)
+ pthread_create(&test_threads[i], NULL,
+ test_percpu_list_thread, &list);
+
+ for (i = 0; i < 200; i++)
+ pthread_join(test_threads[i], NULL);
+
+ for (i = 0; i < CPU_SETSIZE; i++) {
+ struct percpu_list_node *node;
+
+ if (!CPU_ISSET(i, &allowed_cpus))
+ continue;
+
+ while ((node = __percpu_list_pop(&list, i))) {
+ sum += node->data;
+ free(node);
+ }
+ }
+
+ /*
+ * All entries should now be accounted for (unless some external
+ * actor is interfering with our allowed affinity while this
+ * test is running).
+ */
+ assert(sum == expected_sum);
+}
+
+int main(int argc, char **argv)
+{
+ if (rseq_register_current_thread()) {
+ fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
+ errno, strerror(errno));
+ goto error;
+ }
+ printf("spinlock\n");
+ test_percpu_spinlock();
+ printf("percpu_list\n");
+ test_percpu_list();
+ if (rseq_unregister_current_thread()) {
+ fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
+ errno, strerror(errno));
+ goto error;
+ }
+ return 0;
+
+error:
+ return -1;
+}
diff --git a/tools/testing/selftests/rseq/basic_test.c b/tools/testing/selftests/rseq/basic_test.c
new file mode 100644
index 000000000..d8efbfb89
--- /dev/null
+++ b/tools/testing/selftests/rseq/basic_test.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Basic test coverage for critical regions and rseq_current_cpu().
+ */
+
+#define _GNU_SOURCE
+#include <assert.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/time.h>
+
+#include "rseq.h"
+
+void test_cpu_pointer(void)
+{
+ cpu_set_t affinity, test_affinity;
+ int i;
+
+ sched_getaffinity(0, sizeof(affinity), &affinity);
+ CPU_ZERO(&test_affinity);
+ for (i = 0; i < CPU_SETSIZE; i++) {
+ if (CPU_ISSET(i, &affinity)) {
+ CPU_SET(i, &test_affinity);
+ sched_setaffinity(0, sizeof(test_affinity),
+ &test_affinity);
+ assert(sched_getcpu() == i);
+ assert(rseq_current_cpu() == i);
+ assert(rseq_current_cpu_raw() == i);
+ assert(rseq_cpu_start() == i);
+ CPU_CLR(i, &test_affinity);
+ }
+ }
+ sched_setaffinity(0, sizeof(affinity), &affinity);
+}
+
+int main(int argc, char **argv)
+{
+ if (rseq_register_current_thread()) {
+ fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
+ errno, strerror(errno));
+ goto init_thread_error;
+ }
+ printf("testing current cpu\n");
+ test_cpu_pointer();
+ if (rseq_unregister_current_thread()) {
+ fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
+ errno, strerror(errno));
+ goto init_thread_error;
+ }
+ return 0;
+
+init_thread_error:
+ return -1;
+}
diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c
new file mode 100644
index 000000000..e8a657a5f
--- /dev/null
+++ b/tools/testing/selftests/rseq/param_test.c
@@ -0,0 +1,1331 @@
+// SPDX-License-Identifier: LGPL-2.1
+#define _GNU_SOURCE
+#include <assert.h>
+#include <pthread.h>
+#include <sched.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syscall.h>
+#include <unistd.h>
+#include <poll.h>
+#include <sys/types.h>
+#include <signal.h>
+#include <errno.h>
+#include <stddef.h>
+
+static inline pid_t rseq_gettid(void)
+{
+ return syscall(__NR_gettid);
+}
+
+#define NR_INJECT 9
+static int loop_cnt[NR_INJECT + 1];
+
+static int loop_cnt_1 asm("asm_loop_cnt_1") __attribute__((used));
+static int loop_cnt_2 asm("asm_loop_cnt_2") __attribute__((used));
+static int loop_cnt_3 asm("asm_loop_cnt_3") __attribute__((used));
+static int loop_cnt_4 asm("asm_loop_cnt_4") __attribute__((used));
+static int loop_cnt_5 asm("asm_loop_cnt_5") __attribute__((used));
+static int loop_cnt_6 asm("asm_loop_cnt_6") __attribute__((used));
+
+static int opt_modulo, verbose;
+
+static int opt_yield, opt_signal, opt_sleep,
+ opt_disable_rseq, opt_threads = 200,
+ opt_disable_mod = 0, opt_test = 's', opt_mb = 0;
+
+#ifndef RSEQ_SKIP_FASTPATH
+static long long opt_reps = 5000;
+#else
+static long long opt_reps = 100;
+#endif
+
+static __thread __attribute__((tls_model("initial-exec")))
+unsigned int signals_delivered;
+
+#ifndef BENCHMARK
+
+static __thread __attribute__((tls_model("initial-exec"), unused))
+unsigned int yield_mod_cnt, nr_abort;
+
+#define printf_verbose(fmt, ...) \
+ do { \
+ if (verbose) \
+ printf(fmt, ## __VA_ARGS__); \
+ } while (0)
+
+#ifdef __i386__
+
+#define INJECT_ASM_REG "eax"
+
+#define RSEQ_INJECT_CLOBBER \
+ , INJECT_ASM_REG
+
+#define RSEQ_INJECT_ASM(n) \
+ "mov asm_loop_cnt_" #n ", %%" INJECT_ASM_REG "\n\t" \
+ "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
+ "jz 333f\n\t" \
+ "222:\n\t" \
+ "dec %%" INJECT_ASM_REG "\n\t" \
+ "jnz 222b\n\t" \
+ "333:\n\t"
+
+#elif defined(__x86_64__)
+
+#define INJECT_ASM_REG_P "rax"
+#define INJECT_ASM_REG "eax"
+
+#define RSEQ_INJECT_CLOBBER \
+ , INJECT_ASM_REG_P \
+ , INJECT_ASM_REG
+
+#define RSEQ_INJECT_ASM(n) \
+ "lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG_P "\n\t" \
+ "mov (%%" INJECT_ASM_REG_P "), %%" INJECT_ASM_REG "\n\t" \
+ "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
+ "jz 333f\n\t" \
+ "222:\n\t" \
+ "dec %%" INJECT_ASM_REG "\n\t" \
+ "jnz 222b\n\t" \
+ "333:\n\t"
+
+#elif defined(__s390__)
+
+#define RSEQ_INJECT_INPUT \
+ , [loop_cnt_1]"m"(loop_cnt[1]) \
+ , [loop_cnt_2]"m"(loop_cnt[2]) \
+ , [loop_cnt_3]"m"(loop_cnt[3]) \
+ , [loop_cnt_4]"m"(loop_cnt[4]) \
+ , [loop_cnt_5]"m"(loop_cnt[5]) \
+ , [loop_cnt_6]"m"(loop_cnt[6])
+
+#define INJECT_ASM_REG "r12"
+
+#define RSEQ_INJECT_CLOBBER \
+ , INJECT_ASM_REG
+
+#define RSEQ_INJECT_ASM(n) \
+ "l %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
+ "ltr %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG "\n\t" \
+ "je 333f\n\t" \
+ "222:\n\t" \
+ "ahi %%" INJECT_ASM_REG ", -1\n\t" \
+ "jnz 222b\n\t" \
+ "333:\n\t"
+
+#elif defined(__ARMEL__)
+
+#define RSEQ_INJECT_INPUT \
+ , [loop_cnt_1]"m"(loop_cnt[1]) \
+ , [loop_cnt_2]"m"(loop_cnt[2]) \
+ , [loop_cnt_3]"m"(loop_cnt[3]) \
+ , [loop_cnt_4]"m"(loop_cnt[4]) \
+ , [loop_cnt_5]"m"(loop_cnt[5]) \
+ , [loop_cnt_6]"m"(loop_cnt[6])
+
+#define INJECT_ASM_REG "r4"
+
+#define RSEQ_INJECT_CLOBBER \
+ , INJECT_ASM_REG
+
+#define RSEQ_INJECT_ASM(n) \
+ "ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
+ "cmp " INJECT_ASM_REG ", #0\n\t" \
+ "beq 333f\n\t" \
+ "222:\n\t" \
+ "subs " INJECT_ASM_REG ", #1\n\t" \
+ "bne 222b\n\t" \
+ "333:\n\t"
+
+#elif defined(__AARCH64EL__)
+
+#define RSEQ_INJECT_INPUT \
+ , [loop_cnt_1] "Qo" (loop_cnt[1]) \
+ , [loop_cnt_2] "Qo" (loop_cnt[2]) \
+ , [loop_cnt_3] "Qo" (loop_cnt[3]) \
+ , [loop_cnt_4] "Qo" (loop_cnt[4]) \
+ , [loop_cnt_5] "Qo" (loop_cnt[5]) \
+ , [loop_cnt_6] "Qo" (loop_cnt[6])
+
+#define INJECT_ASM_REG RSEQ_ASM_TMP_REG32
+
+#define RSEQ_INJECT_ASM(n) \
+ " ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n" \
+ " cbz " INJECT_ASM_REG ", 333f\n" \
+ "222:\n" \
+ " sub " INJECT_ASM_REG ", " INJECT_ASM_REG ", #1\n" \
+ " cbnz " INJECT_ASM_REG ", 222b\n" \
+ "333:\n"
+
+#elif __PPC__
+
+#define RSEQ_INJECT_INPUT \
+ , [loop_cnt_1]"m"(loop_cnt[1]) \
+ , [loop_cnt_2]"m"(loop_cnt[2]) \
+ , [loop_cnt_3]"m"(loop_cnt[3]) \
+ , [loop_cnt_4]"m"(loop_cnt[4]) \
+ , [loop_cnt_5]"m"(loop_cnt[5]) \
+ , [loop_cnt_6]"m"(loop_cnt[6])
+
+#define INJECT_ASM_REG "r18"
+
+#define RSEQ_INJECT_CLOBBER \
+ , INJECT_ASM_REG
+
+#define RSEQ_INJECT_ASM(n) \
+ "lwz %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
+ "cmpwi %%" INJECT_ASM_REG ", 0\n\t" \
+ "beq 333f\n\t" \
+ "222:\n\t" \
+ "subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \
+ "bne 222b\n\t" \
+ "333:\n\t"
+
+#elif defined(__mips__)
+
+#define RSEQ_INJECT_INPUT \
+ , [loop_cnt_1]"m"(loop_cnt[1]) \
+ , [loop_cnt_2]"m"(loop_cnt[2]) \
+ , [loop_cnt_3]"m"(loop_cnt[3]) \
+ , [loop_cnt_4]"m"(loop_cnt[4]) \
+ , [loop_cnt_5]"m"(loop_cnt[5]) \
+ , [loop_cnt_6]"m"(loop_cnt[6])
+
+#define INJECT_ASM_REG "$5"
+
+#define RSEQ_INJECT_CLOBBER \
+ , INJECT_ASM_REG
+
+#define RSEQ_INJECT_ASM(n) \
+ "lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
+ "beqz " INJECT_ASM_REG ", 333f\n\t" \
+ "222:\n\t" \
+ "addiu " INJECT_ASM_REG ", -1\n\t" \
+ "bnez " INJECT_ASM_REG ", 222b\n\t" \
+ "333:\n\t"
+
+#else
+#error unsupported target
+#endif
+
+#define RSEQ_INJECT_FAILED \
+ nr_abort++;
+
+#define RSEQ_INJECT_C(n) \
+{ \
+ int loc_i, loc_nr_loops = loop_cnt[n]; \
+ \
+ for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) { \
+ rseq_barrier(); \
+ } \
+ if (loc_nr_loops == -1 && opt_modulo) { \
+ if (yield_mod_cnt == opt_modulo - 1) { \
+ if (opt_sleep > 0) \
+ poll(NULL, 0, opt_sleep); \
+ if (opt_yield) \
+ sched_yield(); \
+ if (opt_signal) \
+ raise(SIGUSR1); \
+ yield_mod_cnt = 0; \
+ } else { \
+ yield_mod_cnt++; \
+ } \
+ } \
+}
+
+#else
+
+#define printf_verbose(fmt, ...)
+
+#endif /* BENCHMARK */
+
+#include "rseq.h"
+
+struct percpu_lock_entry {
+ intptr_t v;
+} __attribute__((aligned(128)));
+
+struct percpu_lock {
+ struct percpu_lock_entry c[CPU_SETSIZE];
+};
+
+struct test_data_entry {
+ intptr_t count;
+} __attribute__((aligned(128)));
+
+struct spinlock_test_data {
+ struct percpu_lock lock;
+ struct test_data_entry c[CPU_SETSIZE];
+};
+
+struct spinlock_thread_test_data {
+ struct spinlock_test_data *data;
+ long long reps;
+ int reg;
+};
+
+struct inc_test_data {
+ struct test_data_entry c[CPU_SETSIZE];
+};
+
+struct inc_thread_test_data {
+ struct inc_test_data *data;
+ long long reps;
+ int reg;
+};
+
+struct percpu_list_node {
+ intptr_t data;
+ struct percpu_list_node *next;
+};
+
+struct percpu_list_entry {
+ struct percpu_list_node *head;
+} __attribute__((aligned(128)));
+
+struct percpu_list {
+ struct percpu_list_entry c[CPU_SETSIZE];
+};
+
+#define BUFFER_ITEM_PER_CPU 100
+
+struct percpu_buffer_node {
+ intptr_t data;
+};
+
+struct percpu_buffer_entry {
+ intptr_t offset;
+ intptr_t buflen;
+ struct percpu_buffer_node **array;
+} __attribute__((aligned(128)));
+
+struct percpu_buffer {
+ struct percpu_buffer_entry c[CPU_SETSIZE];
+};
+
+#define MEMCPY_BUFFER_ITEM_PER_CPU 100
+
+struct percpu_memcpy_buffer_node {
+ intptr_t data1;
+ uint64_t data2;
+};
+
+struct percpu_memcpy_buffer_entry {
+ intptr_t offset;
+ intptr_t buflen;
+ struct percpu_memcpy_buffer_node *array;
+} __attribute__((aligned(128)));
+
+struct percpu_memcpy_buffer {
+ struct percpu_memcpy_buffer_entry c[CPU_SETSIZE];
+};
+
+/* A simple percpu spinlock. Grabs lock on current cpu. */
+static int rseq_this_cpu_lock(struct percpu_lock *lock)
+{
+ int cpu;
+
+ for (;;) {
+ int ret;
+
+ cpu = rseq_cpu_start();
+ ret = rseq_cmpeqv_storev(&lock->c[cpu].v,
+ 0, 1, cpu);
+ if (rseq_likely(!ret))
+ break;
+ /* Retry if comparison fails or rseq aborts. */
+ }
+ /*
+ * Acquire semantic when taking lock after control dependency.
+ * Matches rseq_smp_store_release().
+ */
+ rseq_smp_acquire__after_ctrl_dep();
+ return cpu;
+}
+
+static void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
+{
+ assert(lock->c[cpu].v == 1);
+ /*
+ * Release lock, with release semantic. Matches
+ * rseq_smp_acquire__after_ctrl_dep().
+ */
+ rseq_smp_store_release(&lock->c[cpu].v, 0);
+}
+
+void *test_percpu_spinlock_thread(void *arg)
+{
+ struct spinlock_thread_test_data *thread_data = arg;
+ struct spinlock_test_data *data = thread_data->data;
+ long long i, reps;
+
+ if (!opt_disable_rseq && thread_data->reg &&
+ rseq_register_current_thread())
+ abort();
+ reps = thread_data->reps;
+ for (i = 0; i < reps; i++) {
+ int cpu = rseq_cpu_start();
+
+ cpu = rseq_this_cpu_lock(&data->lock);
+ data->c[cpu].count++;
+ rseq_percpu_unlock(&data->lock, cpu);
+#ifndef BENCHMARK
+ if (i != 0 && !(i % (reps / 10)))
+ printf_verbose("tid %d: count %lld\n",
+ (int) rseq_gettid(), i);
+#endif
+ }
+ printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
+ (int) rseq_gettid(), nr_abort, signals_delivered);
+ if (!opt_disable_rseq && thread_data->reg &&
+ rseq_unregister_current_thread())
+ abort();
+ return NULL;
+}
+
+/*
+ * A simple test which implements a sharded counter using a per-cpu
+ * lock. Obviously real applications might prefer to simply use a
+ * per-cpu increment; however, this is reasonable for a test and the
+ * lock can be extended to synchronize more complicated operations.
+ */
+void test_percpu_spinlock(void)
+{
+ const int num_threads = opt_threads;
+ int i, ret;
+ uint64_t sum;
+ pthread_t test_threads[num_threads];
+ struct spinlock_test_data data;
+ struct spinlock_thread_test_data thread_data[num_threads];
+
+ memset(&data, 0, sizeof(data));
+ for (i = 0; i < num_threads; i++) {
+ thread_data[i].reps = opt_reps;
+ if (opt_disable_mod <= 0 || (i % opt_disable_mod))
+ thread_data[i].reg = 1;
+ else
+ thread_data[i].reg = 0;
+ thread_data[i].data = &data;
+ ret = pthread_create(&test_threads[i], NULL,
+ test_percpu_spinlock_thread,
+ &thread_data[i]);
+ if (ret) {
+ errno = ret;
+ perror("pthread_create");
+ abort();
+ }
+ }
+
+ for (i = 0; i < num_threads; i++) {
+ ret = pthread_join(test_threads[i], NULL);
+ if (ret) {
+ errno = ret;
+ perror("pthread_join");
+ abort();
+ }
+ }
+
+ sum = 0;
+ for (i = 0; i < CPU_SETSIZE; i++)
+ sum += data.c[i].count;
+
+ assert(sum == (uint64_t)opt_reps * num_threads);
+}
+
+void *test_percpu_inc_thread(void *arg)
+{
+ struct inc_thread_test_data *thread_data = arg;
+ struct inc_test_data *data = thread_data->data;
+ long long i, reps;
+
+ if (!opt_disable_rseq && thread_data->reg &&
+ rseq_register_current_thread())
+ abort();
+ reps = thread_data->reps;
+ for (i = 0; i < reps; i++) {
+ int ret;
+
+ do {
+ int cpu;
+
+ cpu = rseq_cpu_start();
+ ret = rseq_addv(&data->c[cpu].count, 1, cpu);
+ } while (rseq_unlikely(ret));
+#ifndef BENCHMARK
+ if (i != 0 && !(i % (reps / 10)))
+ printf_verbose("tid %d: count %lld\n",
+ (int) rseq_gettid(), i);
+#endif
+ }
+ printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
+ (int) rseq_gettid(), nr_abort, signals_delivered);
+ if (!opt_disable_rseq && thread_data->reg &&
+ rseq_unregister_current_thread())
+ abort();
+ return NULL;
+}
+
+void test_percpu_inc(void)
+{
+ const int num_threads = opt_threads;
+ int i, ret;
+ uint64_t sum;
+ pthread_t test_threads[num_threads];
+ struct inc_test_data data;
+ struct inc_thread_test_data thread_data[num_threads];
+
+ memset(&data, 0, sizeof(data));
+ for (i = 0; i < num_threads; i++) {
+ thread_data[i].reps = opt_reps;
+ if (opt_disable_mod <= 0 || (i % opt_disable_mod))
+ thread_data[i].reg = 1;
+ else
+ thread_data[i].reg = 0;
+ thread_data[i].data = &data;
+ ret = pthread_create(&test_threads[i], NULL,
+ test_percpu_inc_thread,
+ &thread_data[i]);
+ if (ret) {
+ errno = ret;
+ perror("pthread_create");
+ abort();
+ }
+ }
+
+ for (i = 0; i < num_threads; i++) {
+ ret = pthread_join(test_threads[i], NULL);
+ if (ret) {
+ errno = ret;
+ perror("pthread_join");
+ abort();
+ }
+ }
+
+ sum = 0;
+ for (i = 0; i < CPU_SETSIZE; i++)
+ sum += data.c[i].count;
+
+ assert(sum == (uint64_t)opt_reps * num_threads);
+}
+
+void this_cpu_list_push(struct percpu_list *list,
+ struct percpu_list_node *node,
+ int *_cpu)
+{
+ int cpu;
+
+ for (;;) {
+ intptr_t *targetptr, newval, expect;
+ int ret;
+
+ cpu = rseq_cpu_start();
+ /* Load list->c[cpu].head with single-copy atomicity. */
+ expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
+ newval = (intptr_t)node;
+ targetptr = (intptr_t *)&list->c[cpu].head;
+ node->next = (struct percpu_list_node *)expect;
+ ret = rseq_cmpeqv_storev(targetptr, expect, newval, cpu);
+ if (rseq_likely(!ret))
+ break;
+ /* Retry if comparison fails or rseq aborts. */
+ }
+ if (_cpu)
+ *_cpu = cpu;
+}
+
+/*
+ * Unlike a traditional lock-less linked list; the availability of a
+ * rseq primitive allows us to implement pop without concerns over
+ * ABA-type races.
+ */
+struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
+ int *_cpu)
+{
+ struct percpu_list_node *node = NULL;
+ int cpu;
+
+ for (;;) {
+ struct percpu_list_node *head;
+ intptr_t *targetptr, expectnot, *load;
+ off_t offset;
+ int ret;
+
+ cpu = rseq_cpu_start();
+ targetptr = (intptr_t *)&list->c[cpu].head;
+ expectnot = (intptr_t)NULL;
+ offset = offsetof(struct percpu_list_node, next);
+ load = (intptr_t *)&head;
+ ret = rseq_cmpnev_storeoffp_load(targetptr, expectnot,
+ offset, load, cpu);
+ if (rseq_likely(!ret)) {
+ node = head;
+ break;
+ }
+ if (ret > 0)
+ break;
+ /* Retry if rseq aborts. */
+ }
+ if (_cpu)
+ *_cpu = cpu;
+ return node;
+}
+
+/*
+ * __percpu_list_pop is not safe against concurrent accesses. Should
+ * only be used on lists that are not concurrently modified.
+ */
+struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu)
+{
+ struct percpu_list_node *node;
+
+ node = list->c[cpu].head;
+ if (!node)
+ return NULL;
+ list->c[cpu].head = node->next;
+ return node;
+}
+
+void *test_percpu_list_thread(void *arg)
+{
+ long long i, reps;
+ struct percpu_list *list = (struct percpu_list *)arg;
+
+ if (!opt_disable_rseq && rseq_register_current_thread())
+ abort();
+
+ reps = opt_reps;
+ for (i = 0; i < reps; i++) {
+ struct percpu_list_node *node;
+
+ node = this_cpu_list_pop(list, NULL);
+ if (opt_yield)
+ sched_yield(); /* encourage shuffling */
+ if (node)
+ this_cpu_list_push(list, node, NULL);
+ }
+
+ printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
+ (int) rseq_gettid(), nr_abort, signals_delivered);
+ if (!opt_disable_rseq && rseq_unregister_current_thread())
+ abort();
+
+ return NULL;
+}
+
+/* Simultaneous modification to a per-cpu linked list from many threads. */
+void test_percpu_list(void)
+{
+ const int num_threads = opt_threads;
+ int i, j, ret;
+ uint64_t sum = 0, expected_sum = 0;
+ struct percpu_list list;
+ pthread_t test_threads[num_threads];
+ cpu_set_t allowed_cpus;
+
+ memset(&list, 0, sizeof(list));
+
+ /* Generate list entries for every usable cpu. */
+ sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
+ for (i = 0; i < CPU_SETSIZE; i++) {
+ if (!CPU_ISSET(i, &allowed_cpus))
+ continue;
+ for (j = 1; j <= 100; j++) {
+ struct percpu_list_node *node;
+
+ expected_sum += j;
+
+ node = malloc(sizeof(*node));
+ assert(node);
+ node->data = j;
+ node->next = list.c[i].head;
+ list.c[i].head = node;
+ }
+ }
+
+ for (i = 0; i < num_threads; i++) {
+ ret = pthread_create(&test_threads[i], NULL,
+ test_percpu_list_thread, &list);
+ if (ret) {
+ errno = ret;
+ perror("pthread_create");
+ abort();
+ }
+ }
+
+ for (i = 0; i < num_threads; i++) {
+ ret = pthread_join(test_threads[i], NULL);
+ if (ret) {
+ errno = ret;
+ perror("pthread_join");
+ abort();
+ }
+ }
+
+ for (i = 0; i < CPU_SETSIZE; i++) {
+ struct percpu_list_node *node;
+
+ if (!CPU_ISSET(i, &allowed_cpus))
+ continue;
+
+ while ((node = __percpu_list_pop(&list, i))) {
+ sum += node->data;
+ free(node);
+ }
+ }
+
+ /*
+ * All entries should now be accounted for (unless some external
+ * actor is interfering with our allowed affinity while this
+ * test is running).
+ */
+ assert(sum == expected_sum);
+}
+
+bool this_cpu_buffer_push(struct percpu_buffer *buffer,
+ struct percpu_buffer_node *node,
+ int *_cpu)
+{
+ bool result = false;
+ int cpu;
+
+ for (;;) {
+ intptr_t *targetptr_spec, newval_spec;
+ intptr_t *targetptr_final, newval_final;
+ intptr_t offset;
+ int ret;
+
+ cpu = rseq_cpu_start();
+ offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
+ if (offset == buffer->c[cpu].buflen)
+ break;
+ newval_spec = (intptr_t)node;
+ targetptr_spec = (intptr_t *)&buffer->c[cpu].array[offset];
+ newval_final = offset + 1;
+ targetptr_final = &buffer->c[cpu].offset;
+ if (opt_mb)
+ ret = rseq_cmpeqv_trystorev_storev_release(
+ targetptr_final, offset, targetptr_spec,
+ newval_spec, newval_final, cpu);
+ else
+ ret = rseq_cmpeqv_trystorev_storev(targetptr_final,
+ offset, targetptr_spec, newval_spec,
+ newval_final, cpu);
+ if (rseq_likely(!ret)) {
+ result = true;
+ break;
+ }
+ /* Retry if comparison fails or rseq aborts. */
+ }
+ if (_cpu)
+ *_cpu = cpu;
+ return result;
+}
+
+struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer *buffer,
+ int *_cpu)
+{
+ struct percpu_buffer_node *head;
+ int cpu;
+
+ for (;;) {
+ intptr_t *targetptr, newval;
+ intptr_t offset;
+ int ret;
+
+ cpu = rseq_cpu_start();
+ /* Load offset with single-copy atomicity. */
+ offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
+ if (offset == 0) {
+ head = NULL;
+ break;
+ }
+ head = RSEQ_READ_ONCE(buffer->c[cpu].array[offset - 1]);
+ newval = offset - 1;
+ targetptr = (intptr_t *)&buffer->c[cpu].offset;
+ ret = rseq_cmpeqv_cmpeqv_storev(targetptr, offset,
+ (intptr_t *)&buffer->c[cpu].array[offset - 1],
+ (intptr_t)head, newval, cpu);
+ if (rseq_likely(!ret))
+ break;
+ /* Retry if comparison fails or rseq aborts. */
+ }
+ if (_cpu)
+ *_cpu = cpu;
+ return head;
+}
+
+/*
+ * __percpu_buffer_pop is not safe against concurrent accesses. Should
+ * only be used on buffers that are not concurrently modified.
+ */
+struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer *buffer,
+ int cpu)
+{
+ struct percpu_buffer_node *head;
+ intptr_t offset;
+
+ offset = buffer->c[cpu].offset;
+ if (offset == 0)
+ return NULL;
+ head = buffer->c[cpu].array[offset - 1];
+ buffer->c[cpu].offset = offset - 1;
+ return head;
+}
+
+void *test_percpu_buffer_thread(void *arg)
+{
+ long long i, reps;
+ struct percpu_buffer *buffer = (struct percpu_buffer *)arg;
+
+ if (!opt_disable_rseq && rseq_register_current_thread())
+ abort();
+
+ reps = opt_reps;
+ for (i = 0; i < reps; i++) {
+ struct percpu_buffer_node *node;
+
+ node = this_cpu_buffer_pop(buffer, NULL);
+ if (opt_yield)
+ sched_yield(); /* encourage shuffling */
+ if (node) {
+ if (!this_cpu_buffer_push(buffer, node, NULL)) {
+ /* Should increase buffer size. */
+ abort();
+ }
+ }
+ }
+
+ printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
+ (int) rseq_gettid(), nr_abort, signals_delivered);
+ if (!opt_disable_rseq && rseq_unregister_current_thread())
+ abort();
+
+ return NULL;
+}
+
+/* Simultaneous modification to a per-cpu buffer from many threads. */
+void test_percpu_buffer(void)
+{
+ const int num_threads = opt_threads;
+ int i, j, ret;
+ uint64_t sum = 0, expected_sum = 0;
+ struct percpu_buffer buffer;
+ pthread_t test_threads[num_threads];
+ cpu_set_t allowed_cpus;
+
+ memset(&buffer, 0, sizeof(buffer));
+
+ /* Generate list entries for every usable cpu. */
+ sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
+ for (i = 0; i < CPU_SETSIZE; i++) {
+ if (!CPU_ISSET(i, &allowed_cpus))
+ continue;
+ /* Worse-case is every item in same CPU. */
+ buffer.c[i].array =
+ malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
+ BUFFER_ITEM_PER_CPU);
+ assert(buffer.c[i].array);
+ buffer.c[i].buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU;
+ for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) {
+ struct percpu_buffer_node *node;
+
+ expected_sum += j;
+
+ /*
+ * We could theoretically put the word-sized
+ * "data" directly in the buffer. However, we
+ * want to model objects that would not fit
+ * within a single word, so allocate an object
+ * for each node.
+ */
+ node = malloc(sizeof(*node));
+ assert(node);
+ node->data = j;
+ buffer.c[i].array[j - 1] = node;
+ buffer.c[i].offset++;
+ }
+ }
+
+ for (i = 0; i < num_threads; i++) {
+ ret = pthread_create(&test_threads[i], NULL,
+ test_percpu_buffer_thread, &buffer);
+ if (ret) {
+ errno = ret;
+ perror("pthread_create");
+ abort();
+ }
+ }
+
+ for (i = 0; i < num_threads; i++) {
+ ret = pthread_join(test_threads[i], NULL);
+ if (ret) {
+ errno = ret;
+ perror("pthread_join");
+ abort();
+ }
+ }
+
+ for (i = 0; i < CPU_SETSIZE; i++) {
+ struct percpu_buffer_node *node;
+
+ if (!CPU_ISSET(i, &allowed_cpus))
+ continue;
+
+ while ((node = __percpu_buffer_pop(&buffer, i))) {
+ sum += node->data;
+ free(node);
+ }
+ free(buffer.c[i].array);
+ }
+
+ /*
+ * All entries should now be accounted for (unless some external
+ * actor is interfering with our allowed affinity while this
+ * test is running).
+ */
+ assert(sum == expected_sum);
+}
+
+bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer,
+ struct percpu_memcpy_buffer_node item,
+ int *_cpu)
+{
+ bool result = false;
+ int cpu;
+
+ for (;;) {
+ intptr_t *targetptr_final, newval_final, offset;
+ char *destptr, *srcptr;
+ size_t copylen;
+ int ret;
+
+ cpu = rseq_cpu_start();
+ /* Load offset with single-copy atomicity. */
+ offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
+ if (offset == buffer->c[cpu].buflen)
+ break;
+ destptr = (char *)&buffer->c[cpu].array[offset];
+ srcptr = (char *)&item;
+ /* copylen must be <= 4kB. */
+ copylen = sizeof(item);
+ newval_final = offset + 1;
+ targetptr_final = &buffer->c[cpu].offset;
+ if (opt_mb)
+ ret = rseq_cmpeqv_trymemcpy_storev_release(
+ targetptr_final, offset,
+ destptr, srcptr, copylen,
+ newval_final, cpu);
+ else
+ ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final,
+ offset, destptr, srcptr, copylen,
+ newval_final, cpu);
+ if (rseq_likely(!ret)) {
+ result = true;
+ break;
+ }
+ /* Retry if comparison fails or rseq aborts. */
+ }
+ if (_cpu)
+ *_cpu = cpu;
+ return result;
+}
+
+bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
+ struct percpu_memcpy_buffer_node *item,
+ int *_cpu)
+{
+ bool result = false;
+ int cpu;
+
+ for (;;) {
+ intptr_t *targetptr_final, newval_final, offset;
+ char *destptr, *srcptr;
+ size_t copylen;
+ int ret;
+
+ cpu = rseq_cpu_start();
+ /* Load offset with single-copy atomicity. */
+ offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
+ if (offset == 0)
+ break;
+ destptr = (char *)item;
+ srcptr = (char *)&buffer->c[cpu].array[offset - 1];
+ /* copylen must be <= 4kB. */
+ copylen = sizeof(*item);
+ newval_final = offset - 1;
+ targetptr_final = &buffer->c[cpu].offset;
+ ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final,
+ offset, destptr, srcptr, copylen,
+ newval_final, cpu);
+ if (rseq_likely(!ret)) {
+ result = true;
+ break;
+ }
+ /* Retry if comparison fails or rseq aborts. */
+ }
+ if (_cpu)
+ *_cpu = cpu;
+ return result;
+}
+
+/*
+ * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should
+ * only be used on buffers that are not concurrently modified.
+ */
+bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
+ struct percpu_memcpy_buffer_node *item,
+ int cpu)
+{
+ intptr_t offset;
+
+ offset = buffer->c[cpu].offset;
+ if (offset == 0)
+ return false;
+ memcpy(item, &buffer->c[cpu].array[offset - 1], sizeof(*item));
+ buffer->c[cpu].offset = offset - 1;
+ return true;
+}
+
+void *test_percpu_memcpy_buffer_thread(void *arg)
+{
+ long long i, reps;
+ struct percpu_memcpy_buffer *buffer = (struct percpu_memcpy_buffer *)arg;
+
+ if (!opt_disable_rseq && rseq_register_current_thread())
+ abort();
+
+ reps = opt_reps;
+ for (i = 0; i < reps; i++) {
+ struct percpu_memcpy_buffer_node item;
+ bool result;
+
+ result = this_cpu_memcpy_buffer_pop(buffer, &item, NULL);
+ if (opt_yield)
+ sched_yield(); /* encourage shuffling */
+ if (result) {
+ if (!this_cpu_memcpy_buffer_push(buffer, item, NULL)) {
+ /* Should increase buffer size. */
+ abort();
+ }
+ }
+ }
+
+ printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
+ (int) rseq_gettid(), nr_abort, signals_delivered);
+ if (!opt_disable_rseq && rseq_unregister_current_thread())
+ abort();
+
+ return NULL;
+}
+
+/* Simultaneous modification to a per-cpu buffer from many threads. */
+void test_percpu_memcpy_buffer(void)
+{
+ const int num_threads = opt_threads;
+ int i, j, ret;
+ uint64_t sum = 0, expected_sum = 0;
+ struct percpu_memcpy_buffer buffer;
+ pthread_t test_threads[num_threads];
+ cpu_set_t allowed_cpus;
+
+ memset(&buffer, 0, sizeof(buffer));
+
+ /* Generate list entries for every usable cpu. */
+ sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
+ for (i = 0; i < CPU_SETSIZE; i++) {
+ if (!CPU_ISSET(i, &allowed_cpus))
+ continue;
+ /* Worse-case is every item in same CPU. */
+ buffer.c[i].array =
+ malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
+ MEMCPY_BUFFER_ITEM_PER_CPU);
+ assert(buffer.c[i].array);
+ buffer.c[i].buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU;
+ for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) {
+ expected_sum += 2 * j + 1;
+
+ /*
+ * We could theoretically put the word-sized
+ * "data" directly in the buffer. However, we
+ * want to model objects that would not fit
+ * within a single word, so allocate an object
+ * for each node.
+ */
+ buffer.c[i].array[j - 1].data1 = j;
+ buffer.c[i].array[j - 1].data2 = j + 1;
+ buffer.c[i].offset++;
+ }
+ }
+
+ for (i = 0; i < num_threads; i++) {
+ ret = pthread_create(&test_threads[i], NULL,
+ test_percpu_memcpy_buffer_thread,
+ &buffer);
+ if (ret) {
+ errno = ret;
+ perror("pthread_create");
+ abort();
+ }
+ }
+
+ for (i = 0; i < num_threads; i++) {
+ ret = pthread_join(test_threads[i], NULL);
+ if (ret) {
+ errno = ret;
+ perror("pthread_join");
+ abort();
+ }
+ }
+
+ for (i = 0; i < CPU_SETSIZE; i++) {
+ struct percpu_memcpy_buffer_node item;
+
+ if (!CPU_ISSET(i, &allowed_cpus))
+ continue;
+
+ while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) {
+ sum += item.data1;
+ sum += item.data2;
+ }
+ free(buffer.c[i].array);
+ }
+
+ /*
+ * All entries should now be accounted for (unless some external
+ * actor is interfering with our allowed affinity while this
+ * test is running).
+ */
+ assert(sum == expected_sum);
+}
+
+static void test_signal_interrupt_handler(int signo)
+{
+ signals_delivered++;
+}
+
+static int set_signal_handler(void)
+{
+ int ret = 0;
+ struct sigaction sa;
+ sigset_t sigset;
+
+ ret = sigemptyset(&sigset);
+ if (ret < 0) {
+ perror("sigemptyset");
+ return ret;
+ }
+
+ sa.sa_handler = test_signal_interrupt_handler;
+ sa.sa_mask = sigset;
+ sa.sa_flags = 0;
+ ret = sigaction(SIGUSR1, &sa, NULL);
+ if (ret < 0) {
+ perror("sigaction");
+ return ret;
+ }
+
+ printf_verbose("Signal handler set for SIGUSR1\n");
+
+ return ret;
+}
+
+static void show_usage(int argc, char **argv)
+{
+ printf("Usage : %s <OPTIONS>\n",
+ argv[0]);
+ printf("OPTIONS:\n");
+ printf(" [-1 loops] Number of loops for delay injection 1\n");
+ printf(" [-2 loops] Number of loops for delay injection 2\n");
+ printf(" [-3 loops] Number of loops for delay injection 3\n");
+ printf(" [-4 loops] Number of loops for delay injection 4\n");
+ printf(" [-5 loops] Number of loops for delay injection 5\n");
+ printf(" [-6 loops] Number of loops for delay injection 6\n");
+ printf(" [-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n");
+ printf(" [-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n");
+ printf(" [-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n");
+ printf(" [-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n");
+ printf(" [-y] Yield\n");
+ printf(" [-k] Kill thread with signal\n");
+ printf(" [-s S] S: =0: disabled (default), >0: sleep time (ms)\n");
+ printf(" [-t N] Number of threads (default 200)\n");
+ printf(" [-r N] Number of repetitions per thread (default 5000)\n");
+ printf(" [-d] Disable rseq system call (no initialization)\n");
+ printf(" [-D M] Disable rseq for each M threads\n");
+ printf(" [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement\n");
+ printf(" [-M] Push into buffer and memcpy buffer with memory barriers.\n");
+ printf(" [-v] Verbose output.\n");
+ printf(" [-h] Show this help.\n");
+ printf("\n");
+}
+
+int main(int argc, char **argv)
+{
+ int i;
+
+ for (i = 1; i < argc; i++) {
+ if (argv[i][0] != '-')
+ continue;
+ switch (argv[i][1]) {
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ if (argc < i + 2) {
+ show_usage(argc, argv);
+ goto error;
+ }
+ loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]);
+ i++;
+ break;
+ case 'm':
+ if (argc < i + 2) {
+ show_usage(argc, argv);
+ goto error;
+ }
+ opt_modulo = atol(argv[i + 1]);
+ if (opt_modulo < 0) {
+ show_usage(argc, argv);
+ goto error;
+ }
+ i++;
+ break;
+ case 's':
+ if (argc < i + 2) {
+ show_usage(argc, argv);
+ goto error;
+ }
+ opt_sleep = atol(argv[i + 1]);
+ if (opt_sleep < 0) {
+ show_usage(argc, argv);
+ goto error;
+ }
+ i++;
+ break;
+ case 'y':
+ opt_yield = 1;
+ break;
+ case 'k':
+ opt_signal = 1;
+ break;
+ case 'd':
+ opt_disable_rseq = 1;
+ break;
+ case 'D':
+ if (argc < i + 2) {
+ show_usage(argc, argv);
+ goto error;
+ }
+ opt_disable_mod = atol(argv[i + 1]);
+ if (opt_disable_mod < 0) {
+ show_usage(argc, argv);
+ goto error;
+ }
+ i++;
+ break;
+ case 't':
+ if (argc < i + 2) {
+ show_usage(argc, argv);
+ goto error;
+ }
+ opt_threads = atol(argv[i + 1]);
+ if (opt_threads < 0) {
+ show_usage(argc, argv);
+ goto error;
+ }
+ i++;
+ break;
+ case 'r':
+ if (argc < i + 2) {
+ show_usage(argc, argv);
+ goto error;
+ }
+ opt_reps = atoll(argv[i + 1]);
+ if (opt_reps < 0) {
+ show_usage(argc, argv);
+ goto error;
+ }
+ i++;
+ break;
+ case 'h':
+ show_usage(argc, argv);
+ goto end;
+ case 'T':
+ if (argc < i + 2) {
+ show_usage(argc, argv);
+ goto error;
+ }
+ opt_test = *argv[i + 1];
+ switch (opt_test) {
+ case 's':
+ case 'l':
+ case 'i':
+ case 'b':
+ case 'm':
+ break;
+ default:
+ show_usage(argc, argv);
+ goto error;
+ }
+ i++;
+ break;
+ case 'v':
+ verbose = 1;
+ break;
+ case 'M':
+ opt_mb = 1;
+ break;
+ default:
+ show_usage(argc, argv);
+ goto error;
+ }
+ }
+
+ loop_cnt_1 = loop_cnt[1];
+ loop_cnt_2 = loop_cnt[2];
+ loop_cnt_3 = loop_cnt[3];
+ loop_cnt_4 = loop_cnt[4];
+ loop_cnt_5 = loop_cnt[5];
+ loop_cnt_6 = loop_cnt[6];
+
+ if (set_signal_handler())
+ goto error;
+
+ if (!opt_disable_rseq && rseq_register_current_thread())
+ goto error;
+ switch (opt_test) {
+ case 's':
+ printf_verbose("spinlock\n");
+ test_percpu_spinlock();
+ break;
+ case 'l':
+ printf_verbose("linked list\n");
+ test_percpu_list();
+ break;
+ case 'b':
+ printf_verbose("buffer\n");
+ test_percpu_buffer();
+ break;
+ case 'm':
+ printf_verbose("memcpy buffer\n");
+ test_percpu_memcpy_buffer();
+ break;
+ case 'i':
+ printf_verbose("counter increment\n");
+ test_percpu_inc();
+ break;
+ }
+ if (!opt_disable_rseq && rseq_unregister_current_thread())
+ abort();
+end:
+ return 0;
+
+error:
+ return -1;
+}
diff --git a/tools/testing/selftests/rseq/rseq-arm.h b/tools/testing/selftests/rseq/rseq-arm.h
new file mode 100644
index 000000000..3cea19877
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-arm.h
@@ -0,0 +1,716 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * rseq-arm.h
+ *
+ * (C) Copyright 2016-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ */
+
+#define RSEQ_SIG 0x53053053
+
+#define rseq_smp_mb() __asm__ __volatile__ ("dmb" ::: "memory", "cc")
+#define rseq_smp_rmb() __asm__ __volatile__ ("dmb" ::: "memory", "cc")
+#define rseq_smp_wmb() __asm__ __volatile__ ("dmb" ::: "memory", "cc")
+
+#define rseq_smp_load_acquire(p) \
+__extension__ ({ \
+ __typeof(*p) ____p1 = RSEQ_READ_ONCE(*p); \
+ rseq_smp_mb(); \
+ ____p1; \
+})
+
+#define rseq_smp_acquire__after_ctrl_dep() rseq_smp_rmb()
+
+#define rseq_smp_store_release(p, v) \
+do { \
+ rseq_smp_mb(); \
+ RSEQ_WRITE_ONCE(*p, v); \
+} while (0)
+
+#ifdef RSEQ_SKIP_FASTPATH
+#include "rseq-skip.h"
+#else /* !RSEQ_SKIP_FASTPATH */
+
+#define __RSEQ_ASM_DEFINE_TABLE(version, flags, start_ip, \
+ post_commit_offset, abort_ip) \
+ ".pushsection __rseq_table, \"aw\"\n\t" \
+ ".balign 32\n\t" \
+ ".word " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
+ ".word " __rseq_str(start_ip) ", 0x0, " __rseq_str(post_commit_offset) ", 0x0, " __rseq_str(abort_ip) ", 0x0\n\t" \
+ ".popsection\n\t"
+
+#define RSEQ_ASM_DEFINE_TABLE(start_ip, post_commit_ip, abort_ip) \
+ __RSEQ_ASM_DEFINE_TABLE(0x0, 0x0, start_ip, \
+ (post_commit_ip - start_ip), abort_ip)
+
+#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \
+ RSEQ_INJECT_ASM(1) \
+ "adr r0, " __rseq_str(cs_label) "\n\t" \
+ "str r0, %[" __rseq_str(rseq_cs) "]\n\t" \
+ __rseq_str(label) ":\n\t"
+
+#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label) \
+ RSEQ_INJECT_ASM(2) \
+ "ldr r0, %[" __rseq_str(current_cpu_id) "]\n\t" \
+ "cmp %[" __rseq_str(cpu_id) "], r0\n\t" \
+ "bne " __rseq_str(label) "\n\t"
+
+#define __RSEQ_ASM_DEFINE_ABORT(table_label, label, teardown, \
+ abort_label, version, flags, \
+ start_ip, post_commit_offset, abort_ip) \
+ ".balign 32\n\t" \
+ __rseq_str(table_label) ":\n\t" \
+ ".word " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
+ ".word " __rseq_str(start_ip) ", 0x0, " __rseq_str(post_commit_offset) ", 0x0, " __rseq_str(abort_ip) ", 0x0\n\t" \
+ ".word " __rseq_str(RSEQ_SIG) "\n\t" \
+ __rseq_str(label) ":\n\t" \
+ teardown \
+ "b %l[" __rseq_str(abort_label) "]\n\t"
+
+#define RSEQ_ASM_DEFINE_ABORT(table_label, label, teardown, abort_label, \
+ start_ip, post_commit_ip, abort_ip) \
+ __RSEQ_ASM_DEFINE_ABORT(table_label, label, teardown, \
+ abort_label, 0x0, 0x0, start_ip, \
+ (post_commit_ip - start_ip), abort_ip)
+
+#define RSEQ_ASM_DEFINE_CMPFAIL(label, teardown, cmpfail_label) \
+ __rseq_str(label) ":\n\t" \
+ teardown \
+ "b %l[" __rseq_str(cmpfail_label) "]\n\t"
+
+#define rseq_workaround_gcc_asm_size_guess() __asm__ __volatile__("")
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ rseq_workaround_gcc_asm_size_guess();
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ "ldr r0, %[v]\n\t"
+ "cmp %[expect], r0\n\t"
+ "bne %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ "ldr r0, %[v]\n\t"
+ "cmp %[expect], r0\n\t"
+ "bne %l[error2]\n\t"
+#endif
+ /* final store */
+ "str %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(5)
+ "b 5f\n\t"
+ RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+ "5:\n\t"
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+ RSEQ_INJECT_INPUT
+ : "r0", "memory", "cc"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ rseq_workaround_gcc_asm_size_guess();
+ return 0;
+abort:
+ rseq_workaround_gcc_asm_size_guess();
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ rseq_workaround_gcc_asm_size_guess();
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
+ off_t voffp, intptr_t *load, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ rseq_workaround_gcc_asm_size_guess();
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ "ldr r0, %[v]\n\t"
+ "cmp %[expectnot], r0\n\t"
+ "beq %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ "ldr r0, %[v]\n\t"
+ "cmp %[expectnot], r0\n\t"
+ "beq %l[error2]\n\t"
+#endif
+ "str r0, %[load]\n\t"
+ "add r0, %[voffp]\n\t"
+ "ldr r0, [r0]\n\t"
+ /* final store */
+ "str r0, %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(5)
+ "b 5f\n\t"
+ RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+ "5:\n\t"
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* final store input */
+ [v] "m" (*v),
+ [expectnot] "r" (expectnot),
+ [voffp] "Ir" (voffp),
+ [load] "m" (*load)
+ RSEQ_INJECT_INPUT
+ : "r0", "memory", "cc"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ rseq_workaround_gcc_asm_size_guess();
+ return 0;
+abort:
+ rseq_workaround_gcc_asm_size_guess();
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ rseq_workaround_gcc_asm_size_guess();
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_addv(intptr_t *v, intptr_t count, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ rseq_workaround_gcc_asm_size_guess();
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+#endif
+ "ldr r0, %[v]\n\t"
+ "add r0, %[count]\n\t"
+ /* final store */
+ "str r0, %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(4)
+ "b 5f\n\t"
+ RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+ "5:\n\t"
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [v] "m" (*v),
+ [count] "Ir" (count)
+ RSEQ_INJECT_INPUT
+ : "r0", "memory", "cc"
+ RSEQ_INJECT_CLOBBER
+ : abort
+#ifdef RSEQ_COMPARE_TWICE
+ , error1
+#endif
+ );
+ rseq_workaround_gcc_asm_size_guess();
+ return 0;
+abort:
+ rseq_workaround_gcc_asm_size_guess();
+ RSEQ_INJECT_FAILED
+ return -1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t newv2,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ rseq_workaround_gcc_asm_size_guess();
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ "ldr r0, %[v]\n\t"
+ "cmp %[expect], r0\n\t"
+ "bne %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ "ldr r0, %[v]\n\t"
+ "cmp %[expect], r0\n\t"
+ "bne %l[error2]\n\t"
+#endif
+ /* try store */
+ "str %[newv2], %[v2]\n\t"
+ RSEQ_INJECT_ASM(5)
+ /* final store */
+ "str %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(6)
+ "b 5f\n\t"
+ RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+ "5:\n\t"
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* try store input */
+ [v2] "m" (*v2),
+ [newv2] "r" (newv2),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+ RSEQ_INJECT_INPUT
+ : "r0", "memory", "cc"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ rseq_workaround_gcc_asm_size_guess();
+ return 0;
+abort:
+ rseq_workaround_gcc_asm_size_guess();
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ rseq_workaround_gcc_asm_size_guess();
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t newv2,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ rseq_workaround_gcc_asm_size_guess();
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ "ldr r0, %[v]\n\t"
+ "cmp %[expect], r0\n\t"
+ "bne %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ "ldr r0, %[v]\n\t"
+ "cmp %[expect], r0\n\t"
+ "bne %l[error2]\n\t"
+#endif
+ /* try store */
+ "str %[newv2], %[v2]\n\t"
+ RSEQ_INJECT_ASM(5)
+ "dmb\n\t" /* full mb provides store-release */
+ /* final store */
+ "str %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(6)
+ "b 5f\n\t"
+ RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+ "5:\n\t"
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* try store input */
+ [v2] "m" (*v2),
+ [newv2] "r" (newv2),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+ RSEQ_INJECT_INPUT
+ : "r0", "memory", "cc"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ rseq_workaround_gcc_asm_size_guess();
+ return 0;
+abort:
+ rseq_workaround_gcc_asm_size_guess();
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ rseq_workaround_gcc_asm_size_guess();
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t expect2,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ rseq_workaround_gcc_asm_size_guess();
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ "ldr r0, %[v]\n\t"
+ "cmp %[expect], r0\n\t"
+ "bne %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+ "ldr r0, %[v2]\n\t"
+ "cmp %[expect2], r0\n\t"
+ "bne %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(5)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ "ldr r0, %[v]\n\t"
+ "cmp %[expect], r0\n\t"
+ "bne %l[error2]\n\t"
+ "ldr r0, %[v2]\n\t"
+ "cmp %[expect2], r0\n\t"
+ "bne %l[error3]\n\t"
+#endif
+ /* final store */
+ "str %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(6)
+ "b 5f\n\t"
+ RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+ "5:\n\t"
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* cmp2 input */
+ [v2] "m" (*v2),
+ [expect2] "r" (expect2),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+ RSEQ_INJECT_INPUT
+ : "r0", "memory", "cc"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2, error3
+#endif
+ );
+ rseq_workaround_gcc_asm_size_guess();
+ return 0;
+abort:
+ rseq_workaround_gcc_asm_size_guess();
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ rseq_workaround_gcc_asm_size_guess();
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("1st expected value comparison failed");
+error3:
+ rseq_bug("2nd expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
+ void *dst, void *src, size_t len,
+ intptr_t newv, int cpu)
+{
+ uint32_t rseq_scratch[3];
+
+ RSEQ_INJECT_C(9)
+
+ rseq_workaround_gcc_asm_size_guess();
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ "str %[src], %[rseq_scratch0]\n\t"
+ "str %[dst], %[rseq_scratch1]\n\t"
+ "str %[len], %[rseq_scratch2]\n\t"
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ "ldr r0, %[v]\n\t"
+ "cmp %[expect], r0\n\t"
+ "bne 5f\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f)
+ "ldr r0, %[v]\n\t"
+ "cmp %[expect], r0\n\t"
+ "bne 7f\n\t"
+#endif
+ /* try memcpy */
+ "cmp %[len], #0\n\t" \
+ "beq 333f\n\t" \
+ "222:\n\t" \
+ "ldrb %%r0, [%[src]]\n\t" \
+ "strb %%r0, [%[dst]]\n\t" \
+ "adds %[src], #1\n\t" \
+ "adds %[dst], #1\n\t" \
+ "subs %[len], #1\n\t" \
+ "bne 222b\n\t" \
+ "333:\n\t" \
+ RSEQ_INJECT_ASM(5)
+ /* final store */
+ "str %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(6)
+ /* teardown */
+ "ldr %[len], %[rseq_scratch2]\n\t"
+ "ldr %[dst], %[rseq_scratch1]\n\t"
+ "ldr %[src], %[rseq_scratch0]\n\t"
+ "b 8f\n\t"
+ RSEQ_ASM_DEFINE_ABORT(3, 4,
+ /* teardown */
+ "ldr %[len], %[rseq_scratch2]\n\t"
+ "ldr %[dst], %[rseq_scratch1]\n\t"
+ "ldr %[src], %[rseq_scratch0]\n\t",
+ abort, 1b, 2b, 4f)
+ RSEQ_ASM_DEFINE_CMPFAIL(5,
+ /* teardown */
+ "ldr %[len], %[rseq_scratch2]\n\t"
+ "ldr %[dst], %[rseq_scratch1]\n\t"
+ "ldr %[src], %[rseq_scratch0]\n\t",
+ cmpfail)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_CMPFAIL(6,
+ /* teardown */
+ "ldr %[len], %[rseq_scratch2]\n\t"
+ "ldr %[dst], %[rseq_scratch1]\n\t"
+ "ldr %[src], %[rseq_scratch0]\n\t",
+ error1)
+ RSEQ_ASM_DEFINE_CMPFAIL(7,
+ /* teardown */
+ "ldr %[len], %[rseq_scratch2]\n\t"
+ "ldr %[dst], %[rseq_scratch1]\n\t"
+ "ldr %[src], %[rseq_scratch0]\n\t",
+ error2)
+#endif
+ "8:\n\t"
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv),
+ /* try memcpy input */
+ [dst] "r" (dst),
+ [src] "r" (src),
+ [len] "r" (len),
+ [rseq_scratch0] "m" (rseq_scratch[0]),
+ [rseq_scratch1] "m" (rseq_scratch[1]),
+ [rseq_scratch2] "m" (rseq_scratch[2])
+ RSEQ_INJECT_INPUT
+ : "r0", "memory", "cc"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ rseq_workaround_gcc_asm_size_guess();
+ return 0;
+abort:
+ rseq_workaround_gcc_asm_size_guess();
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ rseq_workaround_gcc_asm_size_guess();
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_workaround_gcc_asm_size_guess();
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_workaround_gcc_asm_size_guess();
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
+ void *dst, void *src, size_t len,
+ intptr_t newv, int cpu)
+{
+ uint32_t rseq_scratch[3];
+
+ RSEQ_INJECT_C(9)
+
+ rseq_workaround_gcc_asm_size_guess();
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ "str %[src], %[rseq_scratch0]\n\t"
+ "str %[dst], %[rseq_scratch1]\n\t"
+ "str %[len], %[rseq_scratch2]\n\t"
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ "ldr r0, %[v]\n\t"
+ "cmp %[expect], r0\n\t"
+ "bne 5f\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f)
+ "ldr r0, %[v]\n\t"
+ "cmp %[expect], r0\n\t"
+ "bne 7f\n\t"
+#endif
+ /* try memcpy */
+ "cmp %[len], #0\n\t" \
+ "beq 333f\n\t" \
+ "222:\n\t" \
+ "ldrb %%r0, [%[src]]\n\t" \
+ "strb %%r0, [%[dst]]\n\t" \
+ "adds %[src], #1\n\t" \
+ "adds %[dst], #1\n\t" \
+ "subs %[len], #1\n\t" \
+ "bne 222b\n\t" \
+ "333:\n\t" \
+ RSEQ_INJECT_ASM(5)
+ "dmb\n\t" /* full mb provides store-release */
+ /* final store */
+ "str %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(6)
+ /* teardown */
+ "ldr %[len], %[rseq_scratch2]\n\t"
+ "ldr %[dst], %[rseq_scratch1]\n\t"
+ "ldr %[src], %[rseq_scratch0]\n\t"
+ "b 8f\n\t"
+ RSEQ_ASM_DEFINE_ABORT(3, 4,
+ /* teardown */
+ "ldr %[len], %[rseq_scratch2]\n\t"
+ "ldr %[dst], %[rseq_scratch1]\n\t"
+ "ldr %[src], %[rseq_scratch0]\n\t",
+ abort, 1b, 2b, 4f)
+ RSEQ_ASM_DEFINE_CMPFAIL(5,
+ /* teardown */
+ "ldr %[len], %[rseq_scratch2]\n\t"
+ "ldr %[dst], %[rseq_scratch1]\n\t"
+ "ldr %[src], %[rseq_scratch0]\n\t",
+ cmpfail)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_CMPFAIL(6,
+ /* teardown */
+ "ldr %[len], %[rseq_scratch2]\n\t"
+ "ldr %[dst], %[rseq_scratch1]\n\t"
+ "ldr %[src], %[rseq_scratch0]\n\t",
+ error1)
+ RSEQ_ASM_DEFINE_CMPFAIL(7,
+ /* teardown */
+ "ldr %[len], %[rseq_scratch2]\n\t"
+ "ldr %[dst], %[rseq_scratch1]\n\t"
+ "ldr %[src], %[rseq_scratch0]\n\t",
+ error2)
+#endif
+ "8:\n\t"
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv),
+ /* try memcpy input */
+ [dst] "r" (dst),
+ [src] "r" (src),
+ [len] "r" (len),
+ [rseq_scratch0] "m" (rseq_scratch[0]),
+ [rseq_scratch1] "m" (rseq_scratch[1]),
+ [rseq_scratch2] "m" (rseq_scratch[2])
+ RSEQ_INJECT_INPUT
+ : "r0", "memory", "cc"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ rseq_workaround_gcc_asm_size_guess();
+ return 0;
+abort:
+ rseq_workaround_gcc_asm_size_guess();
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ rseq_workaround_gcc_asm_size_guess();
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_workaround_gcc_asm_size_guess();
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_workaround_gcc_asm_size_guess();
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+#endif /* !RSEQ_SKIP_FASTPATH */
diff --git a/tools/testing/selftests/rseq/rseq-arm64.h b/tools/testing/selftests/rseq/rseq-arm64.h
new file mode 100644
index 000000000..954f34671
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-arm64.h
@@ -0,0 +1,594 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * rseq-arm64.h
+ *
+ * (C) Copyright 2016-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ * (C) Copyright 2018 - Will Deacon <will.deacon@arm.com>
+ */
+
+#define RSEQ_SIG 0xd428bc00 /* BRK #0x45E0 */
+
+#define rseq_smp_mb() __asm__ __volatile__ ("dmb ish" ::: "memory")
+#define rseq_smp_rmb() __asm__ __volatile__ ("dmb ishld" ::: "memory")
+#define rseq_smp_wmb() __asm__ __volatile__ ("dmb ishst" ::: "memory")
+
+#define rseq_smp_load_acquire(p) \
+__extension__ ({ \
+ __typeof(*p) ____p1; \
+ switch (sizeof(*p)) { \
+ case 1: \
+ asm volatile ("ldarb %w0, %1" \
+ : "=r" (*(__u8 *)p) \
+ : "Q" (*p) : "memory"); \
+ break; \
+ case 2: \
+ asm volatile ("ldarh %w0, %1" \
+ : "=r" (*(__u16 *)p) \
+ : "Q" (*p) : "memory"); \
+ break; \
+ case 4: \
+ asm volatile ("ldar %w0, %1" \
+ : "=r" (*(__u32 *)p) \
+ : "Q" (*p) : "memory"); \
+ break; \
+ case 8: \
+ asm volatile ("ldar %0, %1" \
+ : "=r" (*(__u64 *)p) \
+ : "Q" (*p) : "memory"); \
+ break; \
+ } \
+ ____p1; \
+})
+
+#define rseq_smp_acquire__after_ctrl_dep() rseq_smp_rmb()
+
+#define rseq_smp_store_release(p, v) \
+do { \
+ switch (sizeof(*p)) { \
+ case 1: \
+ asm volatile ("stlrb %w1, %0" \
+ : "=Q" (*p) \
+ : "r" ((__u8)v) \
+ : "memory"); \
+ break; \
+ case 2: \
+ asm volatile ("stlrh %w1, %0" \
+ : "=Q" (*p) \
+ : "r" ((__u16)v) \
+ : "memory"); \
+ break; \
+ case 4: \
+ asm volatile ("stlr %w1, %0" \
+ : "=Q" (*p) \
+ : "r" ((__u32)v) \
+ : "memory"); \
+ break; \
+ case 8: \
+ asm volatile ("stlr %1, %0" \
+ : "=Q" (*p) \
+ : "r" ((__u64)v) \
+ : "memory"); \
+ break; \
+ } \
+} while (0)
+
+#ifdef RSEQ_SKIP_FASTPATH
+#include "rseq-skip.h"
+#else /* !RSEQ_SKIP_FASTPATH */
+
+#define RSEQ_ASM_TMP_REG32 "w15"
+#define RSEQ_ASM_TMP_REG "x15"
+#define RSEQ_ASM_TMP_REG_2 "x14"
+
+#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, start_ip, \
+ post_commit_offset, abort_ip) \
+ " .pushsection __rseq_table, \"aw\"\n" \
+ " .balign 32\n" \
+ __rseq_str(label) ":\n" \
+ " .long " __rseq_str(version) ", " __rseq_str(flags) "\n" \
+ " .quad " __rseq_str(start_ip) ", " \
+ __rseq_str(post_commit_offset) ", " \
+ __rseq_str(abort_ip) "\n" \
+ " .popsection\n"
+
+#define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip) \
+ __RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip, \
+ (post_commit_ip - start_ip), abort_ip)
+
+#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \
+ RSEQ_INJECT_ASM(1) \
+ " adrp " RSEQ_ASM_TMP_REG ", " __rseq_str(cs_label) "\n" \
+ " add " RSEQ_ASM_TMP_REG ", " RSEQ_ASM_TMP_REG \
+ ", :lo12:" __rseq_str(cs_label) "\n" \
+ " str " RSEQ_ASM_TMP_REG ", %[" __rseq_str(rseq_cs) "]\n" \
+ __rseq_str(label) ":\n"
+
+#define RSEQ_ASM_DEFINE_ABORT(label, abort_label) \
+ " b 222f\n" \
+ " .inst " __rseq_str(RSEQ_SIG) "\n" \
+ __rseq_str(label) ":\n" \
+ " b %l[" __rseq_str(abort_label) "]\n" \
+ "222:\n"
+
+#define RSEQ_ASM_OP_STORE(value, var) \
+ " str %[" __rseq_str(value) "], %[" __rseq_str(var) "]\n"
+
+#define RSEQ_ASM_OP_STORE_RELEASE(value, var) \
+ " stlr %[" __rseq_str(value) "], %[" __rseq_str(var) "]\n"
+
+#define RSEQ_ASM_OP_FINAL_STORE(value, var, post_commit_label) \
+ RSEQ_ASM_OP_STORE(value, var) \
+ __rseq_str(post_commit_label) ":\n"
+
+#define RSEQ_ASM_OP_FINAL_STORE_RELEASE(value, var, post_commit_label) \
+ RSEQ_ASM_OP_STORE_RELEASE(value, var) \
+ __rseq_str(post_commit_label) ":\n"
+
+#define RSEQ_ASM_OP_CMPEQ(var, expect, label) \
+ " ldr " RSEQ_ASM_TMP_REG ", %[" __rseq_str(var) "]\n" \
+ " sub " RSEQ_ASM_TMP_REG ", " RSEQ_ASM_TMP_REG \
+ ", %[" __rseq_str(expect) "]\n" \
+ " cbnz " RSEQ_ASM_TMP_REG ", " __rseq_str(label) "\n"
+
+#define RSEQ_ASM_OP_CMPEQ32(var, expect, label) \
+ " ldr " RSEQ_ASM_TMP_REG32 ", %[" __rseq_str(var) "]\n" \
+ " sub " RSEQ_ASM_TMP_REG32 ", " RSEQ_ASM_TMP_REG32 \
+ ", %w[" __rseq_str(expect) "]\n" \
+ " cbnz " RSEQ_ASM_TMP_REG32 ", " __rseq_str(label) "\n"
+
+#define RSEQ_ASM_OP_CMPNE(var, expect, label) \
+ " ldr " RSEQ_ASM_TMP_REG ", %[" __rseq_str(var) "]\n" \
+ " sub " RSEQ_ASM_TMP_REG ", " RSEQ_ASM_TMP_REG \
+ ", %[" __rseq_str(expect) "]\n" \
+ " cbz " RSEQ_ASM_TMP_REG ", " __rseq_str(label) "\n"
+
+#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label) \
+ RSEQ_INJECT_ASM(2) \
+ RSEQ_ASM_OP_CMPEQ32(current_cpu_id, cpu_id, label)
+
+#define RSEQ_ASM_OP_R_LOAD(var) \
+ " ldr " RSEQ_ASM_TMP_REG ", %[" __rseq_str(var) "]\n"
+
+#define RSEQ_ASM_OP_R_STORE(var) \
+ " str " RSEQ_ASM_TMP_REG ", %[" __rseq_str(var) "]\n"
+
+#define RSEQ_ASM_OP_R_LOAD_OFF(offset) \
+ " ldr " RSEQ_ASM_TMP_REG ", [" RSEQ_ASM_TMP_REG \
+ ", %[" __rseq_str(offset) "]]\n"
+
+#define RSEQ_ASM_OP_R_ADD(count) \
+ " add " RSEQ_ASM_TMP_REG ", " RSEQ_ASM_TMP_REG \
+ ", %[" __rseq_str(count) "]\n"
+
+#define RSEQ_ASM_OP_R_FINAL_STORE(var, post_commit_label) \
+ " str " RSEQ_ASM_TMP_REG ", %[" __rseq_str(var) "]\n" \
+ __rseq_str(post_commit_label) ":\n"
+
+#define RSEQ_ASM_OP_R_BAD_MEMCPY(dst, src, len) \
+ " cbz %[" __rseq_str(len) "], 333f\n" \
+ " mov " RSEQ_ASM_TMP_REG_2 ", %[" __rseq_str(len) "]\n" \
+ "222: sub " RSEQ_ASM_TMP_REG_2 ", " RSEQ_ASM_TMP_REG_2 ", #1\n" \
+ " ldrb " RSEQ_ASM_TMP_REG32 ", [%[" __rseq_str(src) "]" \
+ ", " RSEQ_ASM_TMP_REG_2 "]\n" \
+ " strb " RSEQ_ASM_TMP_REG32 ", [%[" __rseq_str(dst) "]" \
+ ", " RSEQ_ASM_TMP_REG_2 "]\n" \
+ " cbnz " RSEQ_ASM_TMP_REG_2 ", 222b\n" \
+ "333:\n"
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
+#endif
+ RSEQ_ASM_OP_FINAL_STORE(newv, v, 3)
+ RSEQ_INJECT_ASM(5)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "Qo" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [v] "Qo" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+ RSEQ_INJECT_INPUT
+ : "memory", RSEQ_ASM_TMP_REG
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
+ off_t voffp, intptr_t *load, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ RSEQ_ASM_OP_CMPNE(v, expectnot, %l[cmpfail])
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ RSEQ_ASM_OP_CMPNE(v, expectnot, %l[error2])
+#endif
+ RSEQ_ASM_OP_R_LOAD(v)
+ RSEQ_ASM_OP_R_STORE(load)
+ RSEQ_ASM_OP_R_LOAD_OFF(voffp)
+ RSEQ_ASM_OP_R_FINAL_STORE(v, 3)
+ RSEQ_INJECT_ASM(5)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "Qo" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [v] "Qo" (*v),
+ [expectnot] "r" (expectnot),
+ [load] "Qo" (*load),
+ [voffp] "r" (voffp)
+ RSEQ_INJECT_INPUT
+ : "memory", RSEQ_ASM_TMP_REG
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_addv(intptr_t *v, intptr_t count, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+#endif
+ RSEQ_ASM_OP_R_LOAD(v)
+ RSEQ_ASM_OP_R_ADD(count)
+ RSEQ_ASM_OP_R_FINAL_STORE(v, 3)
+ RSEQ_INJECT_ASM(4)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "Qo" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [v] "Qo" (*v),
+ [count] "r" (count)
+ RSEQ_INJECT_INPUT
+ : "memory", RSEQ_ASM_TMP_REG
+ : abort
+#ifdef RSEQ_COMPARE_TWICE
+ , error1
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t newv2,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
+#endif
+ RSEQ_ASM_OP_STORE(newv2, v2)
+ RSEQ_INJECT_ASM(5)
+ RSEQ_ASM_OP_FINAL_STORE(newv, v, 3)
+ RSEQ_INJECT_ASM(6)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "Qo" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [expect] "r" (expect),
+ [v] "Qo" (*v),
+ [newv] "r" (newv),
+ [v2] "Qo" (*v2),
+ [newv2] "r" (newv2)
+ RSEQ_INJECT_INPUT
+ : "memory", RSEQ_ASM_TMP_REG
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t newv2,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
+#endif
+ RSEQ_ASM_OP_STORE(newv2, v2)
+ RSEQ_INJECT_ASM(5)
+ RSEQ_ASM_OP_FINAL_STORE_RELEASE(newv, v, 3)
+ RSEQ_INJECT_ASM(6)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "Qo" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [expect] "r" (expect),
+ [v] "Qo" (*v),
+ [newv] "r" (newv),
+ [v2] "Qo" (*v2),
+ [newv2] "r" (newv2)
+ RSEQ_INJECT_INPUT
+ : "memory", RSEQ_ASM_TMP_REG
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t expect2,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
+ RSEQ_INJECT_ASM(4)
+ RSEQ_ASM_OP_CMPEQ(v2, expect2, %l[cmpfail])
+ RSEQ_INJECT_ASM(5)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
+ RSEQ_ASM_OP_CMPEQ(v2, expect2, %l[error3])
+#endif
+ RSEQ_ASM_OP_FINAL_STORE(newv, v, 3)
+ RSEQ_INJECT_ASM(6)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "Qo" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [v] "Qo" (*v),
+ [expect] "r" (expect),
+ [v2] "Qo" (*v2),
+ [expect2] "r" (expect2),
+ [newv] "r" (newv)
+ RSEQ_INJECT_INPUT
+ : "memory", RSEQ_ASM_TMP_REG
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2, error3
+#endif
+ );
+
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+error3:
+ rseq_bug("2nd expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
+ void *dst, void *src, size_t len,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
+#endif
+ RSEQ_ASM_OP_R_BAD_MEMCPY(dst, src, len)
+ RSEQ_INJECT_ASM(5)
+ RSEQ_ASM_OP_FINAL_STORE(newv, v, 3)
+ RSEQ_INJECT_ASM(6)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "Qo" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [expect] "r" (expect),
+ [v] "Qo" (*v),
+ [newv] "r" (newv),
+ [dst] "r" (dst),
+ [src] "r" (src),
+ [len] "r" (len)
+ RSEQ_INJECT_INPUT
+ : "memory", RSEQ_ASM_TMP_REG, RSEQ_ASM_TMP_REG_2
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
+ void *dst, void *src, size_t len,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
+#endif
+ RSEQ_ASM_OP_R_BAD_MEMCPY(dst, src, len)
+ RSEQ_INJECT_ASM(5)
+ RSEQ_ASM_OP_FINAL_STORE_RELEASE(newv, v, 3)
+ RSEQ_INJECT_ASM(6)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "Qo" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [expect] "r" (expect),
+ [v] "Qo" (*v),
+ [newv] "r" (newv),
+ [dst] "r" (dst),
+ [src] "r" (src),
+ [len] "r" (len)
+ RSEQ_INJECT_INPUT
+ : "memory", RSEQ_ASM_TMP_REG, RSEQ_ASM_TMP_REG_2
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+#endif /* !RSEQ_SKIP_FASTPATH */
diff --git a/tools/testing/selftests/rseq/rseq-mips.h b/tools/testing/selftests/rseq/rseq-mips.h
new file mode 100644
index 000000000..7f48ecf46
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-mips.h
@@ -0,0 +1,725 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Author: Paul Burton <paul.burton@mips.com>
+ * (C) Copyright 2018 MIPS Tech LLC
+ *
+ * Based on rseq-arm.h:
+ * (C) Copyright 2016-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ */
+
+#define RSEQ_SIG 0x53053053
+
+#define rseq_smp_mb() __asm__ __volatile__ ("sync" ::: "memory")
+#define rseq_smp_rmb() rseq_smp_mb()
+#define rseq_smp_wmb() rseq_smp_mb()
+
+#define rseq_smp_load_acquire(p) \
+__extension__ ({ \
+ __typeof(*p) ____p1 = RSEQ_READ_ONCE(*p); \
+ rseq_smp_mb(); \
+ ____p1; \
+})
+
+#define rseq_smp_acquire__after_ctrl_dep() rseq_smp_rmb()
+
+#define rseq_smp_store_release(p, v) \
+do { \
+ rseq_smp_mb(); \
+ RSEQ_WRITE_ONCE(*p, v); \
+} while (0)
+
+#ifdef RSEQ_SKIP_FASTPATH
+#include "rseq-skip.h"
+#else /* !RSEQ_SKIP_FASTPATH */
+
+#if _MIPS_SZLONG == 64
+# define LONG ".dword"
+# define LONG_LA "dla"
+# define LONG_L "ld"
+# define LONG_S "sd"
+# define LONG_ADDI "daddiu"
+# define U32_U64_PAD(x) x
+#elif _MIPS_SZLONG == 32
+# define LONG ".word"
+# define LONG_LA "la"
+# define LONG_L "lw"
+# define LONG_S "sw"
+# define LONG_ADDI "addiu"
+# ifdef __BIG_ENDIAN
+# define U32_U64_PAD(x) "0x0, " x
+# else
+# define U32_U64_PAD(x) x ", 0x0"
+# endif
+#else
+# error unsupported _MIPS_SZLONG
+#endif
+
+#define __RSEQ_ASM_DEFINE_TABLE(version, flags, start_ip, \
+ post_commit_offset, abort_ip) \
+ ".pushsection __rseq_table, \"aw\"\n\t" \
+ ".balign 32\n\t" \
+ ".word " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
+ LONG " " U32_U64_PAD(__rseq_str(start_ip)) "\n\t" \
+ LONG " " U32_U64_PAD(__rseq_str(post_commit_offset)) "\n\t" \
+ LONG " " U32_U64_PAD(__rseq_str(abort_ip)) "\n\t" \
+ ".popsection\n\t"
+
+#define RSEQ_ASM_DEFINE_TABLE(start_ip, post_commit_ip, abort_ip) \
+ __RSEQ_ASM_DEFINE_TABLE(0x0, 0x0, start_ip, \
+ (post_commit_ip - start_ip), abort_ip)
+
+#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \
+ RSEQ_INJECT_ASM(1) \
+ LONG_LA " $4, " __rseq_str(cs_label) "\n\t" \
+ LONG_S " $4, %[" __rseq_str(rseq_cs) "]\n\t" \
+ __rseq_str(label) ":\n\t"
+
+#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label) \
+ RSEQ_INJECT_ASM(2) \
+ "lw $4, %[" __rseq_str(current_cpu_id) "]\n\t" \
+ "bne $4, %[" __rseq_str(cpu_id) "], " __rseq_str(label) "\n\t"
+
+#define __RSEQ_ASM_DEFINE_ABORT(table_label, label, teardown, \
+ abort_label, version, flags, \
+ start_ip, post_commit_offset, abort_ip) \
+ ".balign 32\n\t" \
+ __rseq_str(table_label) ":\n\t" \
+ ".word " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
+ LONG " " U32_U64_PAD(__rseq_str(start_ip)) "\n\t" \
+ LONG " " U32_U64_PAD(__rseq_str(post_commit_offset)) "\n\t" \
+ LONG " " U32_U64_PAD(__rseq_str(abort_ip)) "\n\t" \
+ ".word " __rseq_str(RSEQ_SIG) "\n\t" \
+ __rseq_str(label) ":\n\t" \
+ teardown \
+ "b %l[" __rseq_str(abort_label) "]\n\t"
+
+#define RSEQ_ASM_DEFINE_ABORT(table_label, label, teardown, abort_label, \
+ start_ip, post_commit_ip, abort_ip) \
+ __RSEQ_ASM_DEFINE_ABORT(table_label, label, teardown, \
+ abort_label, 0x0, 0x0, start_ip, \
+ (post_commit_ip - start_ip), abort_ip)
+
+#define RSEQ_ASM_DEFINE_CMPFAIL(label, teardown, cmpfail_label) \
+ __rseq_str(label) ":\n\t" \
+ teardown \
+ "b %l[" __rseq_str(cmpfail_label) "]\n\t"
+
+#define rseq_workaround_gcc_asm_size_guess() __asm__ __volatile__("")
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ rseq_workaround_gcc_asm_size_guess();
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ LONG_L " $4, %[v]\n\t"
+ "bne $4, %[expect], %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ LONG_L " $4, %[v]\n\t"
+ "bne $4, %[expect], %l[error2]\n\t"
+#endif
+ /* final store */
+ LONG_S " %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(5)
+ "b 5f\n\t"
+ RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+ "5:\n\t"
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+ RSEQ_INJECT_INPUT
+ : "$4", "memory"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ rseq_workaround_gcc_asm_size_guess();
+ return 0;
+abort:
+ rseq_workaround_gcc_asm_size_guess();
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ rseq_workaround_gcc_asm_size_guess();
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
+ off_t voffp, intptr_t *load, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ rseq_workaround_gcc_asm_size_guess();
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ LONG_L " $4, %[v]\n\t"
+ "beq $4, %[expectnot], %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ LONG_L " $4, %[v]\n\t"
+ "beq $4, %[expectnot], %l[error2]\n\t"
+#endif
+ LONG_S " $4, %[load]\n\t"
+ LONG_ADDI " $4, %[voffp]\n\t"
+ LONG_L " $4, 0($4)\n\t"
+ /* final store */
+ LONG_S " $4, %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(5)
+ "b 5f\n\t"
+ RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+ "5:\n\t"
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* final store input */
+ [v] "m" (*v),
+ [expectnot] "r" (expectnot),
+ [voffp] "Ir" (voffp),
+ [load] "m" (*load)
+ RSEQ_INJECT_INPUT
+ : "$4", "memory"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ rseq_workaround_gcc_asm_size_guess();
+ return 0;
+abort:
+ rseq_workaround_gcc_asm_size_guess();
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ rseq_workaround_gcc_asm_size_guess();
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_addv(intptr_t *v, intptr_t count, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ rseq_workaround_gcc_asm_size_guess();
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+#endif
+ LONG_L " $4, %[v]\n\t"
+ LONG_ADDI " $4, %[count]\n\t"
+ /* final store */
+ LONG_S " $4, %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(4)
+ "b 5f\n\t"
+ RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+ "5:\n\t"
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [v] "m" (*v),
+ [count] "Ir" (count)
+ RSEQ_INJECT_INPUT
+ : "$4", "memory"
+ RSEQ_INJECT_CLOBBER
+ : abort
+#ifdef RSEQ_COMPARE_TWICE
+ , error1
+#endif
+ );
+ rseq_workaround_gcc_asm_size_guess();
+ return 0;
+abort:
+ rseq_workaround_gcc_asm_size_guess();
+ RSEQ_INJECT_FAILED
+ return -1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t newv2,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ rseq_workaround_gcc_asm_size_guess();
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ LONG_L " $4, %[v]\n\t"
+ "bne $4, %[expect], %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ LONG_L " $4, %[v]\n\t"
+ "bne $4, %[expect], %l[error2]\n\t"
+#endif
+ /* try store */
+ LONG_S " %[newv2], %[v2]\n\t"
+ RSEQ_INJECT_ASM(5)
+ /* final store */
+ LONG_S " %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(6)
+ "b 5f\n\t"
+ RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+ "5:\n\t"
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* try store input */
+ [v2] "m" (*v2),
+ [newv2] "r" (newv2),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+ RSEQ_INJECT_INPUT
+ : "$4", "memory"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ rseq_workaround_gcc_asm_size_guess();
+ return 0;
+abort:
+ rseq_workaround_gcc_asm_size_guess();
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ rseq_workaround_gcc_asm_size_guess();
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t newv2,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ rseq_workaround_gcc_asm_size_guess();
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ LONG_L " $4, %[v]\n\t"
+ "bne $4, %[expect], %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ LONG_L " $4, %[v]\n\t"
+ "bne $4, %[expect], %l[error2]\n\t"
+#endif
+ /* try store */
+ LONG_S " %[newv2], %[v2]\n\t"
+ RSEQ_INJECT_ASM(5)
+ "sync\n\t" /* full sync provides store-release */
+ /* final store */
+ LONG_S " %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(6)
+ "b 5f\n\t"
+ RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+ "5:\n\t"
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* try store input */
+ [v2] "m" (*v2),
+ [newv2] "r" (newv2),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+ RSEQ_INJECT_INPUT
+ : "$4", "memory"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ rseq_workaround_gcc_asm_size_guess();
+ return 0;
+abort:
+ rseq_workaround_gcc_asm_size_guess();
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ rseq_workaround_gcc_asm_size_guess();
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t expect2,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ rseq_workaround_gcc_asm_size_guess();
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ LONG_L " $4, %[v]\n\t"
+ "bne $4, %[expect], %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+ LONG_L " $4, %[v2]\n\t"
+ "bne $4, %[expect2], %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(5)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ LONG_L " $4, %[v]\n\t"
+ "bne $4, %[expect], %l[error2]\n\t"
+ LONG_L " $4, %[v2]\n\t"
+ "bne $4, %[expect2], %l[error3]\n\t"
+#endif
+ /* final store */
+ LONG_S " %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(6)
+ "b 5f\n\t"
+ RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+ "5:\n\t"
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* cmp2 input */
+ [v2] "m" (*v2),
+ [expect2] "r" (expect2),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+ RSEQ_INJECT_INPUT
+ : "$4", "memory"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2, error3
+#endif
+ );
+ rseq_workaround_gcc_asm_size_guess();
+ return 0;
+abort:
+ rseq_workaround_gcc_asm_size_guess();
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ rseq_workaround_gcc_asm_size_guess();
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("1st expected value comparison failed");
+error3:
+ rseq_bug("2nd expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
+ void *dst, void *src, size_t len,
+ intptr_t newv, int cpu)
+{
+ uintptr_t rseq_scratch[3];
+
+ RSEQ_INJECT_C(9)
+
+ rseq_workaround_gcc_asm_size_guess();
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ LONG_S " %[src], %[rseq_scratch0]\n\t"
+ LONG_S " %[dst], %[rseq_scratch1]\n\t"
+ LONG_S " %[len], %[rseq_scratch2]\n\t"
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ LONG_L " $4, %[v]\n\t"
+ "bne $4, %[expect], 5f\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f)
+ LONG_L " $4, %[v]\n\t"
+ "bne $4, %[expect], 7f\n\t"
+#endif
+ /* try memcpy */
+ "beqz %[len], 333f\n\t" \
+ "222:\n\t" \
+ "lb $4, 0(%[src])\n\t" \
+ "sb $4, 0(%[dst])\n\t" \
+ LONG_ADDI " %[src], 1\n\t" \
+ LONG_ADDI " %[dst], 1\n\t" \
+ LONG_ADDI " %[len], -1\n\t" \
+ "bnez %[len], 222b\n\t" \
+ "333:\n\t" \
+ RSEQ_INJECT_ASM(5)
+ /* final store */
+ LONG_S " %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(6)
+ /* teardown */
+ LONG_L " %[len], %[rseq_scratch2]\n\t"
+ LONG_L " %[dst], %[rseq_scratch1]\n\t"
+ LONG_L " %[src], %[rseq_scratch0]\n\t"
+ "b 8f\n\t"
+ RSEQ_ASM_DEFINE_ABORT(3, 4,
+ /* teardown */
+ LONG_L " %[len], %[rseq_scratch2]\n\t"
+ LONG_L " %[dst], %[rseq_scratch1]\n\t"
+ LONG_L " %[src], %[rseq_scratch0]\n\t",
+ abort, 1b, 2b, 4f)
+ RSEQ_ASM_DEFINE_CMPFAIL(5,
+ /* teardown */
+ LONG_L " %[len], %[rseq_scratch2]\n\t"
+ LONG_L " %[dst], %[rseq_scratch1]\n\t"
+ LONG_L " %[src], %[rseq_scratch0]\n\t",
+ cmpfail)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_CMPFAIL(6,
+ /* teardown */
+ LONG_L " %[len], %[rseq_scratch2]\n\t"
+ LONG_L " %[dst], %[rseq_scratch1]\n\t"
+ LONG_L " %[src], %[rseq_scratch0]\n\t",
+ error1)
+ RSEQ_ASM_DEFINE_CMPFAIL(7,
+ /* teardown */
+ LONG_L " %[len], %[rseq_scratch2]\n\t"
+ LONG_L " %[dst], %[rseq_scratch1]\n\t"
+ LONG_L " %[src], %[rseq_scratch0]\n\t",
+ error2)
+#endif
+ "8:\n\t"
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv),
+ /* try memcpy input */
+ [dst] "r" (dst),
+ [src] "r" (src),
+ [len] "r" (len),
+ [rseq_scratch0] "m" (rseq_scratch[0]),
+ [rseq_scratch1] "m" (rseq_scratch[1]),
+ [rseq_scratch2] "m" (rseq_scratch[2])
+ RSEQ_INJECT_INPUT
+ : "$4", "memory"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ rseq_workaround_gcc_asm_size_guess();
+ return 0;
+abort:
+ rseq_workaround_gcc_asm_size_guess();
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ rseq_workaround_gcc_asm_size_guess();
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_workaround_gcc_asm_size_guess();
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_workaround_gcc_asm_size_guess();
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
+ void *dst, void *src, size_t len,
+ intptr_t newv, int cpu)
+{
+ uintptr_t rseq_scratch[3];
+
+ RSEQ_INJECT_C(9)
+
+ rseq_workaround_gcc_asm_size_guess();
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ LONG_S " %[src], %[rseq_scratch0]\n\t"
+ LONG_S " %[dst], %[rseq_scratch1]\n\t"
+ LONG_S " %[len], %[rseq_scratch2]\n\t"
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ LONG_L " $4, %[v]\n\t"
+ "bne $4, %[expect], 5f\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f)
+ LONG_L " $4, %[v]\n\t"
+ "bne $4, %[expect], 7f\n\t"
+#endif
+ /* try memcpy */
+ "beqz %[len], 333f\n\t" \
+ "222:\n\t" \
+ "lb $4, 0(%[src])\n\t" \
+ "sb $4, 0(%[dst])\n\t" \
+ LONG_ADDI " %[src], 1\n\t" \
+ LONG_ADDI " %[dst], 1\n\t" \
+ LONG_ADDI " %[len], -1\n\t" \
+ "bnez %[len], 222b\n\t" \
+ "333:\n\t" \
+ RSEQ_INJECT_ASM(5)
+ "sync\n\t" /* full sync provides store-release */
+ /* final store */
+ LONG_S " %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(6)
+ /* teardown */
+ LONG_L " %[len], %[rseq_scratch2]\n\t"
+ LONG_L " %[dst], %[rseq_scratch1]\n\t"
+ LONG_L " %[src], %[rseq_scratch0]\n\t"
+ "b 8f\n\t"
+ RSEQ_ASM_DEFINE_ABORT(3, 4,
+ /* teardown */
+ LONG_L " %[len], %[rseq_scratch2]\n\t"
+ LONG_L " %[dst], %[rseq_scratch1]\n\t"
+ LONG_L " %[src], %[rseq_scratch0]\n\t",
+ abort, 1b, 2b, 4f)
+ RSEQ_ASM_DEFINE_CMPFAIL(5,
+ /* teardown */
+ LONG_L " %[len], %[rseq_scratch2]\n\t"
+ LONG_L " %[dst], %[rseq_scratch1]\n\t"
+ LONG_L " %[src], %[rseq_scratch0]\n\t",
+ cmpfail)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_CMPFAIL(6,
+ /* teardown */
+ LONG_L " %[len], %[rseq_scratch2]\n\t"
+ LONG_L " %[dst], %[rseq_scratch1]\n\t"
+ LONG_L " %[src], %[rseq_scratch0]\n\t",
+ error1)
+ RSEQ_ASM_DEFINE_CMPFAIL(7,
+ /* teardown */
+ LONG_L " %[len], %[rseq_scratch2]\n\t"
+ LONG_L " %[dst], %[rseq_scratch1]\n\t"
+ LONG_L " %[src], %[rseq_scratch0]\n\t",
+ error2)
+#endif
+ "8:\n\t"
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv),
+ /* try memcpy input */
+ [dst] "r" (dst),
+ [src] "r" (src),
+ [len] "r" (len),
+ [rseq_scratch0] "m" (rseq_scratch[0]),
+ [rseq_scratch1] "m" (rseq_scratch[1]),
+ [rseq_scratch2] "m" (rseq_scratch[2])
+ RSEQ_INJECT_INPUT
+ : "$4", "memory"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ rseq_workaround_gcc_asm_size_guess();
+ return 0;
+abort:
+ rseq_workaround_gcc_asm_size_guess();
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ rseq_workaround_gcc_asm_size_guess();
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_workaround_gcc_asm_size_guess();
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_workaround_gcc_asm_size_guess();
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+#endif /* !RSEQ_SKIP_FASTPATH */
diff --git a/tools/testing/selftests/rseq/rseq-ppc.h b/tools/testing/selftests/rseq/rseq-ppc.h
new file mode 100644
index 000000000..52630c9f4
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-ppc.h
@@ -0,0 +1,671 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * rseq-ppc.h
+ *
+ * (C) Copyright 2016-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ * (C) Copyright 2016-2018 - Boqun Feng <boqun.feng@gmail.com>
+ */
+
+#define RSEQ_SIG 0x53053053
+
+#define rseq_smp_mb() __asm__ __volatile__ ("sync" ::: "memory", "cc")
+#define rseq_smp_lwsync() __asm__ __volatile__ ("lwsync" ::: "memory", "cc")
+#define rseq_smp_rmb() rseq_smp_lwsync()
+#define rseq_smp_wmb() rseq_smp_lwsync()
+
+#define rseq_smp_load_acquire(p) \
+__extension__ ({ \
+ __typeof(*p) ____p1 = RSEQ_READ_ONCE(*p); \
+ rseq_smp_lwsync(); \
+ ____p1; \
+})
+
+#define rseq_smp_acquire__after_ctrl_dep() rseq_smp_lwsync()
+
+#define rseq_smp_store_release(p, v) \
+do { \
+ rseq_smp_lwsync(); \
+ RSEQ_WRITE_ONCE(*p, v); \
+} while (0)
+
+#ifdef RSEQ_SKIP_FASTPATH
+#include "rseq-skip.h"
+#else /* !RSEQ_SKIP_FASTPATH */
+
+/*
+ * The __rseq_table section can be used by debuggers to better handle
+ * single-stepping through the restartable critical sections.
+ */
+
+#ifdef __PPC64__
+
+#define STORE_WORD "std "
+#define LOAD_WORD "ld "
+#define LOADX_WORD "ldx "
+#define CMP_WORD "cmpd "
+
+#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, \
+ start_ip, post_commit_offset, abort_ip) \
+ ".pushsection __rseq_table, \"aw\"\n\t" \
+ ".balign 32\n\t" \
+ __rseq_str(label) ":\n\t" \
+ ".long " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
+ ".quad " __rseq_str(start_ip) ", " __rseq_str(post_commit_offset) ", " __rseq_str(abort_ip) "\n\t" \
+ ".popsection\n\t"
+
+#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \
+ RSEQ_INJECT_ASM(1) \
+ "lis %%r17, (" __rseq_str(cs_label) ")@highest\n\t" \
+ "ori %%r17, %%r17, (" __rseq_str(cs_label) ")@higher\n\t" \
+ "rldicr %%r17, %%r17, 32, 31\n\t" \
+ "oris %%r17, %%r17, (" __rseq_str(cs_label) ")@high\n\t" \
+ "ori %%r17, %%r17, (" __rseq_str(cs_label) ")@l\n\t" \
+ "std %%r17, %[" __rseq_str(rseq_cs) "]\n\t" \
+ __rseq_str(label) ":\n\t"
+
+#else /* #ifdef __PPC64__ */
+
+#define STORE_WORD "stw "
+#define LOAD_WORD "lwz "
+#define LOADX_WORD "lwzx "
+#define CMP_WORD "cmpw "
+
+#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, \
+ start_ip, post_commit_offset, abort_ip) \
+ ".pushsection __rseq_table, \"aw\"\n\t" \
+ ".balign 32\n\t" \
+ __rseq_str(label) ":\n\t" \
+ ".long " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
+ /* 32-bit only supported on BE */ \
+ ".long 0x0, " __rseq_str(start_ip) ", 0x0, " __rseq_str(post_commit_offset) ", 0x0, " __rseq_str(abort_ip) "\n\t" \
+ ".popsection\n\t"
+
+#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \
+ RSEQ_INJECT_ASM(1) \
+ "lis %%r17, (" __rseq_str(cs_label) ")@ha\n\t" \
+ "addi %%r17, %%r17, (" __rseq_str(cs_label) ")@l\n\t" \
+ "stw %%r17, %[" __rseq_str(rseq_cs) "]\n\t" \
+ __rseq_str(label) ":\n\t"
+
+#endif /* #ifdef __PPC64__ */
+
+#define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip) \
+ __RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip, \
+ (post_commit_ip - start_ip), abort_ip)
+
+#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label) \
+ RSEQ_INJECT_ASM(2) \
+ "lwz %%r17, %[" __rseq_str(current_cpu_id) "]\n\t" \
+ "cmpw cr7, %[" __rseq_str(cpu_id) "], %%r17\n\t" \
+ "bne- cr7, " __rseq_str(label) "\n\t"
+
+#define RSEQ_ASM_DEFINE_ABORT(label, abort_label) \
+ ".pushsection __rseq_failure, \"ax\"\n\t" \
+ ".long " __rseq_str(RSEQ_SIG) "\n\t" \
+ __rseq_str(label) ":\n\t" \
+ "b %l[" __rseq_str(abort_label) "]\n\t" \
+ ".popsection\n\t"
+
+/*
+ * RSEQ_ASM_OPs: asm operations for rseq
+ * RSEQ_ASM_OP_R_*: has hard-code registers in it
+ * RSEQ_ASM_OP_* (else): doesn't have hard-code registers(unless cr7)
+ */
+#define RSEQ_ASM_OP_CMPEQ(var, expect, label) \
+ LOAD_WORD "%%r17, %[" __rseq_str(var) "]\n\t" \
+ CMP_WORD "cr7, %%r17, %[" __rseq_str(expect) "]\n\t" \
+ "bne- cr7, " __rseq_str(label) "\n\t"
+
+#define RSEQ_ASM_OP_CMPNE(var, expectnot, label) \
+ LOAD_WORD "%%r17, %[" __rseq_str(var) "]\n\t" \
+ CMP_WORD "cr7, %%r17, %[" __rseq_str(expectnot) "]\n\t" \
+ "beq- cr7, " __rseq_str(label) "\n\t"
+
+#define RSEQ_ASM_OP_STORE(value, var) \
+ STORE_WORD "%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n\t"
+
+/* Load @var to r17 */
+#define RSEQ_ASM_OP_R_LOAD(var) \
+ LOAD_WORD "%%r17, %[" __rseq_str(var) "]\n\t"
+
+/* Store r17 to @var */
+#define RSEQ_ASM_OP_R_STORE(var) \
+ STORE_WORD "%%r17, %[" __rseq_str(var) "]\n\t"
+
+/* Add @count to r17 */
+#define RSEQ_ASM_OP_R_ADD(count) \
+ "add %%r17, %[" __rseq_str(count) "], %%r17\n\t"
+
+/* Load (r17 + voffp) to r17 */
+#define RSEQ_ASM_OP_R_LOADX(voffp) \
+ LOADX_WORD "%%r17, %[" __rseq_str(voffp) "], %%r17\n\t"
+
+/* TODO: implement a faster memcpy. */
+#define RSEQ_ASM_OP_R_MEMCPY() \
+ "cmpdi %%r19, 0\n\t" \
+ "beq 333f\n\t" \
+ "addi %%r20, %%r20, -1\n\t" \
+ "addi %%r21, %%r21, -1\n\t" \
+ "222:\n\t" \
+ "lbzu %%r18, 1(%%r20)\n\t" \
+ "stbu %%r18, 1(%%r21)\n\t" \
+ "addi %%r19, %%r19, -1\n\t" \
+ "cmpdi %%r19, 0\n\t" \
+ "bne 222b\n\t" \
+ "333:\n\t" \
+
+#define RSEQ_ASM_OP_R_FINAL_STORE(var, post_commit_label) \
+ STORE_WORD "%%r17, %[" __rseq_str(var) "]\n\t" \
+ __rseq_str(post_commit_label) ":\n\t"
+
+#define RSEQ_ASM_OP_FINAL_STORE(value, var, post_commit_label) \
+ STORE_WORD "%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n\t" \
+ __rseq_str(post_commit_label) ":\n\t"
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ /* cmp cpuid */
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ /* cmp @v equal to @expect */
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ /* cmp cpuid */
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ /* cmp @v equal to @expect */
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
+#endif
+ /* final store */
+ RSEQ_ASM_OP_FINAL_STORE(newv, v, 2)
+ RSEQ_INJECT_ASM(5)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+ RSEQ_INJECT_INPUT
+ : "memory", "cc", "r17"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
+ off_t voffp, intptr_t *load, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ /* cmp cpuid */
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ /* cmp @v not equal to @expectnot */
+ RSEQ_ASM_OP_CMPNE(v, expectnot, %l[cmpfail])
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ /* cmp cpuid */
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ /* cmp @v not equal to @expectnot */
+ RSEQ_ASM_OP_CMPNE(v, expectnot, %l[error2])
+#endif
+ /* load the value of @v */
+ RSEQ_ASM_OP_R_LOAD(v)
+ /* store it in @load */
+ RSEQ_ASM_OP_R_STORE(load)
+ /* dereference voffp(v) */
+ RSEQ_ASM_OP_R_LOADX(voffp)
+ /* final store the value at voffp(v) */
+ RSEQ_ASM_OP_R_FINAL_STORE(v, 2)
+ RSEQ_INJECT_ASM(5)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* final store input */
+ [v] "m" (*v),
+ [expectnot] "r" (expectnot),
+ [voffp] "b" (voffp),
+ [load] "m" (*load)
+ RSEQ_INJECT_INPUT
+ : "memory", "cc", "r17"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_addv(intptr_t *v, intptr_t count, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ /* cmp cpuid */
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+#ifdef RSEQ_COMPARE_TWICE
+ /* cmp cpuid */
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+#endif
+ /* load the value of @v */
+ RSEQ_ASM_OP_R_LOAD(v)
+ /* add @count to it */
+ RSEQ_ASM_OP_R_ADD(count)
+ /* final store */
+ RSEQ_ASM_OP_R_FINAL_STORE(v, 2)
+ RSEQ_INJECT_ASM(4)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* final store input */
+ [v] "m" (*v),
+ [count] "r" (count)
+ RSEQ_INJECT_INPUT
+ : "memory", "cc", "r17"
+ RSEQ_INJECT_CLOBBER
+ : abort
+#ifdef RSEQ_COMPARE_TWICE
+ , error1
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t newv2,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ /* cmp cpuid */
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ /* cmp @v equal to @expect */
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ /* cmp cpuid */
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ /* cmp @v equal to @expect */
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
+#endif
+ /* try store */
+ RSEQ_ASM_OP_STORE(newv2, v2)
+ RSEQ_INJECT_ASM(5)
+ /* final store */
+ RSEQ_ASM_OP_FINAL_STORE(newv, v, 2)
+ RSEQ_INJECT_ASM(6)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* try store input */
+ [v2] "m" (*v2),
+ [newv2] "r" (newv2),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+ RSEQ_INJECT_INPUT
+ : "memory", "cc", "r17"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t newv2,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ /* cmp cpuid */
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ /* cmp @v equal to @expect */
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ /* cmp cpuid */
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ /* cmp @v equal to @expect */
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
+#endif
+ /* try store */
+ RSEQ_ASM_OP_STORE(newv2, v2)
+ RSEQ_INJECT_ASM(5)
+ /* for 'release' */
+ "lwsync\n\t"
+ /* final store */
+ RSEQ_ASM_OP_FINAL_STORE(newv, v, 2)
+ RSEQ_INJECT_ASM(6)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* try store input */
+ [v2] "m" (*v2),
+ [newv2] "r" (newv2),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+ RSEQ_INJECT_INPUT
+ : "memory", "cc", "r17"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t expect2,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ /* cmp cpuid */
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ /* cmp @v equal to @expect */
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
+ RSEQ_INJECT_ASM(4)
+ /* cmp @v2 equal to @expct2 */
+ RSEQ_ASM_OP_CMPEQ(v2, expect2, %l[cmpfail])
+ RSEQ_INJECT_ASM(5)
+#ifdef RSEQ_COMPARE_TWICE
+ /* cmp cpuid */
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ /* cmp @v equal to @expect */
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
+ /* cmp @v2 equal to @expct2 */
+ RSEQ_ASM_OP_CMPEQ(v2, expect2, %l[error3])
+#endif
+ /* final store */
+ RSEQ_ASM_OP_FINAL_STORE(newv, v, 2)
+ RSEQ_INJECT_ASM(6)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* cmp2 input */
+ [v2] "m" (*v2),
+ [expect2] "r" (expect2),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+ RSEQ_INJECT_INPUT
+ : "memory", "cc", "r17"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2, error3
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("1st expected value comparison failed");
+error3:
+ rseq_bug("2nd expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
+ void *dst, void *src, size_t len,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ /* setup for mempcy */
+ "mr %%r19, %[len]\n\t"
+ "mr %%r20, %[src]\n\t"
+ "mr %%r21, %[dst]\n\t"
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ /* cmp cpuid */
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ /* cmp @v equal to @expect */
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ /* cmp cpuid */
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ /* cmp @v equal to @expect */
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
+#endif
+ /* try memcpy */
+ RSEQ_ASM_OP_R_MEMCPY()
+ RSEQ_INJECT_ASM(5)
+ /* final store */
+ RSEQ_ASM_OP_FINAL_STORE(newv, v, 2)
+ RSEQ_INJECT_ASM(6)
+ /* teardown */
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv),
+ /* try memcpy input */
+ [dst] "r" (dst),
+ [src] "r" (src),
+ [len] "r" (len)
+ RSEQ_INJECT_INPUT
+ : "memory", "cc", "r17", "r18", "r19", "r20", "r21"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
+ void *dst, void *src, size_t len,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ /* setup for mempcy */
+ "mr %%r19, %[len]\n\t"
+ "mr %%r20, %[src]\n\t"
+ "mr %%r21, %[dst]\n\t"
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ /* cmp cpuid */
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ /* cmp @v equal to @expect */
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ /* cmp cpuid */
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ /* cmp @v equal to @expect */
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
+#endif
+ /* try memcpy */
+ RSEQ_ASM_OP_R_MEMCPY()
+ RSEQ_INJECT_ASM(5)
+ /* for 'release' */
+ "lwsync\n\t"
+ /* final store */
+ RSEQ_ASM_OP_FINAL_STORE(newv, v, 2)
+ RSEQ_INJECT_ASM(6)
+ /* teardown */
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv),
+ /* try memcpy input */
+ [dst] "r" (dst),
+ [src] "r" (src),
+ [len] "r" (len)
+ RSEQ_INJECT_INPUT
+ : "memory", "cc", "r17", "r18", "r19", "r20", "r21"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+#undef STORE_WORD
+#undef LOAD_WORD
+#undef LOADX_WORD
+#undef CMP_WORD
+
+#endif /* !RSEQ_SKIP_FASTPATH */
diff --git a/tools/testing/selftests/rseq/rseq-s390.h b/tools/testing/selftests/rseq/rseq-s390.h
new file mode 100644
index 000000000..1069e8525
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-s390.h
@@ -0,0 +1,513 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+
+#define RSEQ_SIG 0x53053053
+
+#define rseq_smp_mb() __asm__ __volatile__ ("bcr 15,0" ::: "memory")
+#define rseq_smp_rmb() rseq_smp_mb()
+#define rseq_smp_wmb() rseq_smp_mb()
+
+#define rseq_smp_load_acquire(p) \
+__extension__ ({ \
+ __typeof(*p) ____p1 = RSEQ_READ_ONCE(*p); \
+ rseq_barrier(); \
+ ____p1; \
+})
+
+#define rseq_smp_acquire__after_ctrl_dep() rseq_smp_rmb()
+
+#define rseq_smp_store_release(p, v) \
+do { \
+ rseq_barrier(); \
+ RSEQ_WRITE_ONCE(*p, v); \
+} while (0)
+
+#ifdef RSEQ_SKIP_FASTPATH
+#include "rseq-skip.h"
+#else /* !RSEQ_SKIP_FASTPATH */
+
+#ifdef __s390x__
+
+#define LONG_L "lg"
+#define LONG_S "stg"
+#define LONG_LT_R "ltgr"
+#define LONG_CMP "cg"
+#define LONG_CMP_R "cgr"
+#define LONG_ADDI "aghi"
+#define LONG_ADD_R "agr"
+
+#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, \
+ start_ip, post_commit_offset, abort_ip) \
+ ".pushsection __rseq_table, \"aw\"\n\t" \
+ ".balign 32\n\t" \
+ __rseq_str(label) ":\n\t" \
+ ".long " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
+ ".quad " __rseq_str(start_ip) ", " __rseq_str(post_commit_offset) ", " __rseq_str(abort_ip) "\n\t" \
+ ".popsection\n\t"
+
+#elif __s390__
+
+#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, \
+ start_ip, post_commit_offset, abort_ip) \
+ ".pushsection __rseq_table, \"aw\"\n\t" \
+ ".balign 32\n\t" \
+ __rseq_str(label) ":\n\t" \
+ ".long " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
+ ".long 0x0, " __rseq_str(start_ip) ", 0x0, " __rseq_str(post_commit_offset) ", 0x0, " __rseq_str(abort_ip) "\n\t" \
+ ".popsection\n\t"
+
+#define LONG_L "l"
+#define LONG_S "st"
+#define LONG_LT_R "ltr"
+#define LONG_CMP "c"
+#define LONG_CMP_R "cr"
+#define LONG_ADDI "ahi"
+#define LONG_ADD_R "ar"
+
+#endif
+
+#define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip) \
+ __RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip, \
+ (post_commit_ip - start_ip), abort_ip)
+
+#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \
+ RSEQ_INJECT_ASM(1) \
+ "larl %%r0, " __rseq_str(cs_label) "\n\t" \
+ LONG_S " %%r0, %[" __rseq_str(rseq_cs) "]\n\t" \
+ __rseq_str(label) ":\n\t"
+
+#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label) \
+ RSEQ_INJECT_ASM(2) \
+ "c %[" __rseq_str(cpu_id) "], %[" __rseq_str(current_cpu_id) "]\n\t" \
+ "jnz " __rseq_str(label) "\n\t"
+
+#define RSEQ_ASM_DEFINE_ABORT(label, teardown, abort_label) \
+ ".pushsection __rseq_failure, \"ax\"\n\t" \
+ ".long " __rseq_str(RSEQ_SIG) "\n\t" \
+ __rseq_str(label) ":\n\t" \
+ teardown \
+ "j %l[" __rseq_str(abort_label) "]\n\t" \
+ ".popsection\n\t"
+
+#define RSEQ_ASM_DEFINE_CMPFAIL(label, teardown, cmpfail_label) \
+ ".pushsection __rseq_failure, \"ax\"\n\t" \
+ __rseq_str(label) ":\n\t" \
+ teardown \
+ "j %l[" __rseq_str(cmpfail_label) "]\n\t" \
+ ".popsection\n\t"
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ LONG_CMP " %[expect], %[v]\n\t"
+ "jnz %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ LONG_CMP " %[expect], %[v]\n\t"
+ "jnz %l[error2]\n\t"
+#endif
+ /* final store */
+ LONG_S " %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(5)
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+ RSEQ_INJECT_INPUT
+ : "memory", "cc", "r0"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+/*
+ * Compare @v against @expectnot. When it does _not_ match, load @v
+ * into @load, and store the content of *@v + voffp into @v.
+ */
+static inline __attribute__((always_inline))
+int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
+ off_t voffp, intptr_t *load, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ LONG_L " %%r1, %[v]\n\t"
+ LONG_CMP_R " %%r1, %[expectnot]\n\t"
+ "je %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ LONG_L " %%r1, %[v]\n\t"
+ LONG_CMP_R " %%r1, %[expectnot]\n\t"
+ "je %l[error2]\n\t"
+#endif
+ LONG_S " %%r1, %[load]\n\t"
+ LONG_ADD_R " %%r1, %[voffp]\n\t"
+ LONG_L " %%r1, 0(%%r1)\n\t"
+ /* final store */
+ LONG_S " %%r1, %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(5)
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* final store input */
+ [v] "m" (*v),
+ [expectnot] "r" (expectnot),
+ [voffp] "r" (voffp),
+ [load] "m" (*load)
+ RSEQ_INJECT_INPUT
+ : "memory", "cc", "r0", "r1"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_addv(intptr_t *v, intptr_t count, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+#endif
+ LONG_L " %%r0, %[v]\n\t"
+ LONG_ADD_R " %%r0, %[count]\n\t"
+ /* final store */
+ LONG_S " %%r0, %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(4)
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* final store input */
+ [v] "m" (*v),
+ [count] "r" (count)
+ RSEQ_INJECT_INPUT
+ : "memory", "cc", "r0"
+ RSEQ_INJECT_CLOBBER
+ : abort
+#ifdef RSEQ_COMPARE_TWICE
+ , error1
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t newv2,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ LONG_CMP " %[expect], %[v]\n\t"
+ "jnz %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ LONG_CMP " %[expect], %[v]\n\t"
+ "jnz %l[error2]\n\t"
+#endif
+ /* try store */
+ LONG_S " %[newv2], %[v2]\n\t"
+ RSEQ_INJECT_ASM(5)
+ /* final store */
+ LONG_S " %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(6)
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* try store input */
+ [v2] "m" (*v2),
+ [newv2] "r" (newv2),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+ RSEQ_INJECT_INPUT
+ : "memory", "cc", "r0"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+/* s390 is TSO. */
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t newv2,
+ intptr_t newv, int cpu)
+{
+ return rseq_cmpeqv_trystorev_storev(v, expect, v2, newv2, newv, cpu);
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t expect2,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ LONG_CMP " %[expect], %[v]\n\t"
+ "jnz %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+ LONG_CMP " %[expect2], %[v2]\n\t"
+ "jnz %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(5)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ LONG_CMP " %[expect], %[v]\n\t"
+ "jnz %l[error2]\n\t"
+ LONG_CMP " %[expect2], %[v2]\n\t"
+ "jnz %l[error3]\n\t"
+#endif
+ /* final store */
+ LONG_S " %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(6)
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* cmp2 input */
+ [v2] "m" (*v2),
+ [expect2] "r" (expect2),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+ RSEQ_INJECT_INPUT
+ : "memory", "cc", "r0"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2, error3
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("1st expected value comparison failed");
+error3:
+ rseq_bug("2nd expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
+ void *dst, void *src, size_t len,
+ intptr_t newv, int cpu)
+{
+ uint64_t rseq_scratch[3];
+
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ LONG_S " %[src], %[rseq_scratch0]\n\t"
+ LONG_S " %[dst], %[rseq_scratch1]\n\t"
+ LONG_S " %[len], %[rseq_scratch2]\n\t"
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ LONG_CMP " %[expect], %[v]\n\t"
+ "jnz 5f\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f)
+ LONG_CMP " %[expect], %[v]\n\t"
+ "jnz 7f\n\t"
+#endif
+ /* try memcpy */
+ LONG_LT_R " %[len], %[len]\n\t"
+ "jz 333f\n\t"
+ "222:\n\t"
+ "ic %%r0,0(%[src])\n\t"
+ "stc %%r0,0(%[dst])\n\t"
+ LONG_ADDI " %[src], 1\n\t"
+ LONG_ADDI " %[dst], 1\n\t"
+ LONG_ADDI " %[len], -1\n\t"
+ "jnz 222b\n\t"
+ "333:\n\t"
+ RSEQ_INJECT_ASM(5)
+ /* final store */
+ LONG_S " %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(6)
+ /* teardown */
+ LONG_L " %[len], %[rseq_scratch2]\n\t"
+ LONG_L " %[dst], %[rseq_scratch1]\n\t"
+ LONG_L " %[src], %[rseq_scratch0]\n\t"
+ RSEQ_ASM_DEFINE_ABORT(4,
+ LONG_L " %[len], %[rseq_scratch2]\n\t"
+ LONG_L " %[dst], %[rseq_scratch1]\n\t"
+ LONG_L " %[src], %[rseq_scratch0]\n\t",
+ abort)
+ RSEQ_ASM_DEFINE_CMPFAIL(5,
+ LONG_L " %[len], %[rseq_scratch2]\n\t"
+ LONG_L " %[dst], %[rseq_scratch1]\n\t"
+ LONG_L " %[src], %[rseq_scratch0]\n\t",
+ cmpfail)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_CMPFAIL(6,
+ LONG_L " %[len], %[rseq_scratch2]\n\t"
+ LONG_L " %[dst], %[rseq_scratch1]\n\t"
+ LONG_L " %[src], %[rseq_scratch0]\n\t",
+ error1)
+ RSEQ_ASM_DEFINE_CMPFAIL(7,
+ LONG_L " %[len], %[rseq_scratch2]\n\t"
+ LONG_L " %[dst], %[rseq_scratch1]\n\t"
+ LONG_L " %[src], %[rseq_scratch0]\n\t",
+ error2)
+#endif
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv),
+ /* try memcpy input */
+ [dst] "r" (dst),
+ [src] "r" (src),
+ [len] "r" (len),
+ [rseq_scratch0] "m" (rseq_scratch[0]),
+ [rseq_scratch1] "m" (rseq_scratch[1]),
+ [rseq_scratch2] "m" (rseq_scratch[2])
+ RSEQ_INJECT_INPUT
+ : "memory", "cc", "r0"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+/* s390 is TSO. */
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
+ void *dst, void *src, size_t len,
+ intptr_t newv, int cpu)
+{
+ return rseq_cmpeqv_trymemcpy_storev(v, expect, dst, src, len,
+ newv, cpu);
+}
+#endif /* !RSEQ_SKIP_FASTPATH */
diff --git a/tools/testing/selftests/rseq/rseq-skip.h b/tools/testing/selftests/rseq/rseq-skip.h
new file mode 100644
index 000000000..72750b590
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-skip.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * rseq-skip.h
+ *
+ * (C) Copyright 2017-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ */
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+{
+ return -1;
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
+ off_t voffp, intptr_t *load, int cpu)
+{
+ return -1;
+}
+
+static inline __attribute__((always_inline))
+int rseq_addv(intptr_t *v, intptr_t count, int cpu)
+{
+ return -1;
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t newv2,
+ intptr_t newv, int cpu)
+{
+ return -1;
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t newv2,
+ intptr_t newv, int cpu)
+{
+ return -1;
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t expect2,
+ intptr_t newv, int cpu)
+{
+ return -1;
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
+ void *dst, void *src, size_t len,
+ intptr_t newv, int cpu)
+{
+ return -1;
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
+ void *dst, void *src, size_t len,
+ intptr_t newv, int cpu)
+{
+ return -1;
+}
diff --git a/tools/testing/selftests/rseq/rseq-x86.h b/tools/testing/selftests/rseq/rseq-x86.h
new file mode 100644
index 000000000..089410a31
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-x86.h
@@ -0,0 +1,1132 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * rseq-x86.h
+ *
+ * (C) Copyright 2016-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ */
+
+#include <stdint.h>
+
+#define RSEQ_SIG 0x53053053
+
+#ifdef __x86_64__
+
+#define rseq_smp_mb() \
+ __asm__ __volatile__ ("lock; addl $0,-128(%%rsp)" ::: "memory", "cc")
+#define rseq_smp_rmb() rseq_barrier()
+#define rseq_smp_wmb() rseq_barrier()
+
+#define rseq_smp_load_acquire(p) \
+__extension__ ({ \
+ __typeof(*p) ____p1 = RSEQ_READ_ONCE(*p); \
+ rseq_barrier(); \
+ ____p1; \
+})
+
+#define rseq_smp_acquire__after_ctrl_dep() rseq_smp_rmb()
+
+#define rseq_smp_store_release(p, v) \
+do { \
+ rseq_barrier(); \
+ RSEQ_WRITE_ONCE(*p, v); \
+} while (0)
+
+#ifdef RSEQ_SKIP_FASTPATH
+#include "rseq-skip.h"
+#else /* !RSEQ_SKIP_FASTPATH */
+
+#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, \
+ start_ip, post_commit_offset, abort_ip) \
+ ".pushsection __rseq_table, \"aw\"\n\t" \
+ ".balign 32\n\t" \
+ __rseq_str(label) ":\n\t" \
+ ".long " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
+ ".quad " __rseq_str(start_ip) ", " __rseq_str(post_commit_offset) ", " __rseq_str(abort_ip) "\n\t" \
+ ".popsection\n\t"
+
+#define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip) \
+ __RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip, \
+ (post_commit_ip - start_ip), abort_ip)
+
+#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \
+ RSEQ_INJECT_ASM(1) \
+ "leaq " __rseq_str(cs_label) "(%%rip), %%rax\n\t" \
+ "movq %%rax, %[" __rseq_str(rseq_cs) "]\n\t" \
+ __rseq_str(label) ":\n\t"
+
+#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label) \
+ RSEQ_INJECT_ASM(2) \
+ "cmpl %[" __rseq_str(cpu_id) "], %[" __rseq_str(current_cpu_id) "]\n\t" \
+ "jnz " __rseq_str(label) "\n\t"
+
+#define RSEQ_ASM_DEFINE_ABORT(label, teardown, abort_label) \
+ ".pushsection __rseq_failure, \"ax\"\n\t" \
+ /* Disassembler-friendly signature: nopl <sig>(%rip). */\
+ ".byte 0x0f, 0x1f, 0x05\n\t" \
+ ".long " __rseq_str(RSEQ_SIG) "\n\t" \
+ __rseq_str(label) ":\n\t" \
+ teardown \
+ "jmp %l[" __rseq_str(abort_label) "]\n\t" \
+ ".popsection\n\t"
+
+#define RSEQ_ASM_DEFINE_CMPFAIL(label, teardown, cmpfail_label) \
+ ".pushsection __rseq_failure, \"ax\"\n\t" \
+ __rseq_str(label) ":\n\t" \
+ teardown \
+ "jmp %l[" __rseq_str(cmpfail_label) "]\n\t" \
+ ".popsection\n\t"
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ "cmpq %[v], %[expect]\n\t"
+ "jnz %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ "cmpq %[v], %[expect]\n\t"
+ "jnz %l[error2]\n\t"
+#endif
+ /* final store */
+ "movq %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(5)
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+ : "memory", "cc", "rax"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+/*
+ * Compare @v against @expectnot. When it does _not_ match, load @v
+ * into @load, and store the content of *@v + voffp into @v.
+ */
+static inline __attribute__((always_inline))
+int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
+ off_t voffp, intptr_t *load, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ "movq %[v], %%rbx\n\t"
+ "cmpq %%rbx, %[expectnot]\n\t"
+ "je %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ "movq %[v], %%rbx\n\t"
+ "cmpq %%rbx, %[expectnot]\n\t"
+ "je %l[error2]\n\t"
+#endif
+ "movq %%rbx, %[load]\n\t"
+ "addq %[voffp], %%rbx\n\t"
+ "movq (%%rbx), %%rbx\n\t"
+ /* final store */
+ "movq %%rbx, %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(5)
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* final store input */
+ [v] "m" (*v),
+ [expectnot] "r" (expectnot),
+ [voffp] "er" (voffp),
+ [load] "m" (*load)
+ : "memory", "cc", "rax", "rbx"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_addv(intptr_t *v, intptr_t count, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+#endif
+ /* final store */
+ "addq %[count], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(4)
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* final store input */
+ [v] "m" (*v),
+ [count] "er" (count)
+ : "memory", "cc", "rax"
+ RSEQ_INJECT_CLOBBER
+ : abort
+#ifdef RSEQ_COMPARE_TWICE
+ , error1
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t newv2,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ "cmpq %[v], %[expect]\n\t"
+ "jnz %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ "cmpq %[v], %[expect]\n\t"
+ "jnz %l[error2]\n\t"
+#endif
+ /* try store */
+ "movq %[newv2], %[v2]\n\t"
+ RSEQ_INJECT_ASM(5)
+ /* final store */
+ "movq %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(6)
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* try store input */
+ [v2] "m" (*v2),
+ [newv2] "r" (newv2),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+ : "memory", "cc", "rax"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+/* x86-64 is TSO. */
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t newv2,
+ intptr_t newv, int cpu)
+{
+ return rseq_cmpeqv_trystorev_storev(v, expect, v2, newv2, newv, cpu);
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t expect2,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ "cmpq %[v], %[expect]\n\t"
+ "jnz %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+ "cmpq %[v2], %[expect2]\n\t"
+ "jnz %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(5)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ "cmpq %[v], %[expect]\n\t"
+ "jnz %l[error2]\n\t"
+ "cmpq %[v2], %[expect2]\n\t"
+ "jnz %l[error3]\n\t"
+#endif
+ /* final store */
+ "movq %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(6)
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* cmp2 input */
+ [v2] "m" (*v2),
+ [expect2] "r" (expect2),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+ : "memory", "cc", "rax"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2, error3
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("1st expected value comparison failed");
+error3:
+ rseq_bug("2nd expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
+ void *dst, void *src, size_t len,
+ intptr_t newv, int cpu)
+{
+ uint64_t rseq_scratch[3];
+
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ "movq %[src], %[rseq_scratch0]\n\t"
+ "movq %[dst], %[rseq_scratch1]\n\t"
+ "movq %[len], %[rseq_scratch2]\n\t"
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ "cmpq %[v], %[expect]\n\t"
+ "jnz 5f\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f)
+ "cmpq %[v], %[expect]\n\t"
+ "jnz 7f\n\t"
+#endif
+ /* try memcpy */
+ "test %[len], %[len]\n\t" \
+ "jz 333f\n\t" \
+ "222:\n\t" \
+ "movb (%[src]), %%al\n\t" \
+ "movb %%al, (%[dst])\n\t" \
+ "inc %[src]\n\t" \
+ "inc %[dst]\n\t" \
+ "dec %[len]\n\t" \
+ "jnz 222b\n\t" \
+ "333:\n\t" \
+ RSEQ_INJECT_ASM(5)
+ /* final store */
+ "movq %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(6)
+ /* teardown */
+ "movq %[rseq_scratch2], %[len]\n\t"
+ "movq %[rseq_scratch1], %[dst]\n\t"
+ "movq %[rseq_scratch0], %[src]\n\t"
+ RSEQ_ASM_DEFINE_ABORT(4,
+ "movq %[rseq_scratch2], %[len]\n\t"
+ "movq %[rseq_scratch1], %[dst]\n\t"
+ "movq %[rseq_scratch0], %[src]\n\t",
+ abort)
+ RSEQ_ASM_DEFINE_CMPFAIL(5,
+ "movq %[rseq_scratch2], %[len]\n\t"
+ "movq %[rseq_scratch1], %[dst]\n\t"
+ "movq %[rseq_scratch0], %[src]\n\t",
+ cmpfail)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_CMPFAIL(6,
+ "movq %[rseq_scratch2], %[len]\n\t"
+ "movq %[rseq_scratch1], %[dst]\n\t"
+ "movq %[rseq_scratch0], %[src]\n\t",
+ error1)
+ RSEQ_ASM_DEFINE_CMPFAIL(7,
+ "movq %[rseq_scratch2], %[len]\n\t"
+ "movq %[rseq_scratch1], %[dst]\n\t"
+ "movq %[rseq_scratch0], %[src]\n\t",
+ error2)
+#endif
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv),
+ /* try memcpy input */
+ [dst] "r" (dst),
+ [src] "r" (src),
+ [len] "r" (len),
+ [rseq_scratch0] "m" (rseq_scratch[0]),
+ [rseq_scratch1] "m" (rseq_scratch[1]),
+ [rseq_scratch2] "m" (rseq_scratch[2])
+ : "memory", "cc", "rax"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+/* x86-64 is TSO. */
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
+ void *dst, void *src, size_t len,
+ intptr_t newv, int cpu)
+{
+ return rseq_cmpeqv_trymemcpy_storev(v, expect, dst, src, len,
+ newv, cpu);
+}
+
+#endif /* !RSEQ_SKIP_FASTPATH */
+
+#elif __i386__
+
+#define rseq_smp_mb() \
+ __asm__ __volatile__ ("lock; addl $0,-128(%%esp)" ::: "memory", "cc")
+#define rseq_smp_rmb() \
+ __asm__ __volatile__ ("lock; addl $0,-128(%%esp)" ::: "memory", "cc")
+#define rseq_smp_wmb() \
+ __asm__ __volatile__ ("lock; addl $0,-128(%%esp)" ::: "memory", "cc")
+
+#define rseq_smp_load_acquire(p) \
+__extension__ ({ \
+ __typeof(*p) ____p1 = RSEQ_READ_ONCE(*p); \
+ rseq_smp_mb(); \
+ ____p1; \
+})
+
+#define rseq_smp_acquire__after_ctrl_dep() rseq_smp_rmb()
+
+#define rseq_smp_store_release(p, v) \
+do { \
+ rseq_smp_mb(); \
+ RSEQ_WRITE_ONCE(*p, v); \
+} while (0)
+
+#ifdef RSEQ_SKIP_FASTPATH
+#include "rseq-skip.h"
+#else /* !RSEQ_SKIP_FASTPATH */
+
+/*
+ * Use eax as scratch register and take memory operands as input to
+ * lessen register pressure. Especially needed when compiling in O0.
+ */
+#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, \
+ start_ip, post_commit_offset, abort_ip) \
+ ".pushsection __rseq_table, \"aw\"\n\t" \
+ ".balign 32\n\t" \
+ __rseq_str(label) ":\n\t" \
+ ".long " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
+ ".long " __rseq_str(start_ip) ", 0x0, " __rseq_str(post_commit_offset) ", 0x0, " __rseq_str(abort_ip) ", 0x0\n\t" \
+ ".popsection\n\t"
+
+#define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip) \
+ __RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip, \
+ (post_commit_ip - start_ip), abort_ip)
+
+#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \
+ RSEQ_INJECT_ASM(1) \
+ "movl $" __rseq_str(cs_label) ", %[rseq_cs]\n\t" \
+ __rseq_str(label) ":\n\t"
+
+#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label) \
+ RSEQ_INJECT_ASM(2) \
+ "cmpl %[" __rseq_str(cpu_id) "], %[" __rseq_str(current_cpu_id) "]\n\t" \
+ "jnz " __rseq_str(label) "\n\t"
+
+#define RSEQ_ASM_DEFINE_ABORT(label, teardown, abort_label) \
+ ".pushsection __rseq_failure, \"ax\"\n\t" \
+ /* Disassembler-friendly signature: nopl <sig>. */ \
+ ".byte 0x0f, 0x1f, 0x05\n\t" \
+ ".long " __rseq_str(RSEQ_SIG) "\n\t" \
+ __rseq_str(label) ":\n\t" \
+ teardown \
+ "jmp %l[" __rseq_str(abort_label) "]\n\t" \
+ ".popsection\n\t"
+
+#define RSEQ_ASM_DEFINE_CMPFAIL(label, teardown, cmpfail_label) \
+ ".pushsection __rseq_failure, \"ax\"\n\t" \
+ __rseq_str(label) ":\n\t" \
+ teardown \
+ "jmp %l[" __rseq_str(cmpfail_label) "]\n\t" \
+ ".popsection\n\t"
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ "cmpl %[v], %[expect]\n\t"
+ "jnz %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ "cmpl %[v], %[expect]\n\t"
+ "jnz %l[error2]\n\t"
+#endif
+ /* final store */
+ "movl %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(5)
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+ : "memory", "cc", "eax"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+/*
+ * Compare @v against @expectnot. When it does _not_ match, load @v
+ * into @load, and store the content of *@v + voffp into @v.
+ */
+static inline __attribute__((always_inline))
+int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
+ off_t voffp, intptr_t *load, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ "movl %[v], %%ebx\n\t"
+ "cmpl %%ebx, %[expectnot]\n\t"
+ "je %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ "movl %[v], %%ebx\n\t"
+ "cmpl %%ebx, %[expectnot]\n\t"
+ "je %l[error2]\n\t"
+#endif
+ "movl %%ebx, %[load]\n\t"
+ "addl %[voffp], %%ebx\n\t"
+ "movl (%%ebx), %%ebx\n\t"
+ /* final store */
+ "movl %%ebx, %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(5)
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* final store input */
+ [v] "m" (*v),
+ [expectnot] "r" (expectnot),
+ [voffp] "ir" (voffp),
+ [load] "m" (*load)
+ : "memory", "cc", "eax", "ebx"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_addv(intptr_t *v, intptr_t count, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+#endif
+ /* final store */
+ "addl %[count], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(4)
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* final store input */
+ [v] "m" (*v),
+ [count] "ir" (count)
+ : "memory", "cc", "eax"
+ RSEQ_INJECT_CLOBBER
+ : abort
+#ifdef RSEQ_COMPARE_TWICE
+ , error1
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t newv2,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ "cmpl %[v], %[expect]\n\t"
+ "jnz %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ "cmpl %[v], %[expect]\n\t"
+ "jnz %l[error2]\n\t"
+#endif
+ /* try store */
+ "movl %[newv2], %%eax\n\t"
+ "movl %%eax, %[v2]\n\t"
+ RSEQ_INJECT_ASM(5)
+ /* final store */
+ "movl %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(6)
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* try store input */
+ [v2] "m" (*v2),
+ [newv2] "m" (newv2),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+ : "memory", "cc", "eax"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t newv2,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ "movl %[expect], %%eax\n\t"
+ "cmpl %[v], %%eax\n\t"
+ "jnz %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ "movl %[expect], %%eax\n\t"
+ "cmpl %[v], %%eax\n\t"
+ "jnz %l[error2]\n\t"
+#endif
+ /* try store */
+ "movl %[newv2], %[v2]\n\t"
+ RSEQ_INJECT_ASM(5)
+ "lock; addl $0,-128(%%esp)\n\t"
+ /* final store */
+ "movl %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(6)
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* try store input */
+ [v2] "m" (*v2),
+ [newv2] "r" (newv2),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "m" (expect),
+ [newv] "r" (newv)
+ : "memory", "cc", "eax"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t expect2,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ "cmpl %[v], %[expect]\n\t"
+ "jnz %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+ "cmpl %[expect2], %[v2]\n\t"
+ "jnz %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(5)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ "cmpl %[v], %[expect]\n\t"
+ "jnz %l[error2]\n\t"
+ "cmpl %[expect2], %[v2]\n\t"
+ "jnz %l[error3]\n\t"
+#endif
+ "movl %[newv], %%eax\n\t"
+ /* final store */
+ "movl %%eax, %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(6)
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* cmp2 input */
+ [v2] "m" (*v2),
+ [expect2] "r" (expect2),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "m" (newv)
+ : "memory", "cc", "eax"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2, error3
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("1st expected value comparison failed");
+error3:
+ rseq_bug("2nd expected value comparison failed");
+#endif
+}
+
+/* TODO: implement a faster memcpy. */
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
+ void *dst, void *src, size_t len,
+ intptr_t newv, int cpu)
+{
+ uint32_t rseq_scratch[3];
+
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ "movl %[src], %[rseq_scratch0]\n\t"
+ "movl %[dst], %[rseq_scratch1]\n\t"
+ "movl %[len], %[rseq_scratch2]\n\t"
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ "movl %[expect], %%eax\n\t"
+ "cmpl %%eax, %[v]\n\t"
+ "jnz 5f\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f)
+ "movl %[expect], %%eax\n\t"
+ "cmpl %%eax, %[v]\n\t"
+ "jnz 7f\n\t"
+#endif
+ /* try memcpy */
+ "test %[len], %[len]\n\t" \
+ "jz 333f\n\t" \
+ "222:\n\t" \
+ "movb (%[src]), %%al\n\t" \
+ "movb %%al, (%[dst])\n\t" \
+ "inc %[src]\n\t" \
+ "inc %[dst]\n\t" \
+ "dec %[len]\n\t" \
+ "jnz 222b\n\t" \
+ "333:\n\t" \
+ RSEQ_INJECT_ASM(5)
+ "movl %[newv], %%eax\n\t"
+ /* final store */
+ "movl %%eax, %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(6)
+ /* teardown */
+ "movl %[rseq_scratch2], %[len]\n\t"
+ "movl %[rseq_scratch1], %[dst]\n\t"
+ "movl %[rseq_scratch0], %[src]\n\t"
+ RSEQ_ASM_DEFINE_ABORT(4,
+ "movl %[rseq_scratch2], %[len]\n\t"
+ "movl %[rseq_scratch1], %[dst]\n\t"
+ "movl %[rseq_scratch0], %[src]\n\t",
+ abort)
+ RSEQ_ASM_DEFINE_CMPFAIL(5,
+ "movl %[rseq_scratch2], %[len]\n\t"
+ "movl %[rseq_scratch1], %[dst]\n\t"
+ "movl %[rseq_scratch0], %[src]\n\t",
+ cmpfail)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_CMPFAIL(6,
+ "movl %[rseq_scratch2], %[len]\n\t"
+ "movl %[rseq_scratch1], %[dst]\n\t"
+ "movl %[rseq_scratch0], %[src]\n\t",
+ error1)
+ RSEQ_ASM_DEFINE_CMPFAIL(7,
+ "movl %[rseq_scratch2], %[len]\n\t"
+ "movl %[rseq_scratch1], %[dst]\n\t"
+ "movl %[rseq_scratch0], %[src]\n\t",
+ error2)
+#endif
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "m" (expect),
+ [newv] "m" (newv),
+ /* try memcpy input */
+ [dst] "r" (dst),
+ [src] "r" (src),
+ [len] "r" (len),
+ [rseq_scratch0] "m" (rseq_scratch[0]),
+ [rseq_scratch1] "m" (rseq_scratch[1]),
+ [rseq_scratch2] "m" (rseq_scratch[2])
+ : "memory", "cc", "eax"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+/* TODO: implement a faster memcpy. */
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
+ void *dst, void *src, size_t len,
+ intptr_t newv, int cpu)
+{
+ uint32_t rseq_scratch[3];
+
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ "movl %[src], %[rseq_scratch0]\n\t"
+ "movl %[dst], %[rseq_scratch1]\n\t"
+ "movl %[len], %[rseq_scratch2]\n\t"
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ "movl %[expect], %%eax\n\t"
+ "cmpl %%eax, %[v]\n\t"
+ "jnz 5f\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f)
+ "movl %[expect], %%eax\n\t"
+ "cmpl %%eax, %[v]\n\t"
+ "jnz 7f\n\t"
+#endif
+ /* try memcpy */
+ "test %[len], %[len]\n\t" \
+ "jz 333f\n\t" \
+ "222:\n\t" \
+ "movb (%[src]), %%al\n\t" \
+ "movb %%al, (%[dst])\n\t" \
+ "inc %[src]\n\t" \
+ "inc %[dst]\n\t" \
+ "dec %[len]\n\t" \
+ "jnz 222b\n\t" \
+ "333:\n\t" \
+ RSEQ_INJECT_ASM(5)
+ "lock; addl $0,-128(%%esp)\n\t"
+ "movl %[newv], %%eax\n\t"
+ /* final store */
+ "movl %%eax, %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(6)
+ /* teardown */
+ "movl %[rseq_scratch2], %[len]\n\t"
+ "movl %[rseq_scratch1], %[dst]\n\t"
+ "movl %[rseq_scratch0], %[src]\n\t"
+ RSEQ_ASM_DEFINE_ABORT(4,
+ "movl %[rseq_scratch2], %[len]\n\t"
+ "movl %[rseq_scratch1], %[dst]\n\t"
+ "movl %[rseq_scratch0], %[src]\n\t",
+ abort)
+ RSEQ_ASM_DEFINE_CMPFAIL(5,
+ "movl %[rseq_scratch2], %[len]\n\t"
+ "movl %[rseq_scratch1], %[dst]\n\t"
+ "movl %[rseq_scratch0], %[src]\n\t",
+ cmpfail)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_CMPFAIL(6,
+ "movl %[rseq_scratch2], %[len]\n\t"
+ "movl %[rseq_scratch1], %[dst]\n\t"
+ "movl %[rseq_scratch0], %[src]\n\t",
+ error1)
+ RSEQ_ASM_DEFINE_CMPFAIL(7,
+ "movl %[rseq_scratch2], %[len]\n\t"
+ "movl %[rseq_scratch1], %[dst]\n\t"
+ "movl %[rseq_scratch0], %[src]\n\t",
+ error2)
+#endif
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "m" (expect),
+ [newv] "m" (newv),
+ /* try memcpy input */
+ [dst] "r" (dst),
+ [src] "r" (src),
+ [len] "r" (len),
+ [rseq_scratch0] "m" (rseq_scratch[0]),
+ [rseq_scratch1] "m" (rseq_scratch[1]),
+ [rseq_scratch2] "m" (rseq_scratch[2])
+ : "memory", "cc", "eax"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+#endif /* !RSEQ_SKIP_FASTPATH */
+
+#endif
diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c
new file mode 100644
index 000000000..4847e97ed
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq.c
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * rseq.c
+ *
+ * Copyright (C) 2016 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; only
+ * version 2.1 of the License.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <syscall.h>
+#include <assert.h>
+#include <signal.h>
+
+#include "rseq.h"
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+__attribute__((tls_model("initial-exec"))) __thread
+volatile struct rseq __rseq_abi = {
+ .cpu_id = RSEQ_CPU_ID_UNINITIALIZED,
+};
+
+static __attribute__((tls_model("initial-exec"))) __thread
+volatile int refcount;
+
+static void signal_off_save(sigset_t *oldset)
+{
+ sigset_t set;
+ int ret;
+
+ sigfillset(&set);
+ ret = pthread_sigmask(SIG_BLOCK, &set, oldset);
+ if (ret)
+ abort();
+}
+
+static void signal_restore(sigset_t oldset)
+{
+ int ret;
+
+ ret = pthread_sigmask(SIG_SETMASK, &oldset, NULL);
+ if (ret)
+ abort();
+}
+
+static int sys_rseq(volatile struct rseq *rseq_abi, uint32_t rseq_len,
+ int flags, uint32_t sig)
+{
+ return syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig);
+}
+
+int rseq_register_current_thread(void)
+{
+ int rc, ret = 0;
+ sigset_t oldset;
+
+ signal_off_save(&oldset);
+ if (refcount++)
+ goto end;
+ rc = sys_rseq(&__rseq_abi, sizeof(struct rseq), 0, RSEQ_SIG);
+ if (!rc) {
+ assert(rseq_current_cpu_raw() >= 0);
+ goto end;
+ }
+ if (errno != EBUSY)
+ __rseq_abi.cpu_id = -2;
+ ret = -1;
+ refcount--;
+end:
+ signal_restore(oldset);
+ return ret;
+}
+
+int rseq_unregister_current_thread(void)
+{
+ int rc, ret = 0;
+ sigset_t oldset;
+
+ signal_off_save(&oldset);
+ if (--refcount)
+ goto end;
+ rc = sys_rseq(&__rseq_abi, sizeof(struct rseq),
+ RSEQ_FLAG_UNREGISTER, RSEQ_SIG);
+ if (!rc)
+ goto end;
+ ret = -1;
+end:
+ signal_restore(oldset);
+ return ret;
+}
+
+int32_t rseq_fallback_current_cpu(void)
+{
+ int32_t cpu;
+
+ cpu = sched_getcpu();
+ if (cpu < 0) {
+ perror("sched_getcpu()");
+ abort();
+ }
+ return cpu;
+}
diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h
new file mode 100644
index 000000000..c72eb70f9
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq.h
@@ -0,0 +1,163 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * rseq.h
+ *
+ * (C) Copyright 2016-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ */
+
+#ifndef RSEQ_H
+#define RSEQ_H
+
+#include <stdint.h>
+#include <stdbool.h>
+#include <pthread.h>
+#include <signal.h>
+#include <sched.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sched.h>
+#include <linux/rseq.h>
+
+/*
+ * Empty code injection macros, override when testing.
+ * It is important to consider that the ASM injection macros need to be
+ * fully reentrant (e.g. do not modify the stack).
+ */
+#ifndef RSEQ_INJECT_ASM
+#define RSEQ_INJECT_ASM(n)
+#endif
+
+#ifndef RSEQ_INJECT_C
+#define RSEQ_INJECT_C(n)
+#endif
+
+#ifndef RSEQ_INJECT_INPUT
+#define RSEQ_INJECT_INPUT
+#endif
+
+#ifndef RSEQ_INJECT_CLOBBER
+#define RSEQ_INJECT_CLOBBER
+#endif
+
+#ifndef RSEQ_INJECT_FAILED
+#define RSEQ_INJECT_FAILED
+#endif
+
+extern __thread volatile struct rseq __rseq_abi;
+
+#define rseq_likely(x) __builtin_expect(!!(x), 1)
+#define rseq_unlikely(x) __builtin_expect(!!(x), 0)
+#define rseq_barrier() __asm__ __volatile__("" : : : "memory")
+
+#define RSEQ_ACCESS_ONCE(x) (*(__volatile__ __typeof__(x) *)&(x))
+#define RSEQ_WRITE_ONCE(x, v) __extension__ ({ RSEQ_ACCESS_ONCE(x) = (v); })
+#define RSEQ_READ_ONCE(x) RSEQ_ACCESS_ONCE(x)
+
+#define __rseq_str_1(x) #x
+#define __rseq_str(x) __rseq_str_1(x)
+
+#define rseq_log(fmt, args...) \
+ fprintf(stderr, fmt "(in %s() at " __FILE__ ":" __rseq_str(__LINE__)"\n", \
+ ## args, __func__)
+
+#define rseq_bug(fmt, args...) \
+ do { \
+ rseq_log(fmt, ##args); \
+ abort(); \
+ } while (0)
+
+#if defined(__x86_64__) || defined(__i386__)
+#include <rseq-x86.h>
+#elif defined(__ARMEL__)
+#include <rseq-arm.h>
+#elif defined (__AARCH64EL__)
+#include <rseq-arm64.h>
+#elif defined(__PPC__)
+#include <rseq-ppc.h>
+#elif defined(__mips__)
+#include <rseq-mips.h>
+#elif defined(__s390__)
+#include <rseq-s390.h>
+#else
+#error unsupported target
+#endif
+
+/*
+ * Register rseq for the current thread. This needs to be called once
+ * by any thread which uses restartable sequences, before they start
+ * using restartable sequences, to ensure restartable sequences
+ * succeed. A restartable sequence executed from a non-registered
+ * thread will always fail.
+ */
+int rseq_register_current_thread(void);
+
+/*
+ * Unregister rseq for current thread.
+ */
+int rseq_unregister_current_thread(void);
+
+/*
+ * Restartable sequence fallback for reading the current CPU number.
+ */
+int32_t rseq_fallback_current_cpu(void);
+
+/*
+ * Values returned can be either the current CPU number, -1 (rseq is
+ * uninitialized), or -2 (rseq initialization has failed).
+ */
+static inline int32_t rseq_current_cpu_raw(void)
+{
+ return RSEQ_ACCESS_ONCE(__rseq_abi.cpu_id);
+}
+
+/*
+ * Returns a possible CPU number, which is typically the current CPU.
+ * The returned CPU number can be used to prepare for an rseq critical
+ * section, which will confirm whether the cpu number is indeed the
+ * current one, and whether rseq is initialized.
+ *
+ * The CPU number returned by rseq_cpu_start should always be validated
+ * by passing it to a rseq asm sequence, or by comparing it to the
+ * return value of rseq_current_cpu_raw() if the rseq asm sequence
+ * does not need to be invoked.
+ */
+static inline uint32_t rseq_cpu_start(void)
+{
+ return RSEQ_ACCESS_ONCE(__rseq_abi.cpu_id_start);
+}
+
+static inline uint32_t rseq_current_cpu(void)
+{
+ int32_t cpu;
+
+ cpu = rseq_current_cpu_raw();
+ if (rseq_unlikely(cpu < 0))
+ cpu = rseq_fallback_current_cpu();
+ return cpu;
+}
+
+static inline void rseq_clear_rseq_cs(void)
+{
+#ifdef __LP64__
+ __rseq_abi.rseq_cs.ptr = 0;
+#else
+ __rseq_abi.rseq_cs.ptr.ptr32 = 0;
+#endif
+}
+
+/*
+ * rseq_prepare_unload() should be invoked by each thread executing a rseq
+ * critical section at least once between their last critical section and
+ * library unload of the library defining the rseq critical section
+ * (struct rseq_cs). This also applies to use of rseq in code generated by
+ * JIT: rseq_prepare_unload() should be invoked at least once by each
+ * thread executing a rseq critical section before reclaim of the memory
+ * holding the struct rseq_cs.
+ */
+static inline void rseq_prepare_unload(void)
+{
+ rseq_clear_rseq_cs();
+}
+
+#endif /* RSEQ_H_ */
diff --git a/tools/testing/selftests/rseq/run_param_test.sh b/tools/testing/selftests/rseq/run_param_test.sh
new file mode 100755
index 000000000..3acd6d75f
--- /dev/null
+++ b/tools/testing/selftests/rseq/run_param_test.sh
@@ -0,0 +1,121 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+ or MIT
+
+EXTRA_ARGS=${@}
+
+OLDIFS="$IFS"
+IFS=$'\n'
+TEST_LIST=(
+ "-T s"
+ "-T l"
+ "-T b"
+ "-T b -M"
+ "-T m"
+ "-T m -M"
+ "-T i"
+)
+
+TEST_NAME=(
+ "spinlock"
+ "list"
+ "buffer"
+ "buffer with barrier"
+ "memcpy"
+ "memcpy with barrier"
+ "increment"
+)
+IFS="$OLDIFS"
+
+REPS=1000
+SLOW_REPS=100
+
+function do_tests()
+{
+ local i=0
+ while [ "$i" -lt "${#TEST_LIST[@]}" ]; do
+ echo "Running test ${TEST_NAME[$i]}"
+ ./param_test ${TEST_LIST[$i]} -r ${REPS} ${@} ${EXTRA_ARGS} || exit 1
+ echo "Running compare-twice test ${TEST_NAME[$i]}"
+ ./param_test_compare_twice ${TEST_LIST[$i]} -r ${REPS} ${@} ${EXTRA_ARGS} || exit 1
+ let "i++"
+ done
+}
+
+echo "Default parameters"
+do_tests
+
+echo "Loop injection: 10000 loops"
+
+OLDIFS="$IFS"
+IFS=$'\n'
+INJECT_LIST=(
+ "1"
+ "2"
+ "3"
+ "4"
+ "5"
+ "6"
+ "7"
+ "8"
+ "9"
+)
+IFS="$OLDIFS"
+
+NR_LOOPS=10000
+
+i=0
+while [ "$i" -lt "${#INJECT_LIST[@]}" ]; do
+ echo "Injecting at <${INJECT_LIST[$i]}>"
+ do_tests -${INJECT_LIST[i]} ${NR_LOOPS}
+ let "i++"
+done
+NR_LOOPS=
+
+function inject_blocking()
+{
+ OLDIFS="$IFS"
+ IFS=$'\n'
+ INJECT_LIST=(
+ "7"
+ "8"
+ "9"
+ )
+ IFS="$OLDIFS"
+
+ NR_LOOPS=-1
+
+ i=0
+ while [ "$i" -lt "${#INJECT_LIST[@]}" ]; do
+ echo "Injecting at <${INJECT_LIST[$i]}>"
+ do_tests -${INJECT_LIST[i]} -1 ${@}
+ let "i++"
+ done
+ NR_LOOPS=
+}
+
+echo "Yield injection (25%)"
+inject_blocking -m 4 -y
+
+echo "Yield injection (50%)"
+inject_blocking -m 2 -y
+
+echo "Yield injection (100%)"
+inject_blocking -m 1 -y
+
+echo "Kill injection (25%)"
+inject_blocking -m 4 -k
+
+echo "Kill injection (50%)"
+inject_blocking -m 2 -k
+
+echo "Kill injection (100%)"
+inject_blocking -m 1 -k
+
+echo "Sleep injection (1ms, 25%)"
+inject_blocking -m 4 -s 1
+
+echo "Sleep injection (1ms, 50%)"
+inject_blocking -m 2 -s 1
+
+echo "Sleep injection (1ms, 100%)"
+inject_blocking -m 1 -s 1
diff --git a/tools/testing/selftests/rseq/settings b/tools/testing/selftests/rseq/settings
new file mode 100644
index 000000000..e7b941753
--- /dev/null
+++ b/tools/testing/selftests/rseq/settings
@@ -0,0 +1 @@
+timeout=0
diff --git a/tools/testing/selftests/rtc/.gitignore b/tools/testing/selftests/rtc/.gitignore
new file mode 100644
index 000000000..d0ad44f62
--- /dev/null
+++ b/tools/testing/selftests/rtc/.gitignore
@@ -0,0 +1,2 @@
+rtctest
+setdate
diff --git a/tools/testing/selftests/rtc/Makefile b/tools/testing/selftests/rtc/Makefile
new file mode 100644
index 000000000..de9c85666
--- /dev/null
+++ b/tools/testing/selftests/rtc/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS += -O3 -Wl,-no-as-needed -Wall
+LDFLAGS += -lrt -lpthread -lm
+
+TEST_GEN_PROGS = rtctest
+
+TEST_GEN_PROGS_EXTENDED = setdate
+
+include ../lib.mk
diff --git a/tools/testing/selftests/rtc/rtctest.c b/tools/testing/selftests/rtc/rtctest.c
new file mode 100644
index 000000000..b2065536d
--- /dev/null
+++ b/tools/testing/selftests/rtc/rtctest.c
@@ -0,0 +1,337 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Real Time Clock Driver Test Program
+ *
+ * Copyright (c) 2018 Alexandre Belloni <alexandre.belloni@bootlin.com>
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/rtc.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "../kselftest_harness.h"
+
+#define NUM_UIE 3
+#define ALARM_DELTA 3
+
+static char *rtc_file = "/dev/rtc0";
+
+FIXTURE(rtc) {
+ int fd;
+};
+
+FIXTURE_SETUP(rtc) {
+ self->fd = open(rtc_file, O_RDONLY);
+ ASSERT_NE(-1, self->fd);
+}
+
+FIXTURE_TEARDOWN(rtc) {
+ close(self->fd);
+}
+
+TEST_F(rtc, date_read) {
+ int rc;
+ struct rtc_time rtc_tm;
+
+ /* Read the RTC time/date */
+ rc = ioctl(self->fd, RTC_RD_TIME, &rtc_tm);
+ ASSERT_NE(-1, rc);
+
+ TH_LOG("Current RTC date/time is %02d/%02d/%02d %02d:%02d:%02d.",
+ rtc_tm.tm_mday, rtc_tm.tm_mon + 1, rtc_tm.tm_year + 1900,
+ rtc_tm.tm_hour, rtc_tm.tm_min, rtc_tm.tm_sec);
+}
+
+TEST_F(rtc, uie_read) {
+ int i, rc, irq = 0;
+ unsigned long data;
+
+ /* Turn on update interrupts */
+ rc = ioctl(self->fd, RTC_UIE_ON, 0);
+ if (rc == -1) {
+ ASSERT_EQ(EINVAL, errno);
+ TH_LOG("skip update IRQs not supported.");
+ return;
+ }
+
+ for (i = 0; i < NUM_UIE; i++) {
+ /* This read will block */
+ rc = read(self->fd, &data, sizeof(data));
+ ASSERT_NE(-1, rc);
+ irq++;
+ }
+
+ EXPECT_EQ(NUM_UIE, irq);
+
+ rc = ioctl(self->fd, RTC_UIE_OFF, 0);
+ ASSERT_NE(-1, rc);
+}
+
+TEST_F(rtc, uie_select) {
+ int i, rc, irq = 0;
+ unsigned long data;
+
+ /* Turn on update interrupts */
+ rc = ioctl(self->fd, RTC_UIE_ON, 0);
+ if (rc == -1) {
+ ASSERT_EQ(EINVAL, errno);
+ TH_LOG("skip update IRQs not supported.");
+ return;
+ }
+
+ for (i = 0; i < NUM_UIE; i++) {
+ struct timeval tv = { .tv_sec = 2 };
+ fd_set readfds;
+
+ FD_ZERO(&readfds);
+ FD_SET(self->fd, &readfds);
+ /* The select will wait until an RTC interrupt happens. */
+ rc = select(self->fd + 1, &readfds, NULL, NULL, &tv);
+ ASSERT_NE(-1, rc);
+ ASSERT_NE(0, rc);
+
+ /* This read won't block */
+ rc = read(self->fd, &data, sizeof(unsigned long));
+ ASSERT_NE(-1, rc);
+ irq++;
+ }
+
+ EXPECT_EQ(NUM_UIE, irq);
+
+ rc = ioctl(self->fd, RTC_UIE_OFF, 0);
+ ASSERT_NE(-1, rc);
+}
+
+TEST_F(rtc, alarm_alm_set) {
+ struct timeval tv = { .tv_sec = ALARM_DELTA + 2 };
+ unsigned long data;
+ struct rtc_time tm;
+ fd_set readfds;
+ time_t secs, new;
+ int rc;
+
+ rc = ioctl(self->fd, RTC_RD_TIME, &tm);
+ ASSERT_NE(-1, rc);
+
+ secs = timegm((struct tm *)&tm) + ALARM_DELTA;
+ gmtime_r(&secs, (struct tm *)&tm);
+
+ rc = ioctl(self->fd, RTC_ALM_SET, &tm);
+ if (rc == -1) {
+ ASSERT_EQ(EINVAL, errno);
+ TH_LOG("skip alarms are not supported.");
+ return;
+ }
+
+ rc = ioctl(self->fd, RTC_ALM_READ, &tm);
+ ASSERT_NE(-1, rc);
+
+ TH_LOG("Alarm time now set to %02d:%02d:%02d.",
+ tm.tm_hour, tm.tm_min, tm.tm_sec);
+
+ /* Enable alarm interrupts */
+ rc = ioctl(self->fd, RTC_AIE_ON, 0);
+ ASSERT_NE(-1, rc);
+
+ FD_ZERO(&readfds);
+ FD_SET(self->fd, &readfds);
+
+ rc = select(self->fd + 1, &readfds, NULL, NULL, &tv);
+ ASSERT_NE(-1, rc);
+ ASSERT_NE(0, rc);
+
+ /* Disable alarm interrupts */
+ rc = ioctl(self->fd, RTC_AIE_OFF, 0);
+ ASSERT_NE(-1, rc);
+
+ rc = read(self->fd, &data, sizeof(unsigned long));
+ ASSERT_NE(-1, rc);
+ TH_LOG("data: %lx", data);
+
+ rc = ioctl(self->fd, RTC_RD_TIME, &tm);
+ ASSERT_NE(-1, rc);
+
+ new = timegm((struct tm *)&tm);
+ ASSERT_EQ(new, secs);
+}
+
+TEST_F(rtc, alarm_wkalm_set) {
+ struct timeval tv = { .tv_sec = ALARM_DELTA + 2 };
+ struct rtc_wkalrm alarm = { 0 };
+ struct rtc_time tm;
+ unsigned long data;
+ fd_set readfds;
+ time_t secs, new;
+ int rc;
+
+ rc = ioctl(self->fd, RTC_RD_TIME, &alarm.time);
+ ASSERT_NE(-1, rc);
+
+ secs = timegm((struct tm *)&alarm.time) + ALARM_DELTA;
+ gmtime_r(&secs, (struct tm *)&alarm.time);
+
+ alarm.enabled = 1;
+
+ rc = ioctl(self->fd, RTC_WKALM_SET, &alarm);
+ if (rc == -1) {
+ ASSERT_EQ(EINVAL, errno);
+ TH_LOG("skip alarms are not supported.");
+ return;
+ }
+
+ rc = ioctl(self->fd, RTC_WKALM_RD, &alarm);
+ ASSERT_NE(-1, rc);
+
+ TH_LOG("Alarm time now set to %02d/%02d/%02d %02d:%02d:%02d.",
+ alarm.time.tm_mday, alarm.time.tm_mon + 1,
+ alarm.time.tm_year + 1900, alarm.time.tm_hour,
+ alarm.time.tm_min, alarm.time.tm_sec);
+
+ FD_ZERO(&readfds);
+ FD_SET(self->fd, &readfds);
+
+ rc = select(self->fd + 1, &readfds, NULL, NULL, &tv);
+ ASSERT_NE(-1, rc);
+ ASSERT_NE(0, rc);
+
+ rc = read(self->fd, &data, sizeof(unsigned long));
+ ASSERT_NE(-1, rc);
+
+ rc = ioctl(self->fd, RTC_RD_TIME, &tm);
+ ASSERT_NE(-1, rc);
+
+ new = timegm((struct tm *)&tm);
+ ASSERT_EQ(new, secs);
+}
+
+TEST_F(rtc, alarm_alm_set_minute) {
+ struct timeval tv = { .tv_sec = 62 };
+ unsigned long data;
+ struct rtc_time tm;
+ fd_set readfds;
+ time_t secs, new;
+ int rc;
+
+ rc = ioctl(self->fd, RTC_RD_TIME, &tm);
+ ASSERT_NE(-1, rc);
+
+ secs = timegm((struct tm *)&tm) + 60 - tm.tm_sec;
+ gmtime_r(&secs, (struct tm *)&tm);
+
+ rc = ioctl(self->fd, RTC_ALM_SET, &tm);
+ if (rc == -1) {
+ ASSERT_EQ(EINVAL, errno);
+ TH_LOG("skip alarms are not supported.");
+ return;
+ }
+
+ rc = ioctl(self->fd, RTC_ALM_READ, &tm);
+ ASSERT_NE(-1, rc);
+
+ TH_LOG("Alarm time now set to %02d:%02d:%02d.",
+ tm.tm_hour, tm.tm_min, tm.tm_sec);
+
+ /* Enable alarm interrupts */
+ rc = ioctl(self->fd, RTC_AIE_ON, 0);
+ ASSERT_NE(-1, rc);
+
+ FD_ZERO(&readfds);
+ FD_SET(self->fd, &readfds);
+
+ rc = select(self->fd + 1, &readfds, NULL, NULL, &tv);
+ ASSERT_NE(-1, rc);
+ ASSERT_NE(0, rc);
+
+ /* Disable alarm interrupts */
+ rc = ioctl(self->fd, RTC_AIE_OFF, 0);
+ ASSERT_NE(-1, rc);
+
+ rc = read(self->fd, &data, sizeof(unsigned long));
+ ASSERT_NE(-1, rc);
+ TH_LOG("data: %lx", data);
+
+ rc = ioctl(self->fd, RTC_RD_TIME, &tm);
+ ASSERT_NE(-1, rc);
+
+ new = timegm((struct tm *)&tm);
+ ASSERT_EQ(new, secs);
+}
+
+TEST_F(rtc, alarm_wkalm_set_minute) {
+ struct timeval tv = { .tv_sec = 62 };
+ struct rtc_wkalrm alarm = { 0 };
+ struct rtc_time tm;
+ unsigned long data;
+ fd_set readfds;
+ time_t secs, new;
+ int rc;
+
+ rc = ioctl(self->fd, RTC_RD_TIME, &alarm.time);
+ ASSERT_NE(-1, rc);
+
+ secs = timegm((struct tm *)&alarm.time) + 60 - alarm.time.tm_sec;
+ gmtime_r(&secs, (struct tm *)&alarm.time);
+
+ alarm.enabled = 1;
+
+ rc = ioctl(self->fd, RTC_WKALM_SET, &alarm);
+ if (rc == -1) {
+ ASSERT_EQ(EINVAL, errno);
+ TH_LOG("skip alarms are not supported.");
+ return;
+ }
+
+ rc = ioctl(self->fd, RTC_WKALM_RD, &alarm);
+ ASSERT_NE(-1, rc);
+
+ TH_LOG("Alarm time now set to %02d/%02d/%02d %02d:%02d:%02d.",
+ alarm.time.tm_mday, alarm.time.tm_mon + 1,
+ alarm.time.tm_year + 1900, alarm.time.tm_hour,
+ alarm.time.tm_min, alarm.time.tm_sec);
+
+ FD_ZERO(&readfds);
+ FD_SET(self->fd, &readfds);
+
+ rc = select(self->fd + 1, &readfds, NULL, NULL, &tv);
+ ASSERT_NE(-1, rc);
+ ASSERT_NE(0, rc);
+
+ rc = read(self->fd, &data, sizeof(unsigned long));
+ ASSERT_NE(-1, rc);
+
+ rc = ioctl(self->fd, RTC_RD_TIME, &tm);
+ ASSERT_NE(-1, rc);
+
+ new = timegm((struct tm *)&tm);
+ ASSERT_EQ(new, secs);
+}
+
+static void __attribute__((constructor))
+__constructor_order_last(void)
+{
+ if (!__constructor_order)
+ __constructor_order = _CONSTRUCTOR_ORDER_BACKWARD;
+}
+
+int main(int argc, char **argv)
+{
+ switch (argc) {
+ case 2:
+ rtc_file = argv[1];
+ /* FALLTHROUGH */
+ case 1:
+ break;
+ default:
+ fprintf(stderr, "usage: %s [rtcdev]\n", argv[0]);
+ return 1;
+ }
+
+ return test_harness_run(argc, argv);
+}
diff --git a/tools/testing/selftests/rtc/setdate.c b/tools/testing/selftests/rtc/setdate.c
new file mode 100644
index 000000000..2cb78489e
--- /dev/null
+++ b/tools/testing/selftests/rtc/setdate.c
@@ -0,0 +1,86 @@
+/* Real Time Clock Driver Test
+ * by: Benjamin Gaignard (benjamin.gaignard@linaro.org)
+ *
+ * To build
+ * gcc rtctest_setdate.c -o rtctest_setdate
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <stdio.h>
+#include <linux/rtc.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <errno.h>
+
+static const char default_time[] = "00:00:00";
+
+int main(int argc, char **argv)
+{
+ int fd, retval;
+ struct rtc_time new, current;
+ const char *rtc, *date;
+ const char *time = default_time;
+
+ switch (argc) {
+ case 4:
+ time = argv[3];
+ /* FALLTHROUGH */
+ case 3:
+ date = argv[2];
+ rtc = argv[1];
+ break;
+ default:
+ fprintf(stderr, "usage: rtctest_setdate <rtcdev> <DD-MM-YYYY> [HH:MM:SS]\n");
+ return 1;
+ }
+
+ fd = open(rtc, O_RDONLY);
+ if (fd == -1) {
+ perror(rtc);
+ exit(errno);
+ }
+
+ sscanf(date, "%d-%d-%d", &new.tm_mday, &new.tm_mon, &new.tm_year);
+ new.tm_mon -= 1;
+ new.tm_year -= 1900;
+ sscanf(time, "%d:%d:%d", &new.tm_hour, &new.tm_min, &new.tm_sec);
+
+ fprintf(stderr, "Test will set RTC date/time to %d-%d-%d, %02d:%02d:%02d.\n",
+ new.tm_mday, new.tm_mon + 1, new.tm_year + 1900,
+ new.tm_hour, new.tm_min, new.tm_sec);
+
+ /* Write the new date in RTC */
+ retval = ioctl(fd, RTC_SET_TIME, &new);
+ if (retval == -1) {
+ perror("RTC_SET_TIME ioctl");
+ close(fd);
+ exit(errno);
+ }
+
+ /* Read back */
+ retval = ioctl(fd, RTC_RD_TIME, &current);
+ if (retval == -1) {
+ perror("RTC_RD_TIME ioctl");
+ exit(errno);
+ }
+
+ fprintf(stderr, "\n\nCurrent RTC date/time is %d-%d-%d, %02d:%02d:%02d.\n",
+ current.tm_mday, current.tm_mon + 1, current.tm_year + 1900,
+ current.tm_hour, current.tm_min, current.tm_sec);
+
+ close(fd);
+ return 0;
+}
diff --git a/tools/testing/selftests/seccomp/.gitignore b/tools/testing/selftests/seccomp/.gitignore
new file mode 100644
index 000000000..5af29d3a1
--- /dev/null
+++ b/tools/testing/selftests/seccomp/.gitignore
@@ -0,0 +1,2 @@
+seccomp_bpf
+seccomp_benchmark
diff --git a/tools/testing/selftests/seccomp/Makefile b/tools/testing/selftests/seccomp/Makefile
new file mode 100644
index 000000000..1760b3e39
--- /dev/null
+++ b/tools/testing/selftests/seccomp/Makefile
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: GPL-2.0
+all:
+
+include ../lib.mk
+
+.PHONY: all clean
+
+BINARIES := seccomp_bpf seccomp_benchmark
+CFLAGS += -Wl,-no-as-needed -Wall
+
+seccomp_bpf: seccomp_bpf.c ../kselftest_harness.h
+ $(CC) $(CFLAGS) $(LDFLAGS) $< -lpthread -o $@
+
+TEST_PROGS += $(BINARIES)
+EXTRA_CLEAN := $(BINARIES)
+
+all: $(BINARIES)
diff --git a/tools/testing/selftests/seccomp/config b/tools/testing/selftests/seccomp/config
new file mode 100644
index 000000000..db1e11b08
--- /dev/null
+++ b/tools/testing/selftests/seccomp/config
@@ -0,0 +1,2 @@
+CONFIG_SECCOMP=y
+CONFIG_SECCOMP_FILTER=y
diff --git a/tools/testing/selftests/seccomp/seccomp_benchmark.c b/tools/testing/selftests/seccomp/seccomp_benchmark.c
new file mode 100644
index 000000000..5838c8697
--- /dev/null
+++ b/tools/testing/selftests/seccomp/seccomp_benchmark.c
@@ -0,0 +1,99 @@
+/*
+ * Strictly speaking, this is not a test. But it can report during test
+ * runs so relative performace can be measured.
+ */
+#define _GNU_SOURCE
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <unistd.h>
+#include <linux/filter.h>
+#include <linux/seccomp.h>
+#include <sys/prctl.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+
+#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0]))
+
+unsigned long long timing(clockid_t clk_id, unsigned long long samples)
+{
+ pid_t pid, ret;
+ unsigned long long i;
+ struct timespec start, finish;
+
+ pid = getpid();
+ assert(clock_gettime(clk_id, &start) == 0);
+ for (i = 0; i < samples; i++) {
+ ret = syscall(__NR_getpid);
+ assert(pid == ret);
+ }
+ assert(clock_gettime(clk_id, &finish) == 0);
+
+ i = finish.tv_sec - start.tv_sec;
+ i *= 1000000000;
+ i += finish.tv_nsec - start.tv_nsec;
+
+ printf("%lu.%09lu - %lu.%09lu = %llu\n",
+ finish.tv_sec, finish.tv_nsec,
+ start.tv_sec, start.tv_nsec,
+ i);
+
+ return i;
+}
+
+unsigned long long calibrate(void)
+{
+ unsigned long long i;
+
+ printf("Calibrating reasonable sample size...\n");
+
+ for (i = 5; ; i++) {
+ unsigned long long samples = 1 << i;
+
+ /* Find something that takes more than 5 seconds to run. */
+ if (timing(CLOCK_REALTIME, samples) / 1000000000ULL > 5)
+ return samples;
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+ unsigned long long samples;
+ unsigned long long native, filtered;
+
+ if (argc > 1)
+ samples = strtoull(argv[1], NULL, 0);
+ else
+ samples = calibrate();
+
+ printf("Benchmarking %llu samples...\n", samples);
+
+ native = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
+ printf("getpid native: %llu ns\n", native);
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ assert(ret == 0);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+ assert(ret == 0);
+
+ filtered = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
+ printf("getpid RET_ALLOW: %llu ns\n", filtered);
+
+ printf("Estimated seccomp overhead per syscall: %llu ns\n",
+ filtered - native);
+
+ if (filtered == native)
+ printf("Trying running again with more samples.\n");
+
+ return 0;
+}
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
new file mode 100644
index 000000000..14cad657b
--- /dev/null
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -0,0 +1,2997 @@
+/*
+ * Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
+ * Use of this source code is governed by the GPLv2 license.
+ *
+ * Test code for seccomp bpf.
+ */
+
+#include <sys/types.h>
+
+/*
+ * glibc 2.26 and later have SIGSYS in siginfo_t. Before that,
+ * we need to use the kernel's siginfo.h file and trick glibc
+ * into accepting it.
+ */
+#if !__GLIBC_PREREQ(2, 26)
+# include <asm/siginfo.h>
+# define __have_siginfo_t 1
+# define __have_sigval_t 1
+# define __have_sigevent_t 1
+#endif
+
+#include <errno.h>
+#include <linux/filter.h>
+#include <sys/prctl.h>
+#include <sys/ptrace.h>
+#include <sys/user.h>
+#include <linux/prctl.h>
+#include <linux/ptrace.h>
+#include <linux/seccomp.h>
+#include <pthread.h>
+#include <semaphore.h>
+#include <signal.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <string.h>
+#include <time.h>
+#include <linux/elf.h>
+#include <sys/uio.h>
+#include <sys/utsname.h>
+#include <sys/fcntl.h>
+#include <sys/mman.h>
+#include <sys/times.h>
+
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <sys/syscall.h>
+
+#include "../kselftest_harness.h"
+
+#ifndef PR_SET_PTRACER
+# define PR_SET_PTRACER 0x59616d61
+#endif
+
+#ifndef PR_SET_NO_NEW_PRIVS
+#define PR_SET_NO_NEW_PRIVS 38
+#define PR_GET_NO_NEW_PRIVS 39
+#endif
+
+#ifndef PR_SECCOMP_EXT
+#define PR_SECCOMP_EXT 43
+#endif
+
+#ifndef SECCOMP_EXT_ACT
+#define SECCOMP_EXT_ACT 1
+#endif
+
+#ifndef SECCOMP_EXT_ACT_TSYNC
+#define SECCOMP_EXT_ACT_TSYNC 1
+#endif
+
+#ifndef SECCOMP_MODE_STRICT
+#define SECCOMP_MODE_STRICT 1
+#endif
+
+#ifndef SECCOMP_MODE_FILTER
+#define SECCOMP_MODE_FILTER 2
+#endif
+
+#ifndef SECCOMP_RET_ALLOW
+struct seccomp_data {
+ int nr;
+ __u32 arch;
+ __u64 instruction_pointer;
+ __u64 args[6];
+};
+#endif
+
+#ifndef SECCOMP_RET_KILL_PROCESS
+#define SECCOMP_RET_KILL_PROCESS 0x80000000U /* kill the process */
+#define SECCOMP_RET_KILL_THREAD 0x00000000U /* kill the thread */
+#endif
+#ifndef SECCOMP_RET_KILL
+#define SECCOMP_RET_KILL SECCOMP_RET_KILL_THREAD
+#define SECCOMP_RET_TRAP 0x00030000U /* disallow and force a SIGSYS */
+#define SECCOMP_RET_ERRNO 0x00050000U /* returns an errno */
+#define SECCOMP_RET_TRACE 0x7ff00000U /* pass to a tracer or disallow */
+#define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */
+#endif
+#ifndef SECCOMP_RET_LOG
+#define SECCOMP_RET_LOG 0x7ffc0000U /* allow after logging */
+#endif
+
+#ifndef __NR_seccomp
+# if defined(__i386__)
+# define __NR_seccomp 354
+# elif defined(__x86_64__)
+# define __NR_seccomp 317
+# elif defined(__arm__)
+# define __NR_seccomp 383
+# elif defined(__aarch64__)
+# define __NR_seccomp 277
+# elif defined(__hppa__)
+# define __NR_seccomp 338
+# elif defined(__powerpc__)
+# define __NR_seccomp 358
+# elif defined(__s390__)
+# define __NR_seccomp 348
+# else
+# warning "seccomp syscall number unknown for this architecture"
+# define __NR_seccomp 0xffff
+# endif
+#endif
+
+#ifndef SECCOMP_SET_MODE_STRICT
+#define SECCOMP_SET_MODE_STRICT 0
+#endif
+
+#ifndef SECCOMP_SET_MODE_FILTER
+#define SECCOMP_SET_MODE_FILTER 1
+#endif
+
+#ifndef SECCOMP_GET_ACTION_AVAIL
+#define SECCOMP_GET_ACTION_AVAIL 2
+#endif
+
+#ifndef SECCOMP_FILTER_FLAG_TSYNC
+#define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0)
+#endif
+
+#ifndef SECCOMP_FILTER_FLAG_LOG
+#define SECCOMP_FILTER_FLAG_LOG (1UL << 1)
+#endif
+
+#ifndef SECCOMP_FILTER_FLAG_SPEC_ALLOW
+#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2)
+#endif
+
+#ifndef PTRACE_SECCOMP_GET_METADATA
+#define PTRACE_SECCOMP_GET_METADATA 0x420d
+
+struct seccomp_metadata {
+ __u64 filter_off; /* Input: which filter */
+ __u64 flags; /* Output: filter's flags */
+};
+#endif
+
+#ifndef seccomp
+int seccomp(unsigned int op, unsigned int flags, void *args)
+{
+ errno = 0;
+ return syscall(__NR_seccomp, op, flags, args);
+}
+#endif
+
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]))
+#elif __BYTE_ORDER == __BIG_ENDIAN
+#define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]) + sizeof(__u32))
+#else
+#error "wut? Unknown __BYTE_ORDER?!"
+#endif
+
+#define SIBLING_EXIT_UNKILLED 0xbadbeef
+#define SIBLING_EXIT_FAILURE 0xbadface
+#define SIBLING_EXIT_NEWPRIVS 0xbadfeed
+
+TEST(mode_strict_support)
+{
+ long ret;
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support CONFIG_SECCOMP");
+ }
+ syscall(__NR_exit, 0);
+}
+
+TEST_SIGNAL(mode_strict_cannot_call_prctl, SIGKILL)
+{
+ long ret;
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support CONFIG_SECCOMP");
+ }
+ syscall(__NR_prctl, PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
+ NULL, NULL, NULL);
+ EXPECT_FALSE(true) {
+ TH_LOG("Unreachable!");
+ }
+}
+
+/* Note! This doesn't test no new privs behavior */
+TEST(no_new_privs_support)
+{
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ EXPECT_EQ(0, ret) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+}
+
+/* Tests kernel support by checking for a copy_from_user() fault on NULL. */
+TEST(mode_filter_support)
+{
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, NULL, NULL);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EFAULT, errno) {
+ TH_LOG("Kernel does not support CONFIG_SECCOMP_FILTER!");
+ }
+}
+
+TEST(mode_filter_without_nnp)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+
+ ret = prctl(PR_GET_NO_NEW_PRIVS, 0, NULL, 0, 0);
+ ASSERT_LE(0, ret) {
+ TH_LOG("Expected 0 or unsupported for NO_NEW_PRIVS");
+ }
+ errno = 0;
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+ /* Succeeds with CAP_SYS_ADMIN, fails without */
+ /* TODO(wad) check caps not euid */
+ if (geteuid()) {
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EACCES, errno);
+ } else {
+ EXPECT_EQ(0, ret);
+ }
+}
+
+#define MAX_INSNS_PER_PATH 32768
+
+TEST(filter_size_limits)
+{
+ int i;
+ int count = BPF_MAXINSNS + 1;
+ struct sock_filter allow[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_filter *filter;
+ struct sock_fprog prog = { };
+ long ret;
+
+ filter = calloc(count, sizeof(*filter));
+ ASSERT_NE(NULL, filter);
+
+ for (i = 0; i < count; i++)
+ filter[i] = allow[0];
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ prog.filter = filter;
+ prog.len = count;
+
+ /* Too many filter instructions in a single filter. */
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+ ASSERT_NE(0, ret) {
+ TH_LOG("Installing %d insn filter was allowed", prog.len);
+ }
+
+ /* One less is okay, though. */
+ prog.len -= 1;
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Installing %d insn filter wasn't allowed", prog.len);
+ }
+}
+
+TEST(filter_chain_limits)
+{
+ int i;
+ int count = BPF_MAXINSNS;
+ struct sock_filter allow[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_filter *filter;
+ struct sock_fprog prog = { };
+ long ret;
+
+ filter = calloc(count, sizeof(*filter));
+ ASSERT_NE(NULL, filter);
+
+ for (i = 0; i < count; i++)
+ filter[i] = allow[0];
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ prog.filter = filter;
+ prog.len = 1;
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ prog.len = count;
+
+ /* Too many total filter instructions. */
+ for (i = 0; i < MAX_INSNS_PER_PATH; i++) {
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+ if (ret != 0)
+ break;
+ }
+ ASSERT_NE(0, ret) {
+ TH_LOG("Allowed %d %d-insn filters (total with penalties:%d)",
+ i, count, i * (count + 4));
+ }
+}
+
+TEST(mode_filter_cannot_move_to_strict)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, 0, 0);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EINVAL, errno);
+}
+
+
+TEST(mode_filter_get_seccomp)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
+ EXPECT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
+ EXPECT_EQ(2, ret);
+}
+
+
+TEST(ALLOW_all)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+ ASSERT_EQ(0, ret);
+}
+
+TEST(empty_prog)
+{
+ struct sock_filter filter[] = {
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EINVAL, errno);
+}
+
+TEST(log_all)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+ pid_t parent = getppid();
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+ ASSERT_EQ(0, ret);
+
+ /* getppid() should succeed and be logged (no check for logging) */
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+}
+
+TEST_SIGNAL(unknown_ret_is_kill_inside, SIGSYS)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, 0x10000000U),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+ ASSERT_EQ(0, ret);
+ EXPECT_EQ(0, syscall(__NR_getpid)) {
+ TH_LOG("getpid() shouldn't ever return");
+ }
+}
+
+/* return code >= 0x80000000 is unused. */
+TEST_SIGNAL(unknown_ret_is_kill_above_allow, SIGSYS)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, 0x90000000U),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+ ASSERT_EQ(0, ret);
+ EXPECT_EQ(0, syscall(__NR_getpid)) {
+ TH_LOG("getpid() shouldn't ever return");
+ }
+}
+
+TEST_SIGNAL(KILL_all, SIGSYS)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+ ASSERT_EQ(0, ret);
+}
+
+TEST_SIGNAL(KILL_one, SIGSYS)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+ pid_t parent = getppid();
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+ ASSERT_EQ(0, ret);
+
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ /* getpid() should never return. */
+ EXPECT_EQ(0, syscall(__NR_getpid));
+}
+
+TEST_SIGNAL(KILL_one_arg_one, SIGSYS)
+{
+ void *fatal_address;
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_times, 1, 0),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ /* Only both with lower 32-bit for now. */
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(0)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K,
+ (unsigned long)&fatal_address, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+ pid_t parent = getppid();
+ struct tms timebuf;
+ clock_t clock = times(&timebuf);
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+ ASSERT_EQ(0, ret);
+
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ EXPECT_LE(clock, syscall(__NR_times, &timebuf));
+ /* times() should never return. */
+ EXPECT_EQ(0, syscall(__NR_times, &fatal_address));
+}
+
+TEST_SIGNAL(KILL_one_arg_six, SIGSYS)
+{
+#ifndef __NR_mmap2
+ int sysno = __NR_mmap;
+#else
+ int sysno = __NR_mmap2;
+#endif
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, sysno, 1, 0),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ /* Only both with lower 32-bit for now. */
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(5)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x0C0FFEE, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+ pid_t parent = getppid();
+ int fd;
+ void *map1, *map2;
+ int page_size = sysconf(_SC_PAGESIZE);
+
+ ASSERT_LT(0, page_size);
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+ ASSERT_EQ(0, ret);
+
+ fd = open("/dev/zero", O_RDONLY);
+ ASSERT_NE(-1, fd);
+
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ map1 = (void *)syscall(sysno,
+ NULL, page_size, PROT_READ, MAP_PRIVATE, fd, page_size);
+ EXPECT_NE(MAP_FAILED, map1);
+ /* mmap2() should never return. */
+ map2 = (void *)syscall(sysno,
+ NULL, page_size, PROT_READ, MAP_PRIVATE, fd, 0x0C0FFEE);
+ EXPECT_EQ(MAP_FAILED, map2);
+
+ /* The test failed, so clean up the resources. */
+ munmap(map1, page_size);
+ munmap(map2, page_size);
+ close(fd);
+}
+
+/* This is a thread task to die via seccomp filter violation. */
+void *kill_thread(void *data)
+{
+ bool die = (bool)data;
+
+ if (die) {
+ prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
+ return (void *)SIBLING_EXIT_FAILURE;
+ }
+
+ return (void *)SIBLING_EXIT_UNKILLED;
+}
+
+/* Prepare a thread that will kill itself or both of us. */
+void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process)
+{
+ pthread_t thread;
+ void *status;
+ /* Kill only when calling __NR_prctl. */
+ struct sock_filter filter_thread[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_THREAD),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog_thread = {
+ .len = (unsigned short)ARRAY_SIZE(filter_thread),
+ .filter = filter_thread,
+ };
+ struct sock_filter filter_process[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_PROCESS),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog_process = {
+ .len = (unsigned short)ARRAY_SIZE(filter_process),
+ .filter = filter_process,
+ };
+
+ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0,
+ kill_process ? &prog_process : &prog_thread));
+
+ /*
+ * Add the KILL_THREAD rule again to make sure that the KILL_PROCESS
+ * flag cannot be downgraded by a new filter.
+ */
+ ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread));
+
+ /* Start a thread that will exit immediately. */
+ ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)false));
+ ASSERT_EQ(0, pthread_join(thread, &status));
+ ASSERT_EQ(SIBLING_EXIT_UNKILLED, (unsigned long)status);
+
+ /* Start a thread that will die immediately. */
+ ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)true));
+ ASSERT_EQ(0, pthread_join(thread, &status));
+ ASSERT_NE(SIBLING_EXIT_FAILURE, (unsigned long)status);
+
+ /*
+ * If we get here, only the spawned thread died. Let the parent know
+ * the whole process didn't die (i.e. this thread, the spawner,
+ * stayed running).
+ */
+ exit(42);
+}
+
+TEST(KILL_thread)
+{
+ int status;
+ pid_t child_pid;
+
+ child_pid = fork();
+ ASSERT_LE(0, child_pid);
+ if (child_pid == 0) {
+ kill_thread_or_group(_metadata, false);
+ _exit(38);
+ }
+
+ ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
+
+ /* If only the thread was killed, we'll see exit 42. */
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(42, WEXITSTATUS(status));
+}
+
+TEST(KILL_process)
+{
+ int status;
+ pid_t child_pid;
+
+ child_pid = fork();
+ ASSERT_LE(0, child_pid);
+ if (child_pid == 0) {
+ kill_thread_or_group(_metadata, true);
+ _exit(38);
+ }
+
+ ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
+
+ /* If the entire process was killed, we'll see SIGSYS. */
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_EQ(SIGSYS, WTERMSIG(status));
+}
+
+/* TODO(wad) add 64-bit versus 32-bit arg tests. */
+TEST(arg_out_of_range)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(6)),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EINVAL, errno);
+}
+
+#define ERRNO_FILTER(name, errno) \
+ struct sock_filter _read_filter_##name[] = { \
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS, \
+ offsetof(struct seccomp_data, nr)), \
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), \
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | errno), \
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), \
+ }; \
+ struct sock_fprog prog_##name = { \
+ .len = (unsigned short)ARRAY_SIZE(_read_filter_##name), \
+ .filter = _read_filter_##name, \
+ }
+
+/* Make sure basic errno values are correctly passed through a filter. */
+TEST(ERRNO_valid)
+{
+ ERRNO_FILTER(valid, E2BIG);
+ long ret;
+ pid_t parent = getppid();
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_valid);
+ ASSERT_EQ(0, ret);
+
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ EXPECT_EQ(-1, read(0, NULL, 0));
+ EXPECT_EQ(E2BIG, errno);
+}
+
+/* Make sure an errno of zero is correctly handled by the arch code. */
+TEST(ERRNO_zero)
+{
+ ERRNO_FILTER(zero, 0);
+ long ret;
+ pid_t parent = getppid();
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_zero);
+ ASSERT_EQ(0, ret);
+
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ /* "errno" of 0 is ok. */
+ EXPECT_EQ(0, read(0, NULL, 0));
+}
+
+/*
+ * The SECCOMP_RET_DATA mask is 16 bits wide, but errno is smaller.
+ * This tests that the errno value gets capped correctly, fixed by
+ * 580c57f10768 ("seccomp: cap SECCOMP_RET_ERRNO data to MAX_ERRNO").
+ */
+TEST(ERRNO_capped)
+{
+ ERRNO_FILTER(capped, 4096);
+ long ret;
+ pid_t parent = getppid();
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_capped);
+ ASSERT_EQ(0, ret);
+
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ EXPECT_EQ(-1, read(0, NULL, 0));
+ EXPECT_EQ(4095, errno);
+}
+
+/*
+ * Filters are processed in reverse order: last applied is executed first.
+ * Since only the SECCOMP_RET_ACTION mask is tested for return values, the
+ * SECCOMP_RET_DATA mask results will follow the most recently applied
+ * matching filter return (and not the lowest or highest value).
+ */
+TEST(ERRNO_order)
+{
+ ERRNO_FILTER(first, 11);
+ ERRNO_FILTER(second, 13);
+ ERRNO_FILTER(third, 12);
+ long ret;
+ pid_t parent = getppid();
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_first);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_second);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_third);
+ ASSERT_EQ(0, ret);
+
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ EXPECT_EQ(-1, read(0, NULL, 0));
+ EXPECT_EQ(12, errno);
+}
+
+FIXTURE_DATA(TRAP) {
+ struct sock_fprog prog;
+};
+
+FIXTURE_SETUP(TRAP)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+
+ memset(&self->prog, 0, sizeof(self->prog));
+ self->prog.filter = malloc(sizeof(filter));
+ ASSERT_NE(NULL, self->prog.filter);
+ memcpy(self->prog.filter, filter, sizeof(filter));
+ self->prog.len = (unsigned short)ARRAY_SIZE(filter);
+}
+
+FIXTURE_TEARDOWN(TRAP)
+{
+ if (self->prog.filter)
+ free(self->prog.filter);
+}
+
+TEST_F_SIGNAL(TRAP, dfl, SIGSYS)
+{
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
+ ASSERT_EQ(0, ret);
+ syscall(__NR_getpid);
+}
+
+/* Ensure that SIGSYS overrides SIG_IGN */
+TEST_F_SIGNAL(TRAP, ign, SIGSYS)
+{
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ signal(SIGSYS, SIG_IGN);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
+ ASSERT_EQ(0, ret);
+ syscall(__NR_getpid);
+}
+
+static siginfo_t TRAP_info;
+static volatile int TRAP_nr;
+static void TRAP_action(int nr, siginfo_t *info, void *void_context)
+{
+ memcpy(&TRAP_info, info, sizeof(TRAP_info));
+ TRAP_nr = nr;
+}
+
+TEST_F(TRAP, handler)
+{
+ int ret, test;
+ struct sigaction act;
+ sigset_t mask;
+
+ memset(&act, 0, sizeof(act));
+ sigemptyset(&mask);
+ sigaddset(&mask, SIGSYS);
+
+ act.sa_sigaction = &TRAP_action;
+ act.sa_flags = SA_SIGINFO;
+ ret = sigaction(SIGSYS, &act, NULL);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("sigaction failed");
+ }
+ ret = sigprocmask(SIG_UNBLOCK, &mask, NULL);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("sigprocmask failed");
+ }
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
+ ASSERT_EQ(0, ret);
+ TRAP_nr = 0;
+ memset(&TRAP_info, 0, sizeof(TRAP_info));
+ /* Expect the registers to be rolled back. (nr = error) may vary
+ * based on arch. */
+ ret = syscall(__NR_getpid);
+ /* Silence gcc warning about volatile. */
+ test = TRAP_nr;
+ EXPECT_EQ(SIGSYS, test);
+ struct local_sigsys {
+ void *_call_addr; /* calling user insn */
+ int _syscall; /* triggering system call number */
+ unsigned int _arch; /* AUDIT_ARCH_* of syscall */
+ } *sigsys = (struct local_sigsys *)
+#ifdef si_syscall
+ &(TRAP_info.si_call_addr);
+#else
+ &TRAP_info.si_pid;
+#endif
+ EXPECT_EQ(__NR_getpid, sigsys->_syscall);
+ /* Make sure arch is non-zero. */
+ EXPECT_NE(0, sigsys->_arch);
+ EXPECT_NE(0, (unsigned long)sigsys->_call_addr);
+}
+
+FIXTURE_DATA(precedence) {
+ struct sock_fprog allow;
+ struct sock_fprog log;
+ struct sock_fprog trace;
+ struct sock_fprog error;
+ struct sock_fprog trap;
+ struct sock_fprog kill;
+};
+
+FIXTURE_SETUP(precedence)
+{
+ struct sock_filter allow_insns[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_filter log_insns[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG),
+ };
+ struct sock_filter trace_insns[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE),
+ };
+ struct sock_filter error_insns[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO),
+ };
+ struct sock_filter trap_insns[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
+ };
+ struct sock_filter kill_insns[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+ };
+
+ memset(self, 0, sizeof(*self));
+#define FILTER_ALLOC(_x) \
+ self->_x.filter = malloc(sizeof(_x##_insns)); \
+ ASSERT_NE(NULL, self->_x.filter); \
+ memcpy(self->_x.filter, &_x##_insns, sizeof(_x##_insns)); \
+ self->_x.len = (unsigned short)ARRAY_SIZE(_x##_insns)
+ FILTER_ALLOC(allow);
+ FILTER_ALLOC(log);
+ FILTER_ALLOC(trace);
+ FILTER_ALLOC(error);
+ FILTER_ALLOC(trap);
+ FILTER_ALLOC(kill);
+}
+
+FIXTURE_TEARDOWN(precedence)
+{
+#define FILTER_FREE(_x) if (self->_x.filter) free(self->_x.filter)
+ FILTER_FREE(allow);
+ FILTER_FREE(log);
+ FILTER_FREE(trace);
+ FILTER_FREE(error);
+ FILTER_FREE(trap);
+ FILTER_FREE(kill);
+}
+
+TEST_F(precedence, allow_ok)
+{
+ pid_t parent, res = 0;
+ long ret;
+
+ parent = getppid();
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
+ ASSERT_EQ(0, ret);
+ /* Should work just fine. */
+ res = syscall(__NR_getppid);
+ EXPECT_EQ(parent, res);
+}
+
+TEST_F_SIGNAL(precedence, kill_is_highest, SIGSYS)
+{
+ pid_t parent, res = 0;
+ long ret;
+
+ parent = getppid();
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
+ ASSERT_EQ(0, ret);
+ /* Should work just fine. */
+ res = syscall(__NR_getppid);
+ EXPECT_EQ(parent, res);
+ /* getpid() should never return. */
+ res = syscall(__NR_getpid);
+ EXPECT_EQ(0, res);
+}
+
+TEST_F_SIGNAL(precedence, kill_is_highest_in_any_order, SIGSYS)
+{
+ pid_t parent;
+ long ret;
+
+ parent = getppid();
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
+ ASSERT_EQ(0, ret);
+ /* Should work just fine. */
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ /* getpid() should never return. */
+ EXPECT_EQ(0, syscall(__NR_getpid));
+}
+
+TEST_F_SIGNAL(precedence, trap_is_second, SIGSYS)
+{
+ pid_t parent;
+ long ret;
+
+ parent = getppid();
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
+ ASSERT_EQ(0, ret);
+ /* Should work just fine. */
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ /* getpid() should never return. */
+ EXPECT_EQ(0, syscall(__NR_getpid));
+}
+
+TEST_F_SIGNAL(precedence, trap_is_second_in_any_order, SIGSYS)
+{
+ pid_t parent;
+ long ret;
+
+ parent = getppid();
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
+ ASSERT_EQ(0, ret);
+ /* Should work just fine. */
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ /* getpid() should never return. */
+ EXPECT_EQ(0, syscall(__NR_getpid));
+}
+
+TEST_F(precedence, errno_is_third)
+{
+ pid_t parent;
+ long ret;
+
+ parent = getppid();
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
+ ASSERT_EQ(0, ret);
+ /* Should work just fine. */
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ EXPECT_EQ(0, syscall(__NR_getpid));
+}
+
+TEST_F(precedence, errno_is_third_in_any_order)
+{
+ pid_t parent;
+ long ret;
+
+ parent = getppid();
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+ ASSERT_EQ(0, ret);
+ /* Should work just fine. */
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ EXPECT_EQ(0, syscall(__NR_getpid));
+}
+
+TEST_F(precedence, trace_is_fourth)
+{
+ pid_t parent;
+ long ret;
+
+ parent = getppid();
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
+ ASSERT_EQ(0, ret);
+ /* Should work just fine. */
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ /* No ptracer */
+ EXPECT_EQ(-1, syscall(__NR_getpid));
+}
+
+TEST_F(precedence, trace_is_fourth_in_any_order)
+{
+ pid_t parent;
+ long ret;
+
+ parent = getppid();
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+ ASSERT_EQ(0, ret);
+ /* Should work just fine. */
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ /* No ptracer */
+ EXPECT_EQ(-1, syscall(__NR_getpid));
+}
+
+TEST_F(precedence, log_is_fifth)
+{
+ pid_t mypid, parent;
+ long ret;
+
+ mypid = getpid();
+ parent = getppid();
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+ ASSERT_EQ(0, ret);
+ /* Should work just fine. */
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ /* Should also work just fine */
+ EXPECT_EQ(mypid, syscall(__NR_getpid));
+}
+
+TEST_F(precedence, log_is_fifth_in_any_order)
+{
+ pid_t mypid, parent;
+ long ret;
+
+ mypid = getpid();
+ parent = getppid();
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+ ASSERT_EQ(0, ret);
+ /* Should work just fine. */
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ /* Should also work just fine */
+ EXPECT_EQ(mypid, syscall(__NR_getpid));
+}
+
+#ifndef PTRACE_O_TRACESECCOMP
+#define PTRACE_O_TRACESECCOMP 0x00000080
+#endif
+
+/* Catch the Ubuntu 12.04 value error. */
+#if PTRACE_EVENT_SECCOMP != 7
+#undef PTRACE_EVENT_SECCOMP
+#endif
+
+#ifndef PTRACE_EVENT_SECCOMP
+#define PTRACE_EVENT_SECCOMP 7
+#endif
+
+#define IS_SECCOMP_EVENT(status) ((status >> 16) == PTRACE_EVENT_SECCOMP)
+bool tracer_running;
+void tracer_stop(int sig)
+{
+ tracer_running = false;
+}
+
+typedef void tracer_func_t(struct __test_metadata *_metadata,
+ pid_t tracee, int status, void *args);
+
+void start_tracer(struct __test_metadata *_metadata, int fd, pid_t tracee,
+ tracer_func_t tracer_func, void *args, bool ptrace_syscall)
+{
+ int ret = -1;
+ struct sigaction action = {
+ .sa_handler = tracer_stop,
+ };
+
+ /* Allow external shutdown. */
+ tracer_running = true;
+ ASSERT_EQ(0, sigaction(SIGUSR1, &action, NULL));
+
+ errno = 0;
+ while (ret == -1 && errno != EINVAL)
+ ret = ptrace(PTRACE_ATTACH, tracee, NULL, 0);
+ ASSERT_EQ(0, ret) {
+ kill(tracee, SIGKILL);
+ }
+ /* Wait for attach stop */
+ wait(NULL);
+
+ ret = ptrace(PTRACE_SETOPTIONS, tracee, NULL, ptrace_syscall ?
+ PTRACE_O_TRACESYSGOOD :
+ PTRACE_O_TRACESECCOMP);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Failed to set PTRACE_O_TRACESECCOMP");
+ kill(tracee, SIGKILL);
+ }
+ ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT,
+ tracee, NULL, 0);
+ ASSERT_EQ(0, ret);
+
+ /* Unblock the tracee */
+ ASSERT_EQ(1, write(fd, "A", 1));
+ ASSERT_EQ(0, close(fd));
+
+ /* Run until we're shut down. Must assert to stop execution. */
+ while (tracer_running) {
+ int status;
+
+ if (wait(&status) != tracee)
+ continue;
+ if (WIFSIGNALED(status) || WIFEXITED(status))
+ /* Child is dead. Time to go. */
+ return;
+
+ /* Check if this is a seccomp event. */
+ ASSERT_EQ(!ptrace_syscall, IS_SECCOMP_EVENT(status));
+
+ tracer_func(_metadata, tracee, status, args);
+
+ ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT,
+ tracee, NULL, 0);
+ ASSERT_EQ(0, ret);
+ }
+ /* Directly report the status of our test harness results. */
+ syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE);
+}
+
+/* Common tracer setup/teardown functions. */
+void cont_handler(int num)
+{ }
+pid_t setup_trace_fixture(struct __test_metadata *_metadata,
+ tracer_func_t func, void *args, bool ptrace_syscall)
+{
+ char sync;
+ int pipefd[2];
+ pid_t tracer_pid;
+ pid_t tracee = getpid();
+
+ /* Setup a pipe for clean synchronization. */
+ ASSERT_EQ(0, pipe(pipefd));
+
+ /* Fork a child which we'll promote to tracer */
+ tracer_pid = fork();
+ ASSERT_LE(0, tracer_pid);
+ signal(SIGALRM, cont_handler);
+ if (tracer_pid == 0) {
+ close(pipefd[0]);
+ start_tracer(_metadata, pipefd[1], tracee, func, args,
+ ptrace_syscall);
+ syscall(__NR_exit, 0);
+ }
+ close(pipefd[1]);
+ prctl(PR_SET_PTRACER, tracer_pid, 0, 0, 0);
+ read(pipefd[0], &sync, 1);
+ close(pipefd[0]);
+
+ return tracer_pid;
+}
+void teardown_trace_fixture(struct __test_metadata *_metadata,
+ pid_t tracer)
+{
+ if (tracer) {
+ int status;
+ /*
+ * Extract the exit code from the other process and
+ * adopt it for ourselves in case its asserts failed.
+ */
+ ASSERT_EQ(0, kill(tracer, SIGUSR1));
+ ASSERT_EQ(tracer, waitpid(tracer, &status, 0));
+ if (WEXITSTATUS(status))
+ _metadata->passed = 0;
+ }
+}
+
+/* "poke" tracer arguments and function. */
+struct tracer_args_poke_t {
+ unsigned long poke_addr;
+};
+
+void tracer_poke(struct __test_metadata *_metadata, pid_t tracee, int status,
+ void *args)
+{
+ int ret;
+ unsigned long msg;
+ struct tracer_args_poke_t *info = (struct tracer_args_poke_t *)args;
+
+ ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
+ EXPECT_EQ(0, ret);
+ /* If this fails, don't try to recover. */
+ ASSERT_EQ(0x1001, msg) {
+ kill(tracee, SIGKILL);
+ }
+ /*
+ * Poke in the message.
+ * Registers are not touched to try to keep this relatively arch
+ * agnostic.
+ */
+ ret = ptrace(PTRACE_POKEDATA, tracee, info->poke_addr, 0x1001);
+ EXPECT_EQ(0, ret);
+}
+
+FIXTURE_DATA(TRACE_poke) {
+ struct sock_fprog prog;
+ pid_t tracer;
+ long poked;
+ struct tracer_args_poke_t tracer_args;
+};
+
+FIXTURE_SETUP(TRACE_poke)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1001),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+
+ self->poked = 0;
+ memset(&self->prog, 0, sizeof(self->prog));
+ self->prog.filter = malloc(sizeof(filter));
+ ASSERT_NE(NULL, self->prog.filter);
+ memcpy(self->prog.filter, filter, sizeof(filter));
+ self->prog.len = (unsigned short)ARRAY_SIZE(filter);
+
+ /* Set up tracer args. */
+ self->tracer_args.poke_addr = (unsigned long)&self->poked;
+
+ /* Launch tracer. */
+ self->tracer = setup_trace_fixture(_metadata, tracer_poke,
+ &self->tracer_args, false);
+}
+
+FIXTURE_TEARDOWN(TRACE_poke)
+{
+ teardown_trace_fixture(_metadata, self->tracer);
+ if (self->prog.filter)
+ free(self->prog.filter);
+}
+
+TEST_F(TRACE_poke, read_has_side_effects)
+{
+ ssize_t ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ EXPECT_EQ(0, self->poked);
+ ret = read(-1, NULL, 0);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(0x1001, self->poked);
+}
+
+TEST_F(TRACE_poke, getpid_runs_normally)
+{
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ EXPECT_EQ(0, self->poked);
+ EXPECT_NE(0, syscall(__NR_getpid));
+ EXPECT_EQ(0, self->poked);
+}
+
+#if defined(__x86_64__)
+# define ARCH_REGS struct user_regs_struct
+# define SYSCALL_NUM orig_rax
+# define SYSCALL_RET rax
+#elif defined(__i386__)
+# define ARCH_REGS struct user_regs_struct
+# define SYSCALL_NUM orig_eax
+# define SYSCALL_RET eax
+#elif defined(__arm__)
+# define ARCH_REGS struct pt_regs
+# define SYSCALL_NUM ARM_r7
+# define SYSCALL_RET ARM_r0
+#elif defined(__aarch64__)
+# define ARCH_REGS struct user_pt_regs
+# define SYSCALL_NUM regs[8]
+# define SYSCALL_RET regs[0]
+#elif defined(__hppa__)
+# define ARCH_REGS struct user_regs_struct
+# define SYSCALL_NUM gr[20]
+# define SYSCALL_RET gr[28]
+#elif defined(__powerpc__)
+# define ARCH_REGS struct pt_regs
+# define SYSCALL_NUM gpr[0]
+# define SYSCALL_RET gpr[3]
+#elif defined(__s390__)
+# define ARCH_REGS s390_regs
+# define SYSCALL_NUM gprs[2]
+# define SYSCALL_RET gprs[2]
+#elif defined(__mips__)
+# define ARCH_REGS struct pt_regs
+# define SYSCALL_NUM regs[2]
+# define SYSCALL_SYSCALL_NUM regs[4]
+# define SYSCALL_RET regs[2]
+# define SYSCALL_NUM_RET_SHARE_REG
+#else
+# error "Do not know how to find your architecture's registers and syscalls"
+#endif
+
+/* When the syscall return can't be changed, stub out the tests for it. */
+#ifdef SYSCALL_NUM_RET_SHARE_REG
+# define EXPECT_SYSCALL_RETURN(val, action) EXPECT_EQ(-1, action)
+#else
+# define EXPECT_SYSCALL_RETURN(val, action) \
+ do { \
+ errno = 0; \
+ if (val < 0) { \
+ EXPECT_EQ(-1, action); \
+ EXPECT_EQ(-(val), errno); \
+ } else { \
+ EXPECT_EQ(val, action); \
+ } \
+ } while (0)
+#endif
+
+/* Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for
+ * architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux).
+ */
+#if defined(__x86_64__) || defined(__i386__) || defined(__mips__)
+#define HAVE_GETREGS
+#endif
+
+/* Architecture-specific syscall fetching routine. */
+int get_syscall(struct __test_metadata *_metadata, pid_t tracee)
+{
+ ARCH_REGS regs;
+#ifdef HAVE_GETREGS
+ EXPECT_EQ(0, ptrace(PTRACE_GETREGS, tracee, 0, &regs)) {
+ TH_LOG("PTRACE_GETREGS failed");
+ return -1;
+ }
+#else
+ struct iovec iov;
+
+ iov.iov_base = &regs;
+ iov.iov_len = sizeof(regs);
+ EXPECT_EQ(0, ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov)) {
+ TH_LOG("PTRACE_GETREGSET failed");
+ return -1;
+ }
+#endif
+
+#if defined(__mips__)
+ if (regs.SYSCALL_NUM == __NR_O32_Linux)
+ return regs.SYSCALL_SYSCALL_NUM;
+#endif
+ return regs.SYSCALL_NUM;
+}
+
+/* Architecture-specific syscall changing routine. */
+void change_syscall(struct __test_metadata *_metadata,
+ pid_t tracee, int syscall, int result)
+{
+ int ret;
+ ARCH_REGS regs;
+#ifdef HAVE_GETREGS
+ ret = ptrace(PTRACE_GETREGS, tracee, 0, &regs);
+#else
+ struct iovec iov;
+ iov.iov_base = &regs;
+ iov.iov_len = sizeof(regs);
+ ret = ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov);
+#endif
+ EXPECT_EQ(0, ret) {}
+
+#if defined(__x86_64__) || defined(__i386__) || defined(__powerpc__) || \
+ defined(__s390__) || defined(__hppa__)
+ {
+ regs.SYSCALL_NUM = syscall;
+ }
+#elif defined(__mips__)
+ {
+ if (regs.SYSCALL_NUM == __NR_O32_Linux)
+ regs.SYSCALL_SYSCALL_NUM = syscall;
+ else
+ regs.SYSCALL_NUM = syscall;
+ }
+
+#elif defined(__arm__)
+# ifndef PTRACE_SET_SYSCALL
+# define PTRACE_SET_SYSCALL 23
+# endif
+ {
+ ret = ptrace(PTRACE_SET_SYSCALL, tracee, NULL, syscall);
+ EXPECT_EQ(0, ret);
+ }
+
+#elif defined(__aarch64__)
+# ifndef NT_ARM_SYSTEM_CALL
+# define NT_ARM_SYSTEM_CALL 0x404
+# endif
+ {
+ iov.iov_base = &syscall;
+ iov.iov_len = sizeof(syscall);
+ ret = ptrace(PTRACE_SETREGSET, tracee, NT_ARM_SYSTEM_CALL,
+ &iov);
+ EXPECT_EQ(0, ret);
+ }
+
+#else
+ ASSERT_EQ(1, 0) {
+ TH_LOG("How is the syscall changed on this architecture?");
+ }
+#endif
+
+ /* If syscall is skipped, change return value. */
+ if (syscall == -1)
+#ifdef SYSCALL_NUM_RET_SHARE_REG
+ TH_LOG("Can't modify syscall return on this architecture");
+#else
+ regs.SYSCALL_RET = result;
+#endif
+
+#ifdef HAVE_GETREGS
+ ret = ptrace(PTRACE_SETREGS, tracee, 0, &regs);
+#else
+ iov.iov_base = &regs;
+ iov.iov_len = sizeof(regs);
+ ret = ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &iov);
+#endif
+ EXPECT_EQ(0, ret);
+}
+
+void tracer_syscall(struct __test_metadata *_metadata, pid_t tracee,
+ int status, void *args)
+{
+ int ret;
+ unsigned long msg;
+
+ /* Make sure we got the right message. */
+ ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
+ EXPECT_EQ(0, ret);
+
+ /* Validate and take action on expected syscalls. */
+ switch (msg) {
+ case 0x1002:
+ /* change getpid to getppid. */
+ EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee));
+ change_syscall(_metadata, tracee, __NR_getppid, 0);
+ break;
+ case 0x1003:
+ /* skip gettid with valid return code. */
+ EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee));
+ change_syscall(_metadata, tracee, -1, 45000);
+ break;
+ case 0x1004:
+ /* skip openat with error. */
+ EXPECT_EQ(__NR_openat, get_syscall(_metadata, tracee));
+ change_syscall(_metadata, tracee, -1, -ESRCH);
+ break;
+ case 0x1005:
+ /* do nothing (allow getppid) */
+ EXPECT_EQ(__NR_getppid, get_syscall(_metadata, tracee));
+ break;
+ default:
+ EXPECT_EQ(0, msg) {
+ TH_LOG("Unknown PTRACE_GETEVENTMSG: 0x%lx", msg);
+ kill(tracee, SIGKILL);
+ }
+ }
+
+}
+
+void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee,
+ int status, void *args)
+{
+ int ret, nr;
+ unsigned long msg;
+ static bool entry;
+
+ /* Make sure we got an empty message. */
+ ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, msg);
+
+ /* The only way to tell PTRACE_SYSCALL entry/exit is by counting. */
+ entry = !entry;
+ if (!entry)
+ return;
+
+ nr = get_syscall(_metadata, tracee);
+
+ if (nr == __NR_getpid)
+ change_syscall(_metadata, tracee, __NR_getppid, 0);
+ if (nr == __NR_gettid)
+ change_syscall(_metadata, tracee, -1, 45000);
+ if (nr == __NR_openat)
+ change_syscall(_metadata, tracee, -1, -ESRCH);
+}
+
+FIXTURE_DATA(TRACE_syscall) {
+ struct sock_fprog prog;
+ pid_t tracer, mytid, mypid, parent;
+};
+
+FIXTURE_SETUP(TRACE_syscall)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1002),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_gettid, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1003),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_openat, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1004),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1005),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+
+ memset(&self->prog, 0, sizeof(self->prog));
+ self->prog.filter = malloc(sizeof(filter));
+ ASSERT_NE(NULL, self->prog.filter);
+ memcpy(self->prog.filter, filter, sizeof(filter));
+ self->prog.len = (unsigned short)ARRAY_SIZE(filter);
+
+ /* Prepare some testable syscall results. */
+ self->mytid = syscall(__NR_gettid);
+ ASSERT_GT(self->mytid, 0);
+ ASSERT_NE(self->mytid, 1) {
+ TH_LOG("Running this test as init is not supported. :)");
+ }
+
+ self->mypid = getpid();
+ ASSERT_GT(self->mypid, 0);
+ ASSERT_EQ(self->mytid, self->mypid);
+
+ self->parent = getppid();
+ ASSERT_GT(self->parent, 0);
+ ASSERT_NE(self->parent, self->mypid);
+
+ /* Launch tracer. */
+ self->tracer = setup_trace_fixture(_metadata, tracer_syscall, NULL,
+ false);
+}
+
+FIXTURE_TEARDOWN(TRACE_syscall)
+{
+ teardown_trace_fixture(_metadata, self->tracer);
+ if (self->prog.filter)
+ free(self->prog.filter);
+}
+
+TEST_F(TRACE_syscall, ptrace_syscall_redirected)
+{
+ /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
+ teardown_trace_fixture(_metadata, self->tracer);
+ self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
+ true);
+
+ /* Tracer will redirect getpid to getppid. */
+ EXPECT_NE(self->mypid, syscall(__NR_getpid));
+}
+
+TEST_F(TRACE_syscall, ptrace_syscall_errno)
+{
+ /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
+ teardown_trace_fixture(_metadata, self->tracer);
+ self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
+ true);
+
+ /* Tracer should skip the open syscall, resulting in ESRCH. */
+ EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat));
+}
+
+TEST_F(TRACE_syscall, ptrace_syscall_faked)
+{
+ /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
+ teardown_trace_fixture(_metadata, self->tracer);
+ self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
+ true);
+
+ /* Tracer should skip the gettid syscall, resulting fake pid. */
+ EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid));
+}
+
+TEST_F(TRACE_syscall, syscall_allowed)
+{
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ /* getppid works as expected (no changes). */
+ EXPECT_EQ(self->parent, syscall(__NR_getppid));
+ EXPECT_NE(self->mypid, syscall(__NR_getppid));
+}
+
+TEST_F(TRACE_syscall, syscall_redirected)
+{
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ /* getpid has been redirected to getppid as expected. */
+ EXPECT_EQ(self->parent, syscall(__NR_getpid));
+ EXPECT_NE(self->mypid, syscall(__NR_getpid));
+}
+
+TEST_F(TRACE_syscall, syscall_errno)
+{
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ /* openat has been skipped and an errno return. */
+ EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat));
+}
+
+TEST_F(TRACE_syscall, syscall_faked)
+{
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ /* gettid has been skipped and an altered return value stored. */
+ EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid));
+}
+
+TEST_F(TRACE_syscall, skip_after_RET_TRACE)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ /* Install fixture filter. */
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ /* Install "errno on getppid" filter. */
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ /* Tracer will redirect getpid to getppid, and we should see EPERM. */
+ errno = 0;
+ EXPECT_EQ(-1, syscall(__NR_getpid));
+ EXPECT_EQ(EPERM, errno);
+}
+
+TEST_F_SIGNAL(TRACE_syscall, kill_after_RET_TRACE, SIGSYS)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ /* Install fixture filter. */
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ /* Install "death on getppid" filter. */
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ /* Tracer will redirect getpid to getppid, and we should die. */
+ EXPECT_NE(self->mypid, syscall(__NR_getpid));
+}
+
+TEST_F(TRACE_syscall, skip_after_ptrace)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+
+ /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
+ teardown_trace_fixture(_metadata, self->tracer);
+ self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
+ true);
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ /* Install "errno on getppid" filter. */
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ /* Tracer will redirect getpid to getppid, and we should see EPERM. */
+ EXPECT_EQ(-1, syscall(__NR_getpid));
+ EXPECT_EQ(EPERM, errno);
+}
+
+TEST_F_SIGNAL(TRACE_syscall, kill_after_ptrace, SIGSYS)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+
+ /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
+ teardown_trace_fixture(_metadata, self->tracer);
+ self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
+ true);
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ /* Install "death on getppid" filter. */
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ /* Tracer will redirect getpid to getppid, and we should die. */
+ EXPECT_NE(self->mypid, syscall(__NR_getpid));
+}
+
+TEST(seccomp_syscall)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ /* Reject insane operation. */
+ ret = seccomp(-1, 0, &prog);
+ ASSERT_NE(ENOSYS, errno) {
+ TH_LOG("Kernel does not support seccomp syscall!");
+ }
+ EXPECT_EQ(EINVAL, errno) {
+ TH_LOG("Did not reject crazy op value!");
+ }
+
+ /* Reject strict with flags or pointer. */
+ ret = seccomp(SECCOMP_SET_MODE_STRICT, -1, NULL);
+ EXPECT_EQ(EINVAL, errno) {
+ TH_LOG("Did not reject mode strict with flags!");
+ }
+ ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, &prog);
+ EXPECT_EQ(EINVAL, errno) {
+ TH_LOG("Did not reject mode strict with uargs!");
+ }
+
+ /* Reject insane args for filter. */
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, -1, &prog);
+ EXPECT_EQ(EINVAL, errno) {
+ TH_LOG("Did not reject crazy filter flags!");
+ }
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, NULL);
+ EXPECT_EQ(EFAULT, errno) {
+ TH_LOG("Did not reject NULL filter!");
+ }
+
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
+ EXPECT_EQ(0, errno) {
+ TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER: %s",
+ strerror(errno));
+ }
+}
+
+TEST(seccomp_syscall_mode_lock)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
+ ASSERT_NE(ENOSYS, errno) {
+ TH_LOG("Kernel does not support seccomp syscall!");
+ }
+ EXPECT_EQ(0, ret) {
+ TH_LOG("Could not install filter!");
+ }
+
+ /* Make sure neither entry point will switch to strict. */
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0);
+ EXPECT_EQ(EINVAL, errno) {
+ TH_LOG("Switched to mode strict!");
+ }
+
+ ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, NULL);
+ EXPECT_EQ(EINVAL, errno) {
+ TH_LOG("Switched to mode strict!");
+ }
+}
+
+/*
+ * Test detection of known and unknown filter flags. Userspace needs to be able
+ * to check if a filter flag is supported by the current kernel and a good way
+ * of doing that is by attempting to enter filter mode, with the flag bit in
+ * question set, and a NULL pointer for the _args_ parameter. EFAULT indicates
+ * that the flag is valid and EINVAL indicates that the flag is invalid.
+ */
+TEST(detect_seccomp_filter_flags)
+{
+ unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC,
+ SECCOMP_FILTER_FLAG_LOG,
+ SECCOMP_FILTER_FLAG_SPEC_ALLOW };
+ unsigned int flag, all_flags;
+ int i;
+ long ret;
+
+ /* Test detection of known-good filter flags */
+ for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) {
+ int bits = 0;
+
+ flag = flags[i];
+ /* Make sure the flag is a single bit! */
+ while (flag) {
+ if (flag & 0x1)
+ bits ++;
+ flag >>= 1;
+ }
+ ASSERT_EQ(1, bits);
+ flag = flags[i];
+
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
+ ASSERT_NE(ENOSYS, errno) {
+ TH_LOG("Kernel does not support seccomp syscall!");
+ }
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EFAULT, errno) {
+ TH_LOG("Failed to detect that a known-good filter flag (0x%X) is supported!",
+ flag);
+ }
+
+ all_flags |= flag;
+ }
+
+ /* Test detection of all known-good filter flags */
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, all_flags, NULL);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EFAULT, errno) {
+ TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!",
+ all_flags);
+ }
+
+ /* Test detection of an unknown filter flag */
+ flag = -1;
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EINVAL, errno) {
+ TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported!",
+ flag);
+ }
+
+ /*
+ * Test detection of an unknown filter flag that may simply need to be
+ * added to this test
+ */
+ flag = flags[ARRAY_SIZE(flags) - 1] << 1;
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EINVAL, errno) {
+ TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported! Does a new flag need to be added to this test?",
+ flag);
+ }
+}
+
+TEST(TSYNC_first)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
+ &prog);
+ ASSERT_NE(ENOSYS, errno) {
+ TH_LOG("Kernel does not support seccomp syscall!");
+ }
+ EXPECT_EQ(0, ret) {
+ TH_LOG("Could not install initial filter with TSYNC!");
+ }
+}
+
+#define TSYNC_SIBLINGS 2
+struct tsync_sibling {
+ pthread_t tid;
+ pid_t system_tid;
+ sem_t *started;
+ pthread_cond_t *cond;
+ pthread_mutex_t *mutex;
+ int diverge;
+ int num_waits;
+ struct sock_fprog *prog;
+ struct __test_metadata *metadata;
+};
+
+/*
+ * To avoid joining joined threads (which is not allowed by Bionic),
+ * make sure we both successfully join and clear the tid to skip a
+ * later join attempt during fixture teardown. Any remaining threads
+ * will be directly killed during teardown.
+ */
+#define PTHREAD_JOIN(tid, status) \
+ do { \
+ int _rc = pthread_join(tid, status); \
+ if (_rc) { \
+ TH_LOG("pthread_join of tid %u failed: %d\n", \
+ (unsigned int)tid, _rc); \
+ } else { \
+ tid = 0; \
+ } \
+ } while (0)
+
+FIXTURE_DATA(TSYNC) {
+ struct sock_fprog root_prog, apply_prog;
+ struct tsync_sibling sibling[TSYNC_SIBLINGS];
+ sem_t started;
+ pthread_cond_t cond;
+ pthread_mutex_t mutex;
+ int sibling_count;
+};
+
+FIXTURE_SETUP(TSYNC)
+{
+ struct sock_filter root_filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_filter apply_filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+
+ memset(&self->root_prog, 0, sizeof(self->root_prog));
+ memset(&self->apply_prog, 0, sizeof(self->apply_prog));
+ memset(&self->sibling, 0, sizeof(self->sibling));
+ self->root_prog.filter = malloc(sizeof(root_filter));
+ ASSERT_NE(NULL, self->root_prog.filter);
+ memcpy(self->root_prog.filter, &root_filter, sizeof(root_filter));
+ self->root_prog.len = (unsigned short)ARRAY_SIZE(root_filter);
+
+ self->apply_prog.filter = malloc(sizeof(apply_filter));
+ ASSERT_NE(NULL, self->apply_prog.filter);
+ memcpy(self->apply_prog.filter, &apply_filter, sizeof(apply_filter));
+ self->apply_prog.len = (unsigned short)ARRAY_SIZE(apply_filter);
+
+ self->sibling_count = 0;
+ pthread_mutex_init(&self->mutex, NULL);
+ pthread_cond_init(&self->cond, NULL);
+ sem_init(&self->started, 0, 0);
+ self->sibling[0].tid = 0;
+ self->sibling[0].cond = &self->cond;
+ self->sibling[0].started = &self->started;
+ self->sibling[0].mutex = &self->mutex;
+ self->sibling[0].diverge = 0;
+ self->sibling[0].num_waits = 1;
+ self->sibling[0].prog = &self->root_prog;
+ self->sibling[0].metadata = _metadata;
+ self->sibling[1].tid = 0;
+ self->sibling[1].cond = &self->cond;
+ self->sibling[1].started = &self->started;
+ self->sibling[1].mutex = &self->mutex;
+ self->sibling[1].diverge = 0;
+ self->sibling[1].prog = &self->root_prog;
+ self->sibling[1].num_waits = 1;
+ self->sibling[1].metadata = _metadata;
+}
+
+FIXTURE_TEARDOWN(TSYNC)
+{
+ int sib = 0;
+
+ if (self->root_prog.filter)
+ free(self->root_prog.filter);
+ if (self->apply_prog.filter)
+ free(self->apply_prog.filter);
+
+ for ( ; sib < self->sibling_count; ++sib) {
+ struct tsync_sibling *s = &self->sibling[sib];
+
+ if (!s->tid)
+ continue;
+ /*
+ * If a thread is still running, it may be stuck, so hit
+ * it over the head really hard.
+ */
+ pthread_kill(s->tid, 9);
+ }
+ pthread_mutex_destroy(&self->mutex);
+ pthread_cond_destroy(&self->cond);
+ sem_destroy(&self->started);
+}
+
+void *tsync_sibling(void *data)
+{
+ long ret = 0;
+ struct tsync_sibling *me = data;
+
+ me->system_tid = syscall(__NR_gettid);
+
+ pthread_mutex_lock(me->mutex);
+ if (me->diverge) {
+ /* Just re-apply the root prog to fork the tree */
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
+ me->prog, 0, 0);
+ }
+ sem_post(me->started);
+ /* Return outside of started so parent notices failures. */
+ if (ret) {
+ pthread_mutex_unlock(me->mutex);
+ return (void *)SIBLING_EXIT_FAILURE;
+ }
+ do {
+ pthread_cond_wait(me->cond, me->mutex);
+ me->num_waits = me->num_waits - 1;
+ } while (me->num_waits);
+ pthread_mutex_unlock(me->mutex);
+
+ ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
+ if (!ret)
+ return (void *)SIBLING_EXIT_NEWPRIVS;
+ read(0, NULL, 0);
+ return (void *)SIBLING_EXIT_UNKILLED;
+}
+
+void tsync_start_sibling(struct tsync_sibling *sibling)
+{
+ pthread_create(&sibling->tid, NULL, tsync_sibling, (void *)sibling);
+}
+
+TEST_F(TSYNC, siblings_fail_prctl)
+{
+ long ret;
+ void *status;
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EINVAL),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+
+ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ /* Check prctl failure detection by requesting sib 0 diverge. */
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
+ ASSERT_NE(ENOSYS, errno) {
+ TH_LOG("Kernel does not support seccomp syscall!");
+ }
+ ASSERT_EQ(0, ret) {
+ TH_LOG("setting filter failed");
+ }
+
+ self->sibling[0].diverge = 1;
+ tsync_start_sibling(&self->sibling[0]);
+ tsync_start_sibling(&self->sibling[1]);
+
+ while (self->sibling_count < TSYNC_SIBLINGS) {
+ sem_wait(&self->started);
+ self->sibling_count++;
+ }
+
+ /* Signal the threads to clean up*/
+ pthread_mutex_lock(&self->mutex);
+ ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
+ TH_LOG("cond broadcast non-zero");
+ }
+ pthread_mutex_unlock(&self->mutex);
+
+ /* Ensure diverging sibling failed to call prctl. */
+ PTHREAD_JOIN(self->sibling[0].tid, &status);
+ EXPECT_EQ(SIBLING_EXIT_FAILURE, (long)status);
+ PTHREAD_JOIN(self->sibling[1].tid, &status);
+ EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
+}
+
+TEST_F(TSYNC, two_siblings_with_ancestor)
+{
+ long ret;
+ void *status;
+
+ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
+ ASSERT_NE(ENOSYS, errno) {
+ TH_LOG("Kernel does not support seccomp syscall!");
+ }
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
+ }
+ tsync_start_sibling(&self->sibling[0]);
+ tsync_start_sibling(&self->sibling[1]);
+
+ while (self->sibling_count < TSYNC_SIBLINGS) {
+ sem_wait(&self->started);
+ self->sibling_count++;
+ }
+
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
+ &self->apply_prog);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Could install filter on all threads!");
+ }
+ /* Tell the siblings to test the policy */
+ pthread_mutex_lock(&self->mutex);
+ ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
+ TH_LOG("cond broadcast non-zero");
+ }
+ pthread_mutex_unlock(&self->mutex);
+ /* Ensure they are both killed and don't exit cleanly. */
+ PTHREAD_JOIN(self->sibling[0].tid, &status);
+ EXPECT_EQ(0x0, (long)status);
+ PTHREAD_JOIN(self->sibling[1].tid, &status);
+ EXPECT_EQ(0x0, (long)status);
+}
+
+TEST_F(TSYNC, two_sibling_want_nnp)
+{
+ void *status;
+
+ /* start siblings before any prctl() operations */
+ tsync_start_sibling(&self->sibling[0]);
+ tsync_start_sibling(&self->sibling[1]);
+ while (self->sibling_count < TSYNC_SIBLINGS) {
+ sem_wait(&self->started);
+ self->sibling_count++;
+ }
+
+ /* Tell the siblings to test no policy */
+ pthread_mutex_lock(&self->mutex);
+ ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
+ TH_LOG("cond broadcast non-zero");
+ }
+ pthread_mutex_unlock(&self->mutex);
+
+ /* Ensure they are both upset about lacking nnp. */
+ PTHREAD_JOIN(self->sibling[0].tid, &status);
+ EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
+ PTHREAD_JOIN(self->sibling[1].tid, &status);
+ EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
+}
+
+TEST_F(TSYNC, two_siblings_with_no_filter)
+{
+ long ret;
+ void *status;
+
+ /* start siblings before any prctl() operations */
+ tsync_start_sibling(&self->sibling[0]);
+ tsync_start_sibling(&self->sibling[1]);
+ while (self->sibling_count < TSYNC_SIBLINGS) {
+ sem_wait(&self->started);
+ self->sibling_count++;
+ }
+
+ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
+ &self->apply_prog);
+ ASSERT_NE(ENOSYS, errno) {
+ TH_LOG("Kernel does not support seccomp syscall!");
+ }
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Could install filter on all threads!");
+ }
+
+ /* Tell the siblings to test the policy */
+ pthread_mutex_lock(&self->mutex);
+ ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
+ TH_LOG("cond broadcast non-zero");
+ }
+ pthread_mutex_unlock(&self->mutex);
+
+ /* Ensure they are both killed and don't exit cleanly. */
+ PTHREAD_JOIN(self->sibling[0].tid, &status);
+ EXPECT_EQ(0x0, (long)status);
+ PTHREAD_JOIN(self->sibling[1].tid, &status);
+ EXPECT_EQ(0x0, (long)status);
+}
+
+TEST_F(TSYNC, two_siblings_with_one_divergence)
+{
+ long ret;
+ void *status;
+
+ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
+ ASSERT_NE(ENOSYS, errno) {
+ TH_LOG("Kernel does not support seccomp syscall!");
+ }
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
+ }
+ self->sibling[0].diverge = 1;
+ tsync_start_sibling(&self->sibling[0]);
+ tsync_start_sibling(&self->sibling[1]);
+
+ while (self->sibling_count < TSYNC_SIBLINGS) {
+ sem_wait(&self->started);
+ self->sibling_count++;
+ }
+
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
+ &self->apply_prog);
+ ASSERT_EQ(self->sibling[0].system_tid, ret) {
+ TH_LOG("Did not fail on diverged sibling.");
+ }
+
+ /* Wake the threads */
+ pthread_mutex_lock(&self->mutex);
+ ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
+ TH_LOG("cond broadcast non-zero");
+ }
+ pthread_mutex_unlock(&self->mutex);
+
+ /* Ensure they are both unkilled. */
+ PTHREAD_JOIN(self->sibling[0].tid, &status);
+ EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
+ PTHREAD_JOIN(self->sibling[1].tid, &status);
+ EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
+}
+
+TEST_F(TSYNC, two_siblings_not_under_filter)
+{
+ long ret, sib;
+ void *status;
+
+ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ /*
+ * Sibling 0 will have its own seccomp policy
+ * and Sibling 1 will not be under seccomp at
+ * all. Sibling 1 will enter seccomp and 0
+ * will cause failure.
+ */
+ self->sibling[0].diverge = 1;
+ tsync_start_sibling(&self->sibling[0]);
+ tsync_start_sibling(&self->sibling[1]);
+
+ while (self->sibling_count < TSYNC_SIBLINGS) {
+ sem_wait(&self->started);
+ self->sibling_count++;
+ }
+
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
+ ASSERT_NE(ENOSYS, errno) {
+ TH_LOG("Kernel does not support seccomp syscall!");
+ }
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
+ }
+
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
+ &self->apply_prog);
+ ASSERT_EQ(ret, self->sibling[0].system_tid) {
+ TH_LOG("Did not fail on diverged sibling.");
+ }
+ sib = 1;
+ if (ret == self->sibling[0].system_tid)
+ sib = 0;
+
+ pthread_mutex_lock(&self->mutex);
+
+ /* Increment the other siblings num_waits so we can clean up
+ * the one we just saw.
+ */
+ self->sibling[!sib].num_waits += 1;
+
+ /* Signal the thread to clean up*/
+ ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
+ TH_LOG("cond broadcast non-zero");
+ }
+ pthread_mutex_unlock(&self->mutex);
+ PTHREAD_JOIN(self->sibling[sib].tid, &status);
+ EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
+ /* Poll for actual task death. pthread_join doesn't guarantee it. */
+ while (!kill(self->sibling[sib].system_tid, 0))
+ sleep(0.1);
+ /* Switch to the remaining sibling */
+ sib = !sib;
+
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
+ &self->apply_prog);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Expected the remaining sibling to sync");
+ };
+
+ pthread_mutex_lock(&self->mutex);
+
+ /* If remaining sibling didn't have a chance to wake up during
+ * the first broadcast, manually reduce the num_waits now.
+ */
+ if (self->sibling[sib].num_waits > 1)
+ self->sibling[sib].num_waits = 1;
+ ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
+ TH_LOG("cond broadcast non-zero");
+ }
+ pthread_mutex_unlock(&self->mutex);
+ PTHREAD_JOIN(self->sibling[sib].tid, &status);
+ EXPECT_EQ(0, (long)status);
+ /* Poll for actual task death. pthread_join doesn't guarantee it. */
+ while (!kill(self->sibling[sib].system_tid, 0))
+ sleep(0.1);
+
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
+ &self->apply_prog);
+ ASSERT_EQ(0, ret); /* just us chickens */
+}
+
+/* Make sure restarted syscalls are seen directly as "restart_syscall". */
+TEST(syscall_restart)
+{
+ long ret;
+ unsigned long msg;
+ pid_t child_pid;
+ int pipefd[2];
+ int status;
+ siginfo_t info = { };
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+
+#ifdef __NR_sigreturn
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_sigreturn, 6, 0),
+#endif
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 5, 0),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit, 4, 0),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_rt_sigreturn, 3, 0),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_nanosleep, 4, 0),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_restart_syscall, 4, 0),
+
+ /* Allow __NR_write for easy logging. */
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_write, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+ /* The nanosleep jump target. */
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x100),
+ /* The restart_syscall jump target. */
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x200),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+#if defined(__arm__)
+ struct utsname utsbuf;
+#endif
+
+ ASSERT_EQ(0, pipe(pipefd));
+
+ child_pid = fork();
+ ASSERT_LE(0, child_pid);
+ if (child_pid == 0) {
+ /* Child uses EXPECT not ASSERT to deliver status correctly. */
+ char buf = ' ';
+ struct timespec timeout = { };
+
+ /* Attach parent as tracer and stop. */
+ EXPECT_EQ(0, ptrace(PTRACE_TRACEME));
+ EXPECT_EQ(0, raise(SIGSTOP));
+
+ EXPECT_EQ(0, close(pipefd[1]));
+
+ EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+ EXPECT_EQ(0, ret) {
+ TH_LOG("Failed to install filter!");
+ }
+
+ EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
+ TH_LOG("Failed to read() sync from parent");
+ }
+ EXPECT_EQ('.', buf) {
+ TH_LOG("Failed to get sync data from read()");
+ }
+
+ /* Start nanosleep to be interrupted. */
+ timeout.tv_sec = 1;
+ errno = 0;
+ EXPECT_EQ(0, nanosleep(&timeout, NULL)) {
+ TH_LOG("Call to nanosleep() failed (errno %d)", errno);
+ }
+
+ /* Read final sync from parent. */
+ EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
+ TH_LOG("Failed final read() from parent");
+ }
+ EXPECT_EQ('!', buf) {
+ TH_LOG("Failed to get final data from read()");
+ }
+
+ /* Directly report the status of our test harness results. */
+ syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS
+ : EXIT_FAILURE);
+ }
+ EXPECT_EQ(0, close(pipefd[0]));
+
+ /* Attach to child, setup options, and release. */
+ ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
+ ASSERT_EQ(true, WIFSTOPPED(status));
+ ASSERT_EQ(0, ptrace(PTRACE_SETOPTIONS, child_pid, NULL,
+ PTRACE_O_TRACESECCOMP));
+ ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
+ ASSERT_EQ(1, write(pipefd[1], ".", 1));
+
+ /* Wait for nanosleep() to start. */
+ ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
+ ASSERT_EQ(true, WIFSTOPPED(status));
+ ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
+ ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
+ ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
+ ASSERT_EQ(0x100, msg);
+ EXPECT_EQ(__NR_nanosleep, get_syscall(_metadata, child_pid));
+
+ /* Might as well check siginfo for sanity while we're here. */
+ ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
+ ASSERT_EQ(SIGTRAP, info.si_signo);
+ ASSERT_EQ(SIGTRAP | (PTRACE_EVENT_SECCOMP << 8), info.si_code);
+ EXPECT_EQ(0, info.si_errno);
+ EXPECT_EQ(getuid(), info.si_uid);
+ /* Verify signal delivery came from child (seccomp-triggered). */
+ EXPECT_EQ(child_pid, info.si_pid);
+
+ /* Interrupt nanosleep with SIGSTOP (which we'll need to handle). */
+ ASSERT_EQ(0, kill(child_pid, SIGSTOP));
+ ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
+ ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
+ ASSERT_EQ(true, WIFSTOPPED(status));
+ ASSERT_EQ(SIGSTOP, WSTOPSIG(status));
+ /* Verify signal delivery came from parent now. */
+ ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
+ EXPECT_EQ(getpid(), info.si_pid);
+
+ /* Restart nanosleep with SIGCONT, which triggers restart_syscall. */
+ ASSERT_EQ(0, kill(child_pid, SIGCONT));
+ ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
+ ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
+ ASSERT_EQ(true, WIFSTOPPED(status));
+ ASSERT_EQ(SIGCONT, WSTOPSIG(status));
+ ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
+
+ /* Wait for restart_syscall() to start. */
+ ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
+ ASSERT_EQ(true, WIFSTOPPED(status));
+ ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
+ ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
+ ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
+
+ ASSERT_EQ(0x200, msg);
+ ret = get_syscall(_metadata, child_pid);
+#if defined(__arm__)
+ /*
+ * FIXME:
+ * - native ARM registers do NOT expose true syscall.
+ * - compat ARM registers on ARM64 DO expose true syscall.
+ */
+ ASSERT_EQ(0, uname(&utsbuf));
+ if (strncmp(utsbuf.machine, "arm", 3) == 0) {
+ EXPECT_EQ(__NR_nanosleep, ret);
+ } else
+#endif
+ {
+ EXPECT_EQ(__NR_restart_syscall, ret);
+ }
+
+ /* Write again to end test. */
+ ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
+ ASSERT_EQ(1, write(pipefd[1], "!", 1));
+ EXPECT_EQ(0, close(pipefd[1]));
+
+ ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
+ if (WIFSIGNALED(status) || WEXITSTATUS(status))
+ _metadata->passed = 0;
+}
+
+TEST_SIGNAL(filter_flag_log, SIGSYS)
+{
+ struct sock_filter allow_filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_filter kill_filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog allow_prog = {
+ .len = (unsigned short)ARRAY_SIZE(allow_filter),
+ .filter = allow_filter,
+ };
+ struct sock_fprog kill_prog = {
+ .len = (unsigned short)ARRAY_SIZE(kill_filter),
+ .filter = kill_filter,
+ };
+ long ret;
+ pid_t parent = getppid();
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ /* Verify that the FILTER_FLAG_LOG flag isn't accepted in strict mode */
+ ret = seccomp(SECCOMP_SET_MODE_STRICT, SECCOMP_FILTER_FLAG_LOG,
+ &allow_prog);
+ ASSERT_NE(ENOSYS, errno) {
+ TH_LOG("Kernel does not support seccomp syscall!");
+ }
+ EXPECT_NE(0, ret) {
+ TH_LOG("Kernel accepted FILTER_FLAG_LOG flag in strict mode!");
+ }
+ EXPECT_EQ(EINVAL, errno) {
+ TH_LOG("Kernel returned unexpected errno for FILTER_FLAG_LOG flag in strict mode!");
+ }
+
+ /* Verify that a simple, permissive filter can be added with no flags */
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &allow_prog);
+ EXPECT_EQ(0, ret);
+
+ /* See if the same filter can be added with the FILTER_FLAG_LOG flag */
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG,
+ &allow_prog);
+ ASSERT_NE(EINVAL, errno) {
+ TH_LOG("Kernel does not support the FILTER_FLAG_LOG flag!");
+ }
+ EXPECT_EQ(0, ret);
+
+ /* Ensure that the kill filter works with the FILTER_FLAG_LOG flag */
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG,
+ &kill_prog);
+ EXPECT_EQ(0, ret);
+
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ /* getpid() should never return. */
+ EXPECT_EQ(0, syscall(__NR_getpid));
+}
+
+TEST(get_action_avail)
+{
+ __u32 actions[] = { SECCOMP_RET_KILL_THREAD, SECCOMP_RET_TRAP,
+ SECCOMP_RET_ERRNO, SECCOMP_RET_TRACE,
+ SECCOMP_RET_LOG, SECCOMP_RET_ALLOW };
+ __u32 unknown_action = 0x10000000U;
+ int i;
+ long ret;
+
+ ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[0]);
+ ASSERT_NE(ENOSYS, errno) {
+ TH_LOG("Kernel does not support seccomp syscall!");
+ }
+ ASSERT_NE(EINVAL, errno) {
+ TH_LOG("Kernel does not support SECCOMP_GET_ACTION_AVAIL operation!");
+ }
+ EXPECT_EQ(ret, 0);
+
+ for (i = 0; i < ARRAY_SIZE(actions); i++) {
+ ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[i]);
+ EXPECT_EQ(ret, 0) {
+ TH_LOG("Expected action (0x%X) not available!",
+ actions[i]);
+ }
+ }
+
+ /* Check that an unknown action is handled properly (EOPNOTSUPP) */
+ ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &unknown_action);
+ EXPECT_EQ(ret, -1);
+ EXPECT_EQ(errno, EOPNOTSUPP);
+}
+
+TEST(get_metadata)
+{
+ pid_t pid;
+ int pipefd[2];
+ char buf;
+ struct seccomp_metadata md;
+ long ret;
+
+ /* Only real root can get metadata. */
+ if (geteuid()) {
+ XFAIL(return, "get_metadata requires real root");
+ return;
+ }
+
+ ASSERT_EQ(0, pipe(pipefd));
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0) {
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+
+ /* one with log, one without */
+ ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER,
+ SECCOMP_FILTER_FLAG_LOG, &prog));
+ ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog));
+
+ ASSERT_EQ(0, close(pipefd[0]));
+ ASSERT_EQ(1, write(pipefd[1], "1", 1));
+ ASSERT_EQ(0, close(pipefd[1]));
+
+ while (1)
+ sleep(100);
+ }
+
+ ASSERT_EQ(0, close(pipefd[1]));
+ ASSERT_EQ(1, read(pipefd[0], &buf, 1));
+
+ ASSERT_EQ(0, ptrace(PTRACE_ATTACH, pid));
+ ASSERT_EQ(pid, waitpid(pid, NULL, 0));
+
+ /* Past here must not use ASSERT or child process is never killed. */
+
+ md.filter_off = 0;
+ errno = 0;
+ ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md);
+ EXPECT_EQ(sizeof(md), ret) {
+ if (errno == EINVAL)
+ XFAIL(goto skip, "Kernel does not support PTRACE_SECCOMP_GET_METADATA (missing CONFIG_CHECKPOINT_RESTORE?)");
+ }
+
+ EXPECT_EQ(md.flags, SECCOMP_FILTER_FLAG_LOG);
+ EXPECT_EQ(md.filter_off, 0);
+
+ md.filter_off = 1;
+ ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md);
+ EXPECT_EQ(sizeof(md), ret);
+ EXPECT_EQ(md.flags, 0);
+ EXPECT_EQ(md.filter_off, 1);
+
+skip:
+ ASSERT_EQ(0, kill(pid, SIGKILL));
+}
+
+/*
+ * TODO:
+ * - add microbenchmarks
+ * - expand NNP testing
+ * - better arch-specific TRACE and TRAP handlers.
+ * - endianness checking when appropriate
+ * - 64-bit arg prodding
+ * - arch value testing (x86 modes especially)
+ * - verify that FILTER_FLAG_LOG filters generate log messages
+ * - verify that RET_LOG generates log messages
+ * - ...
+ */
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/sigaltstack/.gitignore b/tools/testing/selftests/sigaltstack/.gitignore
new file mode 100644
index 000000000..35897b0a3
--- /dev/null
+++ b/tools/testing/selftests/sigaltstack/.gitignore
@@ -0,0 +1 @@
+sas
diff --git a/tools/testing/selftests/sigaltstack/Makefile b/tools/testing/selftests/sigaltstack/Makefile
new file mode 100644
index 000000000..f68fbf80d
--- /dev/null
+++ b/tools/testing/selftests/sigaltstack/Makefile
@@ -0,0 +1,5 @@
+CFLAGS = -Wall
+TEST_GEN_PROGS = sas
+
+include ../lib.mk
+
diff --git a/tools/testing/selftests/sigaltstack/sas.c b/tools/testing/selftests/sigaltstack/sas.c
new file mode 100644
index 000000000..228c2ae47
--- /dev/null
+++ b/tools/testing/selftests/sigaltstack/sas.c
@@ -0,0 +1,191 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Stas Sergeev <stsp@users.sourceforge.net>
+ *
+ * test sigaltstack(SS_ONSTACK | SS_AUTODISARM)
+ * If that succeeds, then swapcontext() can be used inside sighandler safely.
+ *
+ */
+
+#define _GNU_SOURCE
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <ucontext.h>
+#include <alloca.h>
+#include <string.h>
+#include <assert.h>
+#include <errno.h>
+
+#include "../kselftest.h"
+
+#ifndef SS_AUTODISARM
+#define SS_AUTODISARM (1U << 31)
+#endif
+
+static void *sstack, *ustack;
+static ucontext_t uc, sc;
+static const char *msg = "[OK]\tStack preserved";
+static const char *msg2 = "[FAIL]\tStack corrupted";
+struct stk_data {
+ char msg[128];
+ int flag;
+};
+
+void my_usr1(int sig, siginfo_t *si, void *u)
+{
+ char *aa;
+ int err;
+ stack_t stk;
+ struct stk_data *p;
+
+#if __s390x__
+ register unsigned long sp asm("%15");
+#else
+ register unsigned long sp asm("sp");
+#endif
+
+ if (sp < (unsigned long)sstack ||
+ sp >= (unsigned long)sstack + SIGSTKSZ) {
+ ksft_exit_fail_msg("SP is not on sigaltstack\n");
+ }
+ /* put some data on stack. other sighandler will try to overwrite it */
+ aa = alloca(1024);
+ assert(aa);
+ p = (struct stk_data *)(aa + 512);
+ strcpy(p->msg, msg);
+ p->flag = 1;
+ ksft_print_msg("[RUN]\tsignal USR1\n");
+ err = sigaltstack(NULL, &stk);
+ if (err) {
+ ksft_exit_fail_msg("sigaltstack() - %s\n", strerror(errno));
+ exit(EXIT_FAILURE);
+ }
+ if (stk.ss_flags != SS_DISABLE)
+ ksft_test_result_fail("tss_flags=%x, should be SS_DISABLE\n",
+ stk.ss_flags);
+ else
+ ksft_test_result_pass(
+ "sigaltstack is disabled in sighandler\n");
+ swapcontext(&sc, &uc);
+ ksft_print_msg("%s\n", p->msg);
+ if (!p->flag) {
+ ksft_exit_skip("[RUN]\tAborting\n");
+ exit(EXIT_FAILURE);
+ }
+}
+
+void my_usr2(int sig, siginfo_t *si, void *u)
+{
+ char *aa;
+ struct stk_data *p;
+
+ ksft_print_msg("[RUN]\tsignal USR2\n");
+ aa = alloca(1024);
+ /* dont run valgrind on this */
+ /* try to find the data stored by previous sighandler */
+ p = memmem(aa, 1024, msg, strlen(msg));
+ if (p) {
+ ksft_test_result_fail("sigaltstack re-used\n");
+ /* corrupt the data */
+ strcpy(p->msg, msg2);
+ /* tell other sighandler that his data is corrupted */
+ p->flag = 0;
+ }
+}
+
+static void switch_fn(void)
+{
+ ksft_print_msg("[RUN]\tswitched to user ctx\n");
+ raise(SIGUSR2);
+ setcontext(&sc);
+}
+
+int main(void)
+{
+ struct sigaction act;
+ stack_t stk;
+ int err;
+
+ ksft_print_header();
+
+ sigemptyset(&act.sa_mask);
+ act.sa_flags = SA_ONSTACK | SA_SIGINFO;
+ act.sa_sigaction = my_usr1;
+ sigaction(SIGUSR1, &act, NULL);
+ act.sa_sigaction = my_usr2;
+ sigaction(SIGUSR2, &act, NULL);
+ sstack = mmap(NULL, SIGSTKSZ, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
+ if (sstack == MAP_FAILED) {
+ ksft_exit_fail_msg("mmap() - %s\n", strerror(errno));
+ return EXIT_FAILURE;
+ }
+
+ err = sigaltstack(NULL, &stk);
+ if (err) {
+ ksft_exit_fail_msg("sigaltstack() - %s\n", strerror(errno));
+ exit(EXIT_FAILURE);
+ }
+ if (stk.ss_flags == SS_DISABLE) {
+ ksft_test_result_pass(
+ "Initial sigaltstack state was SS_DISABLE\n");
+ } else {
+ ksft_exit_fail_msg("Initial sigaltstack state was %x; "
+ "should have been SS_DISABLE\n", stk.ss_flags);
+ return EXIT_FAILURE;
+ }
+
+ stk.ss_sp = sstack;
+ stk.ss_size = SIGSTKSZ;
+ stk.ss_flags = SS_ONSTACK | SS_AUTODISARM;
+ err = sigaltstack(&stk, NULL);
+ if (err) {
+ if (errno == EINVAL) {
+ ksft_exit_skip(
+ "[NOTE]\tThe running kernel doesn't support SS_AUTODISARM\n");
+ /*
+ * If test cases for the !SS_AUTODISARM variant were
+ * added, we could still run them. We don't have any
+ * test cases like that yet, so just exit and report
+ * success.
+ */
+ return 0;
+ } else {
+ ksft_exit_fail_msg(
+ "sigaltstack(SS_ONSTACK | SS_AUTODISARM) %s\n",
+ strerror(errno));
+ return EXIT_FAILURE;
+ }
+ }
+
+ ustack = mmap(NULL, SIGSTKSZ, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
+ if (ustack == MAP_FAILED) {
+ ksft_exit_fail_msg("mmap() - %s\n", strerror(errno));
+ return EXIT_FAILURE;
+ }
+ getcontext(&uc);
+ uc.uc_link = NULL;
+ uc.uc_stack.ss_sp = ustack;
+ uc.uc_stack.ss_size = SIGSTKSZ;
+ makecontext(&uc, switch_fn, 0);
+ raise(SIGUSR1);
+
+ err = sigaltstack(NULL, &stk);
+ if (err) {
+ ksft_exit_fail_msg("sigaltstack() - %s\n", strerror(errno));
+ exit(EXIT_FAILURE);
+ }
+ if (stk.ss_flags != SS_AUTODISARM) {
+ ksft_exit_fail_msg("ss_flags=%x, should be SS_AUTODISARM\n",
+ stk.ss_flags);
+ exit(EXIT_FAILURE);
+ }
+ ksft_test_result_pass(
+ "sigaltstack is still SS_AUTODISARM after signal\n");
+
+ ksft_exit_pass();
+ return 0;
+}
diff --git a/tools/testing/selftests/size/.gitignore b/tools/testing/selftests/size/.gitignore
new file mode 100644
index 000000000..189b7818d
--- /dev/null
+++ b/tools/testing/selftests/size/.gitignore
@@ -0,0 +1 @@
+get_size
diff --git a/tools/testing/selftests/size/Makefile b/tools/testing/selftests/size/Makefile
new file mode 100644
index 000000000..4685b3e42
--- /dev/null
+++ b/tools/testing/selftests/size/Makefile
@@ -0,0 +1,5 @@
+CFLAGS := -static -ffreestanding -nostartfiles -s
+
+TEST_GEN_PROGS := get_size
+
+include ../lib.mk
diff --git a/tools/testing/selftests/size/get_size.c b/tools/testing/selftests/size/get_size.c
new file mode 100644
index 000000000..f55943b6d
--- /dev/null
+++ b/tools/testing/selftests/size/get_size.c
@@ -0,0 +1,117 @@
+/*
+ * Copyright 2014 Sony Mobile Communications Inc.
+ *
+ * Licensed under the terms of the GNU GPL License version 2
+ *
+ * Selftest for runtime system size
+ *
+ * Prints the amount of RAM that the currently running system is using.
+ *
+ * This program tries to be as small as possible itself, to
+ * avoid perturbing the system memory utilization with its
+ * own execution. It also attempts to have as few dependencies
+ * on kernel features as possible.
+ *
+ * It should be statically linked, with startup libs avoided. It uses
+ * no library calls except the syscall() function for the following 3
+ * syscalls:
+ * sysinfo(), write(), and _exit()
+ *
+ * For output, it avoids printf (which in some C libraries
+ * has large external dependencies) by implementing it's own
+ * number output and print routines, and using __builtin_strlen()
+ *
+ * The test may crash if any of the above syscalls fails because in some
+ * libc implementations (e.g. the GNU C Library) errno is saved in
+ * thread-local storage, which does not get initialized due to avoiding
+ * startup libs.
+ */
+
+#include <sys/sysinfo.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+
+#define STDOUT_FILENO 1
+
+static int print(const char *s)
+{
+ size_t len = 0;
+
+ while (s[len] != '\0')
+ len++;
+
+ return syscall(SYS_write, STDOUT_FILENO, s, len);
+}
+
+static inline char *num_to_str(unsigned long num, char *buf, int len)
+{
+ unsigned int digit;
+
+ /* put digits in buffer from back to front */
+ buf += len - 1;
+ *buf = 0;
+ do {
+ digit = num % 10;
+ *(--buf) = digit + '0';
+ num /= 10;
+ } while (num > 0);
+
+ return buf;
+}
+
+static int print_num(unsigned long num)
+{
+ char num_buf[30];
+
+ return print(num_to_str(num, num_buf, sizeof(num_buf)));
+}
+
+static int print_k_value(const char *s, unsigned long num, unsigned long units)
+{
+ unsigned long long temp;
+ int ccode;
+
+ print(s);
+
+ temp = num;
+ temp = (temp * units)/1024;
+ num = temp;
+ ccode = print_num(num);
+ print("\n");
+ return ccode;
+}
+
+/* this program has no main(), as startup libraries are not used */
+void _start(void)
+{
+ int ccode;
+ struct sysinfo info;
+ unsigned long used;
+ static const char *test_name = " get runtime memory use\n";
+
+ print("TAP version 13\n");
+ print("# Testing system size.\n");
+
+ ccode = syscall(SYS_sysinfo, &info);
+ if (ccode < 0) {
+ print("not ok 1");
+ print(test_name);
+ print(" ---\n reason: \"could not get sysinfo\"\n ...\n");
+ syscall(SYS_exit, ccode);
+ }
+ print("ok 1");
+ print(test_name);
+
+ /* ignore cache complexities for now */
+ used = info.totalram - info.freeram - info.bufferram;
+ print("# System runtime memory report (units in Kilobytes):\n");
+ print(" ---\n");
+ print_k_value(" Total: ", info.totalram, info.mem_unit);
+ print_k_value(" Free: ", info.freeram, info.mem_unit);
+ print_k_value(" Buffer: ", info.bufferram, info.mem_unit);
+ print_k_value(" In use: ", used, info.mem_unit);
+ print(" ...\n");
+ print("1..1\n");
+
+ syscall(SYS_exit, 0);
+}
diff --git a/tools/testing/selftests/sparc64/Makefile b/tools/testing/selftests/sparc64/Makefile
new file mode 100644
index 000000000..a19531dba
--- /dev/null
+++ b/tools/testing/selftests/sparc64/Makefile
@@ -0,0 +1,50 @@
+# SPDX-License-Identifier: GPL-2.0
+uname_M := $(shell uname -m 2>/dev/null || echo not)
+ARCH ?= $(shell echo $(uname_M) | sed -e s/x86_64/x86/)
+
+ifneq ($(ARCH),sparc64)
+nothing:
+.PHONY: all clean run_tests install
+.SILENT:
+else
+
+SUBDIRS := drivers
+
+TEST_PROGS := run.sh
+
+
+.PHONY: all clean
+
+include ../lib.mk
+
+all:
+ @for DIR in $(SUBDIRS); do \
+ BUILD_TARGET=$(OUTPUT)/$$DIR; \
+ mkdir $$BUILD_TARGET -p; \
+ make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
+ #SUBDIR test prog name should be in the form: SUBDIR_test.sh \
+ TEST=$$DIR"_test.sh"; \
+ if [ -e $$DIR/$$TEST ]; then \
+ rsync -a $$DIR/$$TEST $$BUILD_TARGET/; \
+ fi \
+ done
+
+override define INSTALL_RULE
+ mkdir -p $(INSTALL_PATH)
+ install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES)
+
+ @for SUBDIR in $(SUBDIRS); do \
+ BUILD_TARGET=$(OUTPUT)/$$SUBDIR; \
+ mkdir $$BUILD_TARGET -p; \
+ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$SUBDIR INSTALL_PATH=$(INSTALL_PATH)/$$SUBDIR install; \
+ done;
+endef
+
+override define CLEAN
+ @for DIR in $(SUBDIRS); do \
+ BUILD_TARGET=$(OUTPUT)/$$DIR; \
+ mkdir $$BUILD_TARGET -p; \
+ make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
+ done
+endef
+endif
diff --git a/tools/testing/selftests/sparc64/drivers/.gitignore b/tools/testing/selftests/sparc64/drivers/.gitignore
new file mode 100644
index 000000000..90e835ed7
--- /dev/null
+++ b/tools/testing/selftests/sparc64/drivers/.gitignore
@@ -0,0 +1 @@
+adi-test
diff --git a/tools/testing/selftests/sparc64/drivers/Makefile b/tools/testing/selftests/sparc64/drivers/Makefile
new file mode 100644
index 000000000..deb0df415
--- /dev/null
+++ b/tools/testing/selftests/sparc64/drivers/Makefile
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0
+INCLUDEDIR := -I.
+CFLAGS := $(CFLAGS) $(INCLUDEDIR) -Wall -O2 -g
+
+TEST_GEN_FILES := adi-test
+
+all: $(TEST_GEN_FILES)
+
+$(TEST_GEN_FILES): adi-test.c
+
+TEST_PROGS := drivers_test.sh
+
+include ../../lib.mk
+
+$(OUTPUT)/adi-test: adi-test.c
diff --git a/tools/testing/selftests/sparc64/drivers/adi-test.c b/tools/testing/selftests/sparc64/drivers/adi-test.c
new file mode 100644
index 000000000..95d93c6a8
--- /dev/null
+++ b/tools/testing/selftests/sparc64/drivers/adi-test.c
@@ -0,0 +1,721 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * selftest for sparc64's privileged ADI driver
+ *
+ * Author: Tom Hromatka <tom.hromatka@oracle.com>
+ */
+#include <linux/kernel.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "../../kselftest.h"
+
+#define DEBUG_LEVEL_1_BIT (0x0001)
+#define DEBUG_LEVEL_2_BIT (0x0002)
+#define DEBUG_LEVEL_3_BIT (0x0004)
+#define DEBUG_LEVEL_4_BIT (0x0008)
+#define DEBUG_TIMING_BIT (0x1000)
+
+#ifndef ARRAY_SIZE
+# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
+/* bit mask of enabled bits to print */
+#define DEBUG 0x0001
+
+#define DEBUG_PRINT_L1(...) debug_print(DEBUG_LEVEL_1_BIT, __VA_ARGS__)
+#define DEBUG_PRINT_L2(...) debug_print(DEBUG_LEVEL_2_BIT, __VA_ARGS__)
+#define DEBUG_PRINT_L3(...) debug_print(DEBUG_LEVEL_3_BIT, __VA_ARGS__)
+#define DEBUG_PRINT_L4(...) debug_print(DEBUG_LEVEL_4_BIT, __VA_ARGS__)
+#define DEBUG_PRINT_T(...) debug_print(DEBUG_TIMING_BIT, __VA_ARGS__)
+
+static void debug_print(int level, const char *s, ...)
+{
+ va_list args;
+
+ va_start(args, s);
+
+ if (DEBUG & level)
+ vfprintf(stdout, s, args);
+ va_end(args);
+}
+
+#ifndef min
+#define min(x, y) ((x) < (y) ? x : y)
+#endif
+
+#define RETURN_FROM_TEST(_ret) \
+ do { \
+ DEBUG_PRINT_L1( \
+ "\tTest %s returned %d\n", __func__, _ret); \
+ return _ret; \
+ } while (0)
+
+#define ADI_BLKSZ 64
+#define ADI_MAX_VERSION 15
+
+#define TEST_STEP_FAILURE(_ret) \
+ do { \
+ fprintf(stderr, "\tTest step failure: %d at %s:%d\n", \
+ _ret, __func__, __LINE__); \
+ goto out; \
+ } while (0)
+
+#define RDTICK(_x) \
+ asm volatile(" rd %%tick, %0\n" : "=r" (_x))
+
+static int random_version(void)
+{
+ long tick;
+
+ RDTICK(tick);
+
+ return tick % (ADI_MAX_VERSION + 1);
+}
+
+#define MAX_RANGES_SUPPORTED 5
+static const char system_ram_str[] = "System RAM\n";
+static int range_count;
+static unsigned long long int start_addr[MAX_RANGES_SUPPORTED];
+static unsigned long long int end_addr[MAX_RANGES_SUPPORTED];
+
+struct stats {
+ char name[16];
+ unsigned long total;
+ unsigned long count;
+ unsigned long bytes;
+};
+
+static struct stats read_stats = {
+ .name = "read", .total = 0, .count = 0, .bytes = 0};
+static struct stats pread_stats = {
+ .name = "pread", .total = 0, .count = 0, .bytes = 0};
+static struct stats write_stats = {
+ .name = "write", .total = 0, .count = 0, .bytes = 0};
+static struct stats pwrite_stats = {
+ .name = "pwrite", .total = 0, .count = 0, .bytes = 0};
+static struct stats seek_stats = {
+ .name = "seek", .total = 0, .count = 0, .bytes = 0};
+
+static void update_stats(struct stats * const ustats,
+ unsigned long measurement, unsigned long bytes)
+{
+ ustats->total += measurement;
+ ustats->bytes += bytes;
+ ustats->count++;
+}
+
+static void print_ustats(const struct stats * const ustats)
+{
+ DEBUG_PRINT_L1("%s\t%7d\t%7.0f\t%7.0f\n",
+ ustats->name, ustats->count,
+ (float)ustats->total / (float)ustats->count,
+ (float)ustats->bytes / (float)ustats->count);
+}
+
+static void print_stats(void)
+{
+ DEBUG_PRINT_L1("\nSyscall\tCall\tAvgTime\tAvgSize\n"
+ "\tCount\t(ticks)\t(bytes)\n"
+ "-------------------------------\n");
+
+ print_ustats(&read_stats);
+ print_ustats(&pread_stats);
+ print_ustats(&write_stats);
+ print_ustats(&pwrite_stats);
+ print_ustats(&seek_stats);
+}
+
+static int build_memory_map(void)
+{
+ char line[256];
+ FILE *fp;
+ int i;
+
+ range_count = 0;
+
+ fp = fopen("/proc/iomem", "r");
+ if (!fp) {
+ fprintf(stderr, "/proc/iomem: error %d: %s\n",
+ errno, strerror(errno));
+ return -errno;
+ }
+
+ while (fgets(line, sizeof(line), fp) != 0) {
+ if (strstr(line, system_ram_str)) {
+ char *dash, *end_ptr;
+
+ /* Given a line like this:
+ * d0400000-10ffaffff : System RAM
+ * replace the "-" with a space
+ */
+ dash = strstr(line, "-");
+ dash[0] = 0x20;
+
+ start_addr[range_count] = strtoull(line, &end_ptr, 16);
+ end_addr[range_count] = strtoull(end_ptr, NULL, 16);
+ range_count++;
+ }
+ }
+
+ fclose(fp);
+
+ DEBUG_PRINT_L1("RAM Ranges\n");
+ for (i = 0; i < range_count; i++)
+ DEBUG_PRINT_L1("\trange %d: 0x%llx\t- 0x%llx\n",
+ i, start_addr[i], end_addr[i]);
+
+ if (range_count == 0) {
+ fprintf(stderr, "No valid address ranges found. Error.\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int read_adi(int fd, unsigned char *buf, int buf_sz)
+{
+ int ret, bytes_read = 0;
+ long start, end, elapsed_time = 0;
+
+ do {
+ RDTICK(start);
+ ret = read(fd, buf + bytes_read, buf_sz - bytes_read);
+ RDTICK(end);
+ if (ret < 0)
+ return -errno;
+
+ elapsed_time += end - start;
+ update_stats(&read_stats, elapsed_time, buf_sz);
+ bytes_read += ret;
+
+ } while (bytes_read < buf_sz);
+
+ DEBUG_PRINT_T("\tread elapsed timed = %ld\n", elapsed_time);
+ DEBUG_PRINT_L3("\tRead %d bytes\n", bytes_read);
+
+ return bytes_read;
+}
+
+static int pread_adi(int fd, unsigned char *buf,
+ int buf_sz, unsigned long offset)
+{
+ int ret, i, bytes_read = 0;
+ unsigned long cur_offset;
+ long start, end, elapsed_time = 0;
+
+ cur_offset = offset;
+ do {
+ RDTICK(start);
+ ret = pread(fd, buf + bytes_read, buf_sz - bytes_read,
+ cur_offset);
+ RDTICK(end);
+ if (ret < 0)
+ return -errno;
+
+ elapsed_time += end - start;
+ update_stats(&pread_stats, elapsed_time, buf_sz);
+ bytes_read += ret;
+ cur_offset += ret;
+
+ } while (bytes_read < buf_sz);
+
+ DEBUG_PRINT_T("\tpread elapsed timed = %ld\n", elapsed_time);
+ DEBUG_PRINT_L3("\tRead %d bytes starting at offset 0x%lx\n",
+ bytes_read, offset);
+ for (i = 0; i < bytes_read; i++)
+ DEBUG_PRINT_L4("\t\t0x%lx\t%d\n", offset + i, buf[i]);
+
+ return bytes_read;
+}
+
+static int write_adi(int fd, const unsigned char * const buf, int buf_sz)
+{
+ int ret, bytes_written = 0;
+ long start, end, elapsed_time = 0;
+
+ do {
+ RDTICK(start);
+ ret = write(fd, buf + bytes_written, buf_sz - bytes_written);
+ RDTICK(end);
+ if (ret < 0)
+ return -errno;
+
+ elapsed_time += (end - start);
+ update_stats(&write_stats, elapsed_time, buf_sz);
+ bytes_written += ret;
+ } while (bytes_written < buf_sz);
+
+ DEBUG_PRINT_T("\twrite elapsed timed = %ld\n", elapsed_time);
+ DEBUG_PRINT_L3("\tWrote %d of %d bytes\n", bytes_written, buf_sz);
+
+ return bytes_written;
+}
+
+static int pwrite_adi(int fd, const unsigned char * const buf,
+ int buf_sz, unsigned long offset)
+{
+ int ret, bytes_written = 0;
+ unsigned long cur_offset;
+ long start, end, elapsed_time = 0;
+
+ cur_offset = offset;
+
+ do {
+ RDTICK(start);
+ ret = pwrite(fd, buf + bytes_written,
+ buf_sz - bytes_written, cur_offset);
+ RDTICK(end);
+ if (ret < 0) {
+ fprintf(stderr, "pwrite(): error %d: %s\n",
+ errno, strerror(errno));
+ return -errno;
+ }
+
+ elapsed_time += (end - start);
+ update_stats(&pwrite_stats, elapsed_time, buf_sz);
+ bytes_written += ret;
+ cur_offset += ret;
+
+ } while (bytes_written < buf_sz);
+
+ DEBUG_PRINT_T("\tpwrite elapsed timed = %ld\n", elapsed_time);
+ DEBUG_PRINT_L3("\tWrote %d of %d bytes starting at address 0x%lx\n",
+ bytes_written, buf_sz, offset);
+
+ return bytes_written;
+}
+
+static off_t seek_adi(int fd, off_t offset, int whence)
+{
+ long start, end;
+ off_t ret;
+
+ RDTICK(start);
+ ret = lseek(fd, offset, whence);
+ RDTICK(end);
+ DEBUG_PRINT_L2("\tlseek ret = 0x%llx\n", ret);
+ if (ret < 0)
+ goto out;
+
+ DEBUG_PRINT_T("\tlseek elapsed timed = %ld\n", end - start);
+ update_stats(&seek_stats, end - start, 0);
+
+out:
+ (void)lseek(fd, 0, SEEK_END);
+ return ret;
+}
+
+static int test0_prpw_aligned_1byte(int fd)
+{
+ /* somewhat arbitrarily chosen address */
+ unsigned long paddr =
+ (end_addr[range_count - 1] - 0x1000) & ~(ADI_BLKSZ - 1);
+ unsigned char version[1], expected_version;
+ loff_t offset;
+ int ret;
+
+ version[0] = random_version();
+ expected_version = version[0];
+
+ offset = paddr / ADI_BLKSZ;
+
+ ret = pwrite_adi(fd, version, sizeof(version), offset);
+ if (ret != sizeof(version))
+ TEST_STEP_FAILURE(ret);
+
+ ret = pread_adi(fd, version, sizeof(version), offset);
+ if (ret != sizeof(version))
+ TEST_STEP_FAILURE(ret);
+
+ if (expected_version != version[0]) {
+ DEBUG_PRINT_L2("\tExpected version %d but read version %d\n",
+ expected_version, version[0]);
+ TEST_STEP_FAILURE(-expected_version);
+ }
+
+ ret = 0;
+out:
+ RETURN_FROM_TEST(ret);
+}
+
+#define TEST1_VERSION_SZ 4096
+static int test1_prpw_aligned_4096bytes(int fd)
+{
+ /* somewhat arbitrarily chosen address */
+ unsigned long paddr =
+ (end_addr[range_count - 1] - 0x6000) & ~(ADI_BLKSZ - 1);
+ unsigned char version[TEST1_VERSION_SZ],
+ expected_version[TEST1_VERSION_SZ];
+ loff_t offset;
+ int ret, i;
+
+ for (i = 0; i < TEST1_VERSION_SZ; i++) {
+ version[i] = random_version();
+ expected_version[i] = version[i];
+ }
+
+ offset = paddr / ADI_BLKSZ;
+
+ ret = pwrite_adi(fd, version, sizeof(version), offset);
+ if (ret != sizeof(version))
+ TEST_STEP_FAILURE(ret);
+
+ ret = pread_adi(fd, version, sizeof(version), offset);
+ if (ret != sizeof(version))
+ TEST_STEP_FAILURE(ret);
+
+ for (i = 0; i < TEST1_VERSION_SZ; i++) {
+ if (expected_version[i] != version[i]) {
+ DEBUG_PRINT_L2(
+ "\tExpected version %d but read version %d\n",
+ expected_version, version[0]);
+ TEST_STEP_FAILURE(-expected_version[i]);
+ }
+ }
+
+ ret = 0;
+out:
+ RETURN_FROM_TEST(ret);
+}
+
+#define TEST2_VERSION_SZ 10327
+static int test2_prpw_aligned_10327bytes(int fd)
+{
+ /* somewhat arbitrarily chosen address */
+ unsigned long paddr =
+ (start_addr[0] + 0x6000) & ~(ADI_BLKSZ - 1);
+ unsigned char version[TEST2_VERSION_SZ],
+ expected_version[TEST2_VERSION_SZ];
+ loff_t offset;
+ int ret, i;
+
+ for (i = 0; i < TEST2_VERSION_SZ; i++) {
+ version[i] = random_version();
+ expected_version[i] = version[i];
+ }
+
+ offset = paddr / ADI_BLKSZ;
+
+ ret = pwrite_adi(fd, version, sizeof(version), offset);
+ if (ret != sizeof(version))
+ TEST_STEP_FAILURE(ret);
+
+ ret = pread_adi(fd, version, sizeof(version), offset);
+ if (ret != sizeof(version))
+ TEST_STEP_FAILURE(ret);
+
+ for (i = 0; i < TEST2_VERSION_SZ; i++) {
+ if (expected_version[i] != version[i]) {
+ DEBUG_PRINT_L2(
+ "\tExpected version %d but read version %d\n",
+ expected_version, version[0]);
+ TEST_STEP_FAILURE(-expected_version[i]);
+ }
+ }
+
+ ret = 0;
+out:
+ RETURN_FROM_TEST(ret);
+}
+
+#define TEST3_VERSION_SZ 12541
+static int test3_prpw_unaligned_12541bytes(int fd)
+{
+ /* somewhat arbitrarily chosen address */
+ unsigned long paddr =
+ ((start_addr[0] + 0xC000) & ~(ADI_BLKSZ - 1)) + 17;
+ unsigned char version[TEST3_VERSION_SZ],
+ expected_version[TEST3_VERSION_SZ];
+ loff_t offset;
+ int ret, i;
+
+ for (i = 0; i < TEST3_VERSION_SZ; i++) {
+ version[i] = random_version();
+ expected_version[i] = version[i];
+ }
+
+ offset = paddr / ADI_BLKSZ;
+
+ ret = pwrite_adi(fd, version, sizeof(version), offset);
+ if (ret != sizeof(version))
+ TEST_STEP_FAILURE(ret);
+
+ ret = pread_adi(fd, version, sizeof(version), offset);
+ if (ret != sizeof(version))
+ TEST_STEP_FAILURE(ret);
+
+ for (i = 0; i < TEST3_VERSION_SZ; i++) {
+ if (expected_version[i] != version[i]) {
+ DEBUG_PRINT_L2(
+ "\tExpected version %d but read version %d\n",
+ expected_version, version[0]);
+ TEST_STEP_FAILURE(-expected_version[i]);
+ }
+ }
+
+ ret = 0;
+out:
+ RETURN_FROM_TEST(ret);
+}
+
+static int test4_lseek(int fd)
+{
+#define OFFSET_ADD (0x100)
+#define OFFSET_SUBTRACT (0xFFFFFFF000000000)
+
+ off_t offset_out, offset_in;
+ int ret;
+
+
+ offset_in = 0x123456789abcdef0;
+ offset_out = seek_adi(fd, offset_in, SEEK_SET);
+ if (offset_out != offset_in) {
+ ret = -1;
+ TEST_STEP_FAILURE(ret);
+ }
+
+ /* seek to the current offset. this should return EINVAL */
+ offset_out = seek_adi(fd, offset_in, SEEK_SET);
+ if (offset_out < 0 && errno == EINVAL)
+ DEBUG_PRINT_L2(
+ "\tSEEK_SET failed as designed. Not an error\n");
+ else {
+ ret = -2;
+ TEST_STEP_FAILURE(ret);
+ }
+
+ offset_out = seek_adi(fd, 0, SEEK_CUR);
+ if (offset_out != offset_in) {
+ ret = -3;
+ TEST_STEP_FAILURE(ret);
+ }
+
+ offset_out = seek_adi(fd, OFFSET_ADD, SEEK_CUR);
+ if (offset_out != (offset_in + OFFSET_ADD)) {
+ ret = -4;
+ TEST_STEP_FAILURE(ret);
+ }
+
+ offset_out = seek_adi(fd, OFFSET_SUBTRACT, SEEK_CUR);
+ if (offset_out != (offset_in + OFFSET_ADD + OFFSET_SUBTRACT)) {
+ ret = -5;
+ TEST_STEP_FAILURE(ret);
+ }
+
+ ret = 0;
+out:
+ RETURN_FROM_TEST(ret);
+}
+
+static int test5_rw_aligned_1byte(int fd)
+{
+ /* somewhat arbitrarily chosen address */
+ unsigned long paddr =
+ (end_addr[range_count - 1] - 0xF000) & ~(ADI_BLKSZ - 1);
+ unsigned char version, expected_version;
+ loff_t offset;
+ off_t oret;
+ int ret;
+
+ offset = paddr / ADI_BLKSZ;
+ version = expected_version = random_version();
+
+ oret = seek_adi(fd, offset, SEEK_SET);
+ if (oret != offset) {
+ ret = -1;
+ TEST_STEP_FAILURE(ret);
+ }
+
+ ret = write_adi(fd, &version, sizeof(version));
+ if (ret != sizeof(version))
+ TEST_STEP_FAILURE(ret);
+
+ oret = seek_adi(fd, offset, SEEK_SET);
+ if (oret != offset) {
+ ret = -1;
+ TEST_STEP_FAILURE(ret);
+ }
+
+ ret = read_adi(fd, &version, sizeof(version));
+ if (ret != sizeof(version))
+ TEST_STEP_FAILURE(ret);
+
+ if (expected_version != version) {
+ DEBUG_PRINT_L2("\tExpected version %d but read version %d\n",
+ expected_version, version);
+ TEST_STEP_FAILURE(-expected_version);
+ }
+
+ ret = 0;
+out:
+ RETURN_FROM_TEST(ret);
+}
+
+#define TEST6_VERSION_SZ 9434
+static int test6_rw_aligned_9434bytes(int fd)
+{
+ /* somewhat arbitrarily chosen address */
+ unsigned long paddr =
+ (end_addr[range_count - 1] - 0x5F000) & ~(ADI_BLKSZ - 1);
+ unsigned char version[TEST6_VERSION_SZ],
+ expected_version[TEST6_VERSION_SZ];
+ loff_t offset;
+ off_t oret;
+ int ret, i;
+
+ offset = paddr / ADI_BLKSZ;
+ for (i = 0; i < TEST6_VERSION_SZ; i++)
+ version[i] = expected_version[i] = random_version();
+
+ oret = seek_adi(fd, offset, SEEK_SET);
+ if (oret != offset) {
+ ret = -1;
+ TEST_STEP_FAILURE(ret);
+ }
+
+ ret = write_adi(fd, version, sizeof(version));
+ if (ret != sizeof(version))
+ TEST_STEP_FAILURE(ret);
+
+ memset(version, 0, TEST6_VERSION_SZ);
+
+ oret = seek_adi(fd, offset, SEEK_SET);
+ if (oret != offset) {
+ ret = -1;
+ TEST_STEP_FAILURE(ret);
+ }
+
+ ret = read_adi(fd, version, sizeof(version));
+ if (ret != sizeof(version))
+ TEST_STEP_FAILURE(ret);
+
+ for (i = 0; i < TEST6_VERSION_SZ; i++) {
+ if (expected_version[i] != version[i]) {
+ DEBUG_PRINT_L2(
+ "\tExpected version %d but read version %d\n",
+ expected_version[i], version[i]);
+ TEST_STEP_FAILURE(-expected_version[i]);
+ }
+ }
+
+ ret = 0;
+out:
+ RETURN_FROM_TEST(ret);
+}
+
+#define TEST7_VERSION_SZ 14963
+static int test7_rw_aligned_14963bytes(int fd)
+{
+ /* somewhat arbitrarily chosen address */
+ unsigned long paddr =
+ ((start_addr[range_count - 1] + 0xF000) & ~(ADI_BLKSZ - 1)) + 39;
+ unsigned char version[TEST7_VERSION_SZ],
+ expected_version[TEST7_VERSION_SZ];
+ loff_t offset;
+ off_t oret;
+ int ret, i;
+
+ offset = paddr / ADI_BLKSZ;
+ for (i = 0; i < TEST7_VERSION_SZ; i++) {
+ version[i] = random_version();
+ expected_version[i] = version[i];
+ }
+
+ oret = seek_adi(fd, offset, SEEK_SET);
+ if (oret != offset) {
+ ret = -1;
+ TEST_STEP_FAILURE(ret);
+ }
+
+ ret = write_adi(fd, version, sizeof(version));
+ if (ret != sizeof(version))
+ TEST_STEP_FAILURE(ret);
+
+ memset(version, 0, TEST7_VERSION_SZ);
+
+ oret = seek_adi(fd, offset, SEEK_SET);
+ if (oret != offset) {
+ ret = -1;
+ TEST_STEP_FAILURE(ret);
+ }
+
+ ret = read_adi(fd, version, sizeof(version));
+ if (ret != sizeof(version))
+ TEST_STEP_FAILURE(ret);
+
+ for (i = 0; i < TEST7_VERSION_SZ; i++) {
+ if (expected_version[i] != version[i]) {
+ DEBUG_PRINT_L2(
+ "\tExpected version %d but read version %d\n",
+ expected_version[i], version[i]);
+ TEST_STEP_FAILURE(-expected_version[i]);
+ }
+
+ paddr += ADI_BLKSZ;
+ }
+
+ ret = 0;
+out:
+ RETURN_FROM_TEST(ret);
+}
+
+static int (*tests[])(int fd) = {
+ test0_prpw_aligned_1byte,
+ test1_prpw_aligned_4096bytes,
+ test2_prpw_aligned_10327bytes,
+ test3_prpw_unaligned_12541bytes,
+ test4_lseek,
+ test5_rw_aligned_1byte,
+ test6_rw_aligned_9434bytes,
+ test7_rw_aligned_14963bytes,
+};
+#define TEST_COUNT ARRAY_SIZE(tests)
+
+int main(int argc, char *argv[])
+{
+ int fd, ret, test;
+
+ ret = build_memory_map();
+ if (ret < 0)
+ return ret;
+
+ fd = open("/dev/adi", O_RDWR);
+ if (fd < 0) {
+ fprintf(stderr, "open: error %d: %s\n",
+ errno, strerror(errno));
+ return -errno;
+ }
+
+ for (test = 0; test < TEST_COUNT; test++) {
+ DEBUG_PRINT_L1("Running test #%d\n", test);
+
+ ret = (*tests[test])(fd);
+ if (ret != 0)
+ ksft_test_result_fail("Test #%d failed: error %d\n",
+ test, ret);
+ else
+ ksft_test_result_pass("Test #%d passed\n", test);
+ }
+
+ print_stats();
+ close(fd);
+
+ if (ksft_get_fail_cnt() > 0)
+ ksft_exit_fail();
+ else
+ ksft_exit_pass();
+
+ /* it's impossible to get here, but the compiler throws a warning
+ * about control reaching the end of non-void function. bah.
+ */
+ return 0;
+}
diff --git a/tools/testing/selftests/sparc64/drivers/drivers_test.sh b/tools/testing/selftests/sparc64/drivers/drivers_test.sh
new file mode 100755
index 000000000..6d08273b7
--- /dev/null
+++ b/tools/testing/selftests/sparc64/drivers/drivers_test.sh
@@ -0,0 +1,30 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+SRC_TREE=../../../../
+
+test_run()
+{
+ if [ -f ${SRC_TREE}/drivers/char/adi.ko ]; then
+ insmod ${SRC_TREE}/drivers/char/adi.ko 2> /dev/null
+ if [ $? -ne 0 ]; then
+ rc=1
+ fi
+ else
+ # Use modprobe dry run to check for missing adi module
+ if ! /sbin/modprobe -q -n adi; then
+ echo "adi: [SKIP]"
+ elif /sbin/modprobe -q adi; then
+ echo "adi: ok"
+ else
+ echo "adi: [FAIL]"
+ rc=1
+ fi
+ fi
+ ./adi-test
+ rmmod adi 2> /dev/null
+}
+
+rc=0
+test_run
+exit $rc
diff --git a/tools/testing/selftests/sparc64/run.sh b/tools/testing/selftests/sparc64/run.sh
new file mode 100755
index 000000000..38ad61f93
--- /dev/null
+++ b/tools/testing/selftests/sparc64/run.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+
+(cd drivers; ./drivers_test.sh)
diff --git a/tools/testing/selftests/splice/.gitignore b/tools/testing/selftests/splice/.gitignore
new file mode 100644
index 000000000..1e23fefd6
--- /dev/null
+++ b/tools/testing/selftests/splice/.gitignore
@@ -0,0 +1 @@
+default_file_splice_read
diff --git a/tools/testing/selftests/splice/Makefile b/tools/testing/selftests/splice/Makefile
new file mode 100644
index 000000000..e519b159b
--- /dev/null
+++ b/tools/testing/selftests/splice/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+TEST_PROGS := default_file_splice_read.sh
+TEST_GEN_PROGS_EXTENDED := default_file_splice_read
+
+include ../lib.mk
diff --git a/tools/testing/selftests/splice/default_file_splice_read.c b/tools/testing/selftests/splice/default_file_splice_read.c
new file mode 100644
index 000000000..a3c6e5672
--- /dev/null
+++ b/tools/testing/selftests/splice/default_file_splice_read.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <fcntl.h>
+
+int main(int argc, char **argv)
+{
+ splice(0, 0, 1, 0, 1<<30, 0);
+ return 0;
+}
diff --git a/tools/testing/selftests/splice/default_file_splice_read.sh b/tools/testing/selftests/splice/default_file_splice_read.sh
new file mode 100755
index 000000000..490db5a2e
--- /dev/null
+++ b/tools/testing/selftests/splice/default_file_splice_read.sh
@@ -0,0 +1,8 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+n=`./default_file_splice_read </dev/null | wc -c`
+
+test "$n" = 0 && exit 0
+
+echo "default_file_splice_read broken: leaked $n"
+exit 1
diff --git a/tools/testing/selftests/static_keys/Makefile b/tools/testing/selftests/static_keys/Makefile
new file mode 100644
index 000000000..9cdadf37f
--- /dev/null
+++ b/tools/testing/selftests/static_keys/Makefile
@@ -0,0 +1,8 @@
+# Makefile for static keys selftests
+
+# No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
+all:
+
+TEST_PROGS := test_static_keys.sh
+
+include ../lib.mk
diff --git a/tools/testing/selftests/static_keys/config b/tools/testing/selftests/static_keys/config
new file mode 100644
index 000000000..d538fb774
--- /dev/null
+++ b/tools/testing/selftests/static_keys/config
@@ -0,0 +1 @@
+CONFIG_TEST_STATIC_KEYS=m
diff --git a/tools/testing/selftests/static_keys/test_static_keys.sh b/tools/testing/selftests/static_keys/test_static_keys.sh
new file mode 100755
index 000000000..fc9f8cde7
--- /dev/null
+++ b/tools/testing/selftests/static_keys/test_static_keys.sh
@@ -0,0 +1,30 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Runs static keys kernel module tests
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+if ! /sbin/modprobe -q -n test_static_key_base; then
+ echo "static_key: module test_static_key_base is not found [SKIP]"
+ exit $ksft_skip
+fi
+
+if ! /sbin/modprobe -q -n test_static_keys; then
+ echo "static_key: module test_static_keys is not found [SKIP]"
+ exit $ksft_skip
+fi
+
+if /sbin/modprobe -q test_static_key_base; then
+ if /sbin/modprobe -q test_static_keys; then
+ echo "static_key: ok"
+ /sbin/modprobe -q -r test_static_keys
+ /sbin/modprobe -q -r test_static_key_base
+ else
+ echo "static_keys: [FAIL]"
+ /sbin/modprobe -q -r test_static_key_base
+ fi
+else
+ echo "static_key: [FAIL]"
+ exit 1
+fi
diff --git a/tools/testing/selftests/sync/.gitignore b/tools/testing/selftests/sync/.gitignore
new file mode 100644
index 000000000..f5091e779
--- /dev/null
+++ b/tools/testing/selftests/sync/.gitignore
@@ -0,0 +1 @@
+sync_test
diff --git a/tools/testing/selftests/sync/Makefile b/tools/testing/selftests/sync/Makefile
new file mode 100644
index 000000000..d0121a8a3
--- /dev/null
+++ b/tools/testing/selftests/sync/Makefile
@@ -0,0 +1,38 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS += -O2 -g -std=gnu89 -pthread -Wall -Wextra
+CFLAGS += -I../../../../usr/include/
+LDFLAGS += -pthread
+
+.PHONY: all clean
+
+include ../lib.mk
+
+# lib.mk TEST_CUSTOM_PROGS var is for custom tests that need special
+# build rules. lib.mk will run and install them.
+
+TEST_CUSTOM_PROGS := $(OUTPUT)/sync_test
+all: $(TEST_CUSTOM_PROGS)
+
+OBJS = sync_test.o sync.o
+
+TESTS += sync_alloc.o
+TESTS += sync_fence.o
+TESTS += sync_merge.o
+TESTS += sync_wait.o
+TESTS += sync_stress_parallelism.o
+TESTS += sync_stress_consumer.o
+TESTS += sync_stress_merge.o
+
+OBJS := $(patsubst %,$(OUTPUT)/%,$(OBJS))
+TESTS := $(patsubst %,$(OUTPUT)/%,$(TESTS))
+
+$(TEST_CUSTOM_PROGS): $(TESTS) $(OBJS)
+ $(CC) -o $(TEST_CUSTOM_PROGS) $(OBJS) $(TESTS) $(CFLAGS) $(LDFLAGS)
+
+$(OBJS): $(OUTPUT)/%.o: %.c
+ $(CC) -c $^ -o $@ $(CFLAGS)
+
+$(TESTS): $(OUTPUT)/%.o: %.c
+ $(CC) -c $^ -o $@
+
+EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(OBJS) $(TESTS)
diff --git a/tools/testing/selftests/sync/config b/tools/testing/selftests/sync/config
new file mode 100644
index 000000000..1ab7e8130
--- /dev/null
+++ b/tools/testing/selftests/sync/config
@@ -0,0 +1,4 @@
+CONFIG_STAGING=y
+CONFIG_ANDROID=y
+CONFIG_SYNC=y
+CONFIG_SW_SYNC=y
diff --git a/tools/testing/selftests/sync/sw_sync.h b/tools/testing/selftests/sync/sw_sync.h
new file mode 100644
index 000000000..e2cfc6bad
--- /dev/null
+++ b/tools/testing/selftests/sync/sw_sync.h
@@ -0,0 +1,46 @@
+/*
+ * sw_sync abstraction
+ *
+ * Copyright 2015-2016 Collabora Ltd.
+ *
+ * Based on the implementation from the Android Open Source Project,
+ *
+ * Copyright 2013 Google, Inc
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef SELFTESTS_SW_SYNC_H
+#define SELFTESTS_SW_SYNC_H
+
+/*
+ * sw_sync is mainly intended for testing and should not be compiled into
+ * production kernels
+ */
+
+int sw_sync_timeline_create(void);
+int sw_sync_timeline_is_valid(int fd);
+int sw_sync_timeline_inc(int fd, unsigned int count);
+void sw_sync_timeline_destroy(int fd);
+
+int sw_sync_fence_create(int fd, const char *name, unsigned int value);
+int sw_sync_fence_is_valid(int fd);
+void sw_sync_fence_destroy(int fd);
+
+#endif
diff --git a/tools/testing/selftests/sync/sync.c b/tools/testing/selftests/sync/sync.c
new file mode 100644
index 000000000..f3d599f24
--- /dev/null
+++ b/tools/testing/selftests/sync/sync.c
@@ -0,0 +1,221 @@
+/*
+ * sync / sw_sync abstraction
+ * Copyright 2015-2016 Collabora Ltd.
+ *
+ * Based on the implementation from the Android Open Source Project,
+ *
+ * Copyright 2012 Google, Inc
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <fcntl.h>
+#include <malloc.h>
+#include <poll.h>
+#include <stdint.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "sync.h"
+#include "sw_sync.h"
+
+#include <linux/sync_file.h>
+
+
+/* SW_SYNC ioctls */
+struct sw_sync_create_fence_data {
+ __u32 value;
+ char name[32];
+ __s32 fence;
+};
+
+#define SW_SYNC_IOC_MAGIC 'W'
+#define SW_SYNC_IOC_CREATE_FENCE _IOWR(SW_SYNC_IOC_MAGIC, 0,\
+ struct sw_sync_create_fence_data)
+#define SW_SYNC_IOC_INC _IOW(SW_SYNC_IOC_MAGIC, 1, __u32)
+
+
+int sync_wait(int fd, int timeout)
+{
+ struct pollfd fds;
+
+ fds.fd = fd;
+ fds.events = POLLIN | POLLERR;
+
+ return poll(&fds, 1, timeout);
+}
+
+int sync_merge(const char *name, int fd1, int fd2)
+{
+ struct sync_merge_data data = {};
+ int err;
+
+ data.fd2 = fd2;
+ strncpy(data.name, name, sizeof(data.name) - 1);
+ data.name[sizeof(data.name) - 1] = '\0';
+
+ err = ioctl(fd1, SYNC_IOC_MERGE, &data);
+ if (err < 0)
+ return err;
+
+ return data.fence;
+}
+
+static struct sync_file_info *sync_file_info(int fd)
+{
+ struct sync_file_info *info;
+ struct sync_fence_info *fence_info;
+ int err, num_fences;
+
+ info = calloc(1, sizeof(*info));
+ if (info == NULL)
+ return NULL;
+
+ err = ioctl(fd, SYNC_IOC_FILE_INFO, info);
+ if (err < 0) {
+ free(info);
+ return NULL;
+ }
+
+ num_fences = info->num_fences;
+
+ if (num_fences) {
+ info->flags = 0;
+ info->num_fences = num_fences;
+
+ fence_info = calloc(num_fences, sizeof(*fence_info));
+ if (!fence_info) {
+ free(info);
+ return NULL;
+ }
+
+ info->sync_fence_info = (uint64_t)fence_info;
+
+ err = ioctl(fd, SYNC_IOC_FILE_INFO, info);
+ if (err < 0) {
+ free(fence_info);
+ free(info);
+ return NULL;
+ }
+ }
+
+ return info;
+}
+
+static void sync_file_info_free(struct sync_file_info *info)
+{
+ free((void *)info->sync_fence_info);
+ free(info);
+}
+
+int sync_fence_size(int fd)
+{
+ int count;
+ struct sync_file_info *info = sync_file_info(fd);
+
+ if (!info)
+ return 0;
+
+ count = info->num_fences;
+
+ sync_file_info_free(info);
+
+ return count;
+}
+
+int sync_fence_count_with_status(int fd, int status)
+{
+ unsigned int i, count = 0;
+ struct sync_fence_info *fence_info = NULL;
+ struct sync_file_info *info = sync_file_info(fd);
+
+ if (!info)
+ return -1;
+
+ fence_info = (struct sync_fence_info *)info->sync_fence_info;
+ for (i = 0 ; i < info->num_fences ; i++) {
+ if (fence_info[i].status == status)
+ count++;
+ }
+
+ sync_file_info_free(info);
+
+ return count;
+}
+
+int sw_sync_timeline_create(void)
+{
+ return open("/sys/kernel/debug/sync/sw_sync", O_RDWR);
+}
+
+int sw_sync_timeline_inc(int fd, unsigned int count)
+{
+ __u32 arg = count;
+
+ return ioctl(fd, SW_SYNC_IOC_INC, &arg);
+}
+
+int sw_sync_timeline_is_valid(int fd)
+{
+ int status;
+
+ if (fd == -1)
+ return 0;
+
+ status = fcntl(fd, F_GETFD, 0);
+ return (status >= 0);
+}
+
+void sw_sync_timeline_destroy(int fd)
+{
+ if (sw_sync_timeline_is_valid(fd))
+ close(fd);
+}
+
+int sw_sync_fence_create(int fd, const char *name, unsigned int value)
+{
+ struct sw_sync_create_fence_data data = {};
+ int err;
+
+ data.value = value;
+ strncpy(data.name, name, sizeof(data.name) - 1);
+ data.name[sizeof(data.name) - 1] = '\0';
+
+ err = ioctl(fd, SW_SYNC_IOC_CREATE_FENCE, &data);
+ if (err < 0)
+ return err;
+
+ return data.fence;
+}
+
+int sw_sync_fence_is_valid(int fd)
+{
+ /* Same code! */
+ return sw_sync_timeline_is_valid(fd);
+}
+
+void sw_sync_fence_destroy(int fd)
+{
+ if (sw_sync_fence_is_valid(fd))
+ close(fd);
+}
diff --git a/tools/testing/selftests/sync/sync.h b/tools/testing/selftests/sync/sync.h
new file mode 100644
index 000000000..fb7156148
--- /dev/null
+++ b/tools/testing/selftests/sync/sync.h
@@ -0,0 +1,40 @@
+/*
+ * sync abstraction
+ * Copyright 2015-2016 Collabora Ltd.
+ *
+ * Based on the implementation from the Android Open Source Project,
+ *
+ * Copyright 2012 Google, Inc
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef SELFTESTS_SYNC_H
+#define SELFTESTS_SYNC_H
+
+#define FENCE_STATUS_ERROR (-1)
+#define FENCE_STATUS_ACTIVE (0)
+#define FENCE_STATUS_SIGNALED (1)
+
+int sync_wait(int fd, int timeout);
+int sync_merge(const char *name, int fd1, int fd2);
+int sync_fence_size(int fd);
+int sync_fence_count_with_status(int fd, int status);
+
+#endif
diff --git a/tools/testing/selftests/sync/sync_alloc.c b/tools/testing/selftests/sync/sync_alloc.c
new file mode 100644
index 000000000..66a28afc0
--- /dev/null
+++ b/tools/testing/selftests/sync/sync_alloc.c
@@ -0,0 +1,74 @@
+/*
+ * sync allocation tests
+ * Copyright 2015-2016 Collabora Ltd.
+ *
+ * Based on the implementation from the Android Open Source Project,
+ *
+ * Copyright 2012 Google, Inc
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "sync.h"
+#include "sw_sync.h"
+#include "synctest.h"
+
+int test_alloc_timeline(void)
+{
+ int timeline, valid;
+
+ timeline = sw_sync_timeline_create();
+ valid = sw_sync_timeline_is_valid(timeline);
+ ASSERT(valid, "Failure allocating timeline\n");
+
+ sw_sync_timeline_destroy(timeline);
+ return 0;
+}
+
+int test_alloc_fence(void)
+{
+ int timeline, fence, valid;
+
+ timeline = sw_sync_timeline_create();
+ valid = sw_sync_timeline_is_valid(timeline);
+ ASSERT(valid, "Failure allocating timeline\n");
+
+ fence = sw_sync_fence_create(timeline, "allocFence", 1);
+ valid = sw_sync_fence_is_valid(fence);
+ ASSERT(valid, "Failure allocating fence\n");
+
+ sw_sync_fence_destroy(fence);
+ sw_sync_timeline_destroy(timeline);
+ return 0;
+}
+
+int test_alloc_fence_negative(void)
+{
+ int fence, timeline;
+
+ timeline = sw_sync_timeline_create();
+ ASSERT(timeline > 0, "Failure allocating timeline\n");
+
+ fence = sw_sync_fence_create(-1, "fence", 1);
+ ASSERT(fence < 0, "Success allocating negative fence\n");
+
+ sw_sync_fence_destroy(fence);
+ sw_sync_timeline_destroy(timeline);
+ return 0;
+}
diff --git a/tools/testing/selftests/sync/sync_fence.c b/tools/testing/selftests/sync/sync_fence.c
new file mode 100644
index 000000000..13f175287
--- /dev/null
+++ b/tools/testing/selftests/sync/sync_fence.c
@@ -0,0 +1,132 @@
+/*
+ * sync fence tests with one timeline
+ * Copyright 2015-2016 Collabora Ltd.
+ *
+ * Based on the implementation from the Android Open Source Project,
+ *
+ * Copyright 2012 Google, Inc
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "sync.h"
+#include "sw_sync.h"
+#include "synctest.h"
+
+int test_fence_one_timeline_wait(void)
+{
+ int fence, valid, ret;
+ int timeline = sw_sync_timeline_create();
+
+ valid = sw_sync_timeline_is_valid(timeline);
+ ASSERT(valid, "Failure allocating timeline\n");
+
+ fence = sw_sync_fence_create(timeline, "allocFence", 5);
+ valid = sw_sync_fence_is_valid(fence);
+ ASSERT(valid, "Failure allocating fence\n");
+
+ /* Wait on fence until timeout */
+ ret = sync_wait(fence, 0);
+ ASSERT(ret == 0, "Failure waiting on fence until timeout\n");
+
+ /* Advance timeline from 0 -> 1 */
+ ret = sw_sync_timeline_inc(timeline, 1);
+ ASSERT(ret == 0, "Failure advancing timeline\n");
+
+ /* Wait on fence until timeout */
+ ret = sync_wait(fence, 0);
+ ASSERT(ret == 0, "Failure waiting on fence until timeout\n");
+
+ /* Signal the fence */
+ ret = sw_sync_timeline_inc(timeline, 4);
+ ASSERT(ret == 0, "Failure signaling the fence\n");
+
+ /* Wait successfully */
+ ret = sync_wait(fence, 0);
+ ASSERT(ret > 0, "Failure waiting on fence\n");
+
+ /* Go even further, and confirm wait still succeeds */
+ ret = sw_sync_timeline_inc(timeline, 10);
+ ASSERT(ret == 0, "Failure going further\n");
+ ret = sync_wait(fence, 0);
+ ASSERT(ret > 0, "Failure waiting ahead\n");
+
+ sw_sync_fence_destroy(fence);
+ sw_sync_timeline_destroy(timeline);
+
+ return 0;
+}
+
+int test_fence_one_timeline_merge(void)
+{
+ int a, b, c, d, valid;
+ int timeline = sw_sync_timeline_create();
+
+ /* create fence a,b,c and then merge them all into fence d */
+ a = sw_sync_fence_create(timeline, "allocFence", 1);
+ b = sw_sync_fence_create(timeline, "allocFence", 2);
+ c = sw_sync_fence_create(timeline, "allocFence", 3);
+
+ valid = sw_sync_fence_is_valid(a) &&
+ sw_sync_fence_is_valid(b) &&
+ sw_sync_fence_is_valid(c);
+ ASSERT(valid, "Failure allocating fences\n");
+
+ d = sync_merge("mergeFence", b, a);
+ d = sync_merge("mergeFence", c, d);
+ valid = sw_sync_fence_is_valid(d);
+ ASSERT(valid, "Failure merging fences\n");
+
+ /* confirm all fences have one active point (even d) */
+ ASSERT(sync_fence_count_with_status(a, FENCE_STATUS_ACTIVE) == 1,
+ "a has too many active fences!\n");
+ ASSERT(sync_fence_count_with_status(a, FENCE_STATUS_ACTIVE) == 1,
+ "b has too many active fences!\n");
+ ASSERT(sync_fence_count_with_status(a, FENCE_STATUS_ACTIVE) == 1,
+ "c has too many active fences!\n");
+ ASSERT(sync_fence_count_with_status(a, FENCE_STATUS_ACTIVE) == 1,
+ "d has too many active fences!\n");
+
+ /* confirm that d is not signaled until the max of a,b,c */
+ sw_sync_timeline_inc(timeline, 1);
+ ASSERT(sync_fence_count_with_status(a, FENCE_STATUS_SIGNALED) == 1,
+ "a did not signal!\n");
+ ASSERT(sync_fence_count_with_status(d, FENCE_STATUS_ACTIVE) == 1,
+ "d signaled too early!\n");
+
+ sw_sync_timeline_inc(timeline, 1);
+ ASSERT(sync_fence_count_with_status(b, FENCE_STATUS_SIGNALED) == 1,
+ "b did not signal!\n");
+ ASSERT(sync_fence_count_with_status(d, FENCE_STATUS_ACTIVE) == 1,
+ "d signaled too early!\n");
+
+ sw_sync_timeline_inc(timeline, 1);
+ ASSERT(sync_fence_count_with_status(c, FENCE_STATUS_SIGNALED) == 1,
+ "c did not signal!\n");
+ ASSERT(sync_fence_count_with_status(d, FENCE_STATUS_ACTIVE) == 0 &&
+ sync_fence_count_with_status(d, FENCE_STATUS_SIGNALED) == 1,
+ "d did not signal!\n");
+
+ sw_sync_fence_destroy(d);
+ sw_sync_fence_destroy(c);
+ sw_sync_fence_destroy(b);
+ sw_sync_fence_destroy(a);
+ sw_sync_timeline_destroy(timeline);
+ return 0;
+}
diff --git a/tools/testing/selftests/sync/sync_merge.c b/tools/testing/selftests/sync/sync_merge.c
new file mode 100644
index 000000000..8914d4339
--- /dev/null
+++ b/tools/testing/selftests/sync/sync_merge.c
@@ -0,0 +1,60 @@
+/*
+ * sync fence merge tests
+ * Copyright 2015-2016 Collabora Ltd.
+ *
+ * Based on the implementation from the Android Open Source Project,
+ *
+ * Copyright 2012 Google, Inc
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "sync.h"
+#include "sw_sync.h"
+#include "synctest.h"
+
+int test_fence_merge_same_fence(void)
+{
+ int fence, valid, merged;
+ int timeline = sw_sync_timeline_create();
+
+ valid = sw_sync_timeline_is_valid(timeline);
+ ASSERT(valid, "Failure allocating timeline\n");
+
+ fence = sw_sync_fence_create(timeline, "allocFence", 5);
+ valid = sw_sync_fence_is_valid(fence);
+ ASSERT(valid, "Failure allocating fence\n");
+
+ merged = sync_merge("mergeFence", fence, fence);
+ valid = sw_sync_fence_is_valid(fence);
+ ASSERT(valid, "Failure merging fence\n");
+
+ ASSERT(sync_fence_count_with_status(merged, FENCE_STATUS_SIGNALED) == 0,
+ "fence signaled too early!\n");
+
+ sw_sync_timeline_inc(timeline, 5);
+ ASSERT(sync_fence_count_with_status(merged, FENCE_STATUS_SIGNALED) == 1,
+ "fence did not signal!\n");
+
+ sw_sync_fence_destroy(merged);
+ sw_sync_fence_destroy(fence);
+ sw_sync_timeline_destroy(timeline);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/sync/sync_stress_consumer.c b/tools/testing/selftests/sync/sync_stress_consumer.c
new file mode 100644
index 000000000..d9eff8d52
--- /dev/null
+++ b/tools/testing/selftests/sync/sync_stress_consumer.c
@@ -0,0 +1,185 @@
+/*
+ * sync stress test: producer/consumer
+ * Copyright 2015-2016 Collabora Ltd.
+ *
+ * Based on the implementation from the Android Open Source Project,
+ *
+ * Copyright 2012 Google, Inc
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <pthread.h>
+
+#include "sync.h"
+#include "sw_sync.h"
+#include "synctest.h"
+
+/* IMPORTANT NOTE: if you see this test failing on your system, it may be
+ * due to a shortage of file descriptors. Please ensure your system has
+ * a sensible limit for this test to finish correctly.
+ */
+
+/* Returns 1 on error, 0 on success */
+static int busy_wait_on_fence(int fence)
+{
+ int error, active;
+
+ do {
+ error = sync_fence_count_with_status(fence, FENCE_STATUS_ERROR);
+ ASSERT(error == 0, "Error occurred on fence\n");
+ active = sync_fence_count_with_status(fence,
+ FENCE_STATUS_ACTIVE);
+ } while (active);
+
+ return 0;
+}
+
+static struct {
+ int iterations;
+ int threads;
+ int counter;
+ int consumer_timeline;
+ int *producer_timelines;
+ pthread_mutex_t lock;
+} test_data_mpsc;
+
+static int mpsc_producer_thread(void *d)
+{
+ int id = (long)d;
+ int fence, valid, i;
+ int *producer_timelines = test_data_mpsc.producer_timelines;
+ int consumer_timeline = test_data_mpsc.consumer_timeline;
+ int iterations = test_data_mpsc.iterations;
+
+ for (i = 0; i < iterations; i++) {
+ fence = sw_sync_fence_create(consumer_timeline, "fence", i);
+ valid = sw_sync_fence_is_valid(fence);
+ ASSERT(valid, "Failure creating fence\n");
+
+ /*
+ * Wait for the consumer to finish. Use alternate
+ * means of waiting on the fence
+ */
+
+ if ((iterations + id) % 8 != 0) {
+ ASSERT(sync_wait(fence, -1) > 0,
+ "Failure waiting on fence\n");
+ } else {
+ ASSERT(busy_wait_on_fence(fence) == 0,
+ "Failure waiting on fence\n");
+ }
+
+ /*
+ * Every producer increments the counter, the consumer
+ * checks and erases it
+ */
+ pthread_mutex_lock(&test_data_mpsc.lock);
+ test_data_mpsc.counter++;
+ pthread_mutex_unlock(&test_data_mpsc.lock);
+
+ ASSERT(sw_sync_timeline_inc(producer_timelines[id], 1) == 0,
+ "Error advancing producer timeline\n");
+
+ sw_sync_fence_destroy(fence);
+ }
+
+ return 0;
+}
+
+static int mpcs_consumer_thread(void)
+{
+ int fence, merged, tmp, valid, it, i;
+ int *producer_timelines = test_data_mpsc.producer_timelines;
+ int consumer_timeline = test_data_mpsc.consumer_timeline;
+ int iterations = test_data_mpsc.iterations;
+ int n = test_data_mpsc.threads;
+
+ for (it = 1; it <= iterations; it++) {
+ fence = sw_sync_fence_create(producer_timelines[0], "name", it);
+ for (i = 1; i < n; i++) {
+ tmp = sw_sync_fence_create(producer_timelines[i],
+ "name", it);
+ merged = sync_merge("name", tmp, fence);
+ sw_sync_fence_destroy(tmp);
+ sw_sync_fence_destroy(fence);
+ fence = merged;
+ }
+
+ valid = sw_sync_fence_is_valid(fence);
+ ASSERT(valid, "Failure merging fences\n");
+
+ /*
+ * Make sure we see an increment from every producer thread.
+ * Vary the means by which we wait.
+ */
+ if (iterations % 8 != 0) {
+ ASSERT(sync_wait(fence, -1) > 0,
+ "Producers did not increment as expected\n");
+ } else {
+ ASSERT(busy_wait_on_fence(fence) == 0,
+ "Producers did not increment as expected\n");
+ }
+
+ ASSERT(test_data_mpsc.counter == n * it,
+ "Counter value mismatch!\n");
+
+ /* Release the producer threads */
+ ASSERT(sw_sync_timeline_inc(consumer_timeline, 1) == 0,
+ "Failure releasing producer threads\n");
+
+ sw_sync_fence_destroy(fence);
+ }
+
+ return 0;
+}
+
+int test_consumer_stress_multi_producer_single_consumer(void)
+{
+ int iterations = 1 << 12;
+ int n = 5;
+ long i, ret;
+ int producer_timelines[n];
+ int consumer_timeline;
+ pthread_t threads[n];
+
+ consumer_timeline = sw_sync_timeline_create();
+ for (i = 0; i < n; i++)
+ producer_timelines[i] = sw_sync_timeline_create();
+
+ test_data_mpsc.producer_timelines = producer_timelines;
+ test_data_mpsc.consumer_timeline = consumer_timeline;
+ test_data_mpsc.iterations = iterations;
+ test_data_mpsc.threads = n;
+ test_data_mpsc.counter = 0;
+ pthread_mutex_init(&test_data_mpsc.lock, NULL);
+
+ for (i = 0; i < n; i++) {
+ pthread_create(&threads[i], NULL, (void * (*)(void *))
+ mpsc_producer_thread, (void *)i);
+ }
+
+ /* Consumer thread runs here */
+ ret = mpcs_consumer_thread();
+
+ for (i = 0; i < n; i++)
+ pthread_join(threads[i], NULL);
+
+ return ret;
+}
diff --git a/tools/testing/selftests/sync/sync_stress_merge.c b/tools/testing/selftests/sync/sync_stress_merge.c
new file mode 100644
index 000000000..99e83ef45
--- /dev/null
+++ b/tools/testing/selftests/sync/sync_stress_merge.c
@@ -0,0 +1,115 @@
+/*
+ * sync stress test: merging
+ * Copyright 2015-2016 Collabora Ltd.
+ *
+ * Based on the implementation from the Android Open Source Project,
+ *
+ * Copyright 2012 Google, Inc
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include "sync.h"
+#include "sw_sync.h"
+#include "synctest.h"
+
+int test_merge_stress_random_merge(void)
+{
+ int i, size, ret;
+ int timeline_count = 32;
+ int merge_count = 1024 * 32;
+ int timelines[timeline_count];
+ int fence_map[timeline_count];
+ int fence, tmpfence, merged, valid;
+ int timeline, timeline_offset, sync_point;
+
+ srand(time(NULL));
+
+ for (i = 0; i < timeline_count; i++)
+ timelines[i] = sw_sync_timeline_create();
+
+ fence = sw_sync_fence_create(timelines[0], "fence", 0);
+ valid = sw_sync_fence_is_valid(fence);
+ ASSERT(valid, "Failure creating fence\n");
+
+ memset(fence_map, -1, sizeof(fence_map));
+ fence_map[0] = 0;
+
+ /*
+ * Randomly create sync_points out of a fixed set of timelines,
+ * and merge them together
+ */
+ for (i = 0; i < merge_count; i++) {
+ /* Generate sync_point. */
+ timeline_offset = rand() % timeline_count;
+ timeline = timelines[timeline_offset];
+ sync_point = rand();
+
+ /* Keep track of the latest sync_point in each timeline. */
+ if (fence_map[timeline_offset] == -1)
+ fence_map[timeline_offset] = sync_point;
+ else if (fence_map[timeline_offset] < sync_point)
+ fence_map[timeline_offset] = sync_point;
+
+ /* Merge */
+ tmpfence = sw_sync_fence_create(timeline, "fence", sync_point);
+ merged = sync_merge("merge", tmpfence, fence);
+ sw_sync_fence_destroy(tmpfence);
+ sw_sync_fence_destroy(fence);
+ fence = merged;
+
+ valid = sw_sync_fence_is_valid(merged);
+ ASSERT(valid, "Failure creating fence i\n");
+ }
+
+ size = 0;
+ for (i = 0; i < timeline_count; i++)
+ if (fence_map[i] != -1)
+ size++;
+
+ /* Confirm our map matches the fence. */
+ ASSERT(sync_fence_size(fence) == size,
+ "Quantity of elements not matching\n");
+
+ /* Trigger the merged fence */
+ for (i = 0; i < timeline_count; i++) {
+ if (fence_map[i] != -1) {
+ ret = sync_wait(fence, 0);
+ ASSERT(ret == 0,
+ "Failure waiting on fence until timeout\n");
+ /* Increment the timeline to the last sync_point */
+ sw_sync_timeline_inc(timelines[i], fence_map[i]);
+ }
+ }
+
+ /* Check that the fence is triggered. */
+ ret = sync_wait(fence, 0);
+ ASSERT(ret > 0, "Failure triggering fence\n");
+
+ sw_sync_fence_destroy(fence);
+
+ for (i = 0; i < timeline_count; i++)
+ sw_sync_timeline_destroy(timelines[i]);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/sync/sync_stress_parallelism.c b/tools/testing/selftests/sync/sync_stress_parallelism.c
new file mode 100644
index 000000000..e6c9be671
--- /dev/null
+++ b/tools/testing/selftests/sync/sync_stress_parallelism.c
@@ -0,0 +1,111 @@
+/*
+ * sync stress test: parallelism
+ * Copyright 2015-2016 Collabora Ltd.
+ *
+ * Based on the implementation from the Android Open Source Project,
+ *
+ * Copyright 2012 Google, Inc
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <pthread.h>
+
+#include "sync.h"
+#include "sw_sync.h"
+#include "synctest.h"
+
+static struct {
+ int iterations;
+ int timeline;
+ int counter;
+} test_data_two_threads;
+
+static int test_stress_two_threads_shared_timeline_thread(void *d)
+{
+ int thread_id = (long)d;
+ int timeline = test_data_two_threads.timeline;
+ int iterations = test_data_two_threads.iterations;
+ int fence, valid, ret, i;
+
+ for (i = 0; i < iterations; i++) {
+ fence = sw_sync_fence_create(timeline, "fence",
+ i * 2 + thread_id);
+ valid = sw_sync_fence_is_valid(fence);
+ ASSERT(valid, "Failure allocating fence\n");
+
+ /* Wait on the prior thread to complete */
+ ret = sync_wait(fence, -1);
+ ASSERT(ret > 0, "Problem occurred on prior thread\n");
+
+ /*
+ * Confirm the previous thread's writes are visible
+ * and then increment
+ */
+ ASSERT(test_data_two_threads.counter == i * 2 + thread_id,
+ "Counter got damaged!\n");
+ test_data_two_threads.counter++;
+
+ /* Kick off the other thread */
+ ret = sw_sync_timeline_inc(timeline, 1);
+ ASSERT(ret == 0, "Advancing timeline failed\n");
+
+ sw_sync_fence_destroy(fence);
+ }
+
+ return 0;
+}
+
+int test_stress_two_threads_shared_timeline(void)
+{
+ pthread_t a, b;
+ int valid;
+ int timeline = sw_sync_timeline_create();
+
+ valid = sw_sync_timeline_is_valid(timeline);
+ ASSERT(valid, "Failure allocating timeline\n");
+
+ test_data_two_threads.iterations = 1 << 16;
+ test_data_two_threads.counter = 0;
+ test_data_two_threads.timeline = timeline;
+
+ /*
+ * Use a single timeline to synchronize two threads
+ * hammmering on the same counter.
+ */
+
+ pthread_create(&a, NULL, (void *(*)(void *))
+ test_stress_two_threads_shared_timeline_thread,
+ (void *)0);
+ pthread_create(&b, NULL, (void *(*)(void *))
+ test_stress_two_threads_shared_timeline_thread,
+ (void *)1);
+
+ pthread_join(a, NULL);
+ pthread_join(b, NULL);
+
+ /* make sure the threads did not trample on one another */
+ ASSERT(test_data_two_threads.counter ==
+ test_data_two_threads.iterations * 2,
+ "Counter has unexpected value\n");
+
+ sw_sync_timeline_destroy(timeline);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/sync/sync_test.c b/tools/testing/selftests/sync/sync_test.c
new file mode 100644
index 000000000..7f7938263
--- /dev/null
+++ b/tools/testing/selftests/sync/sync_test.c
@@ -0,0 +1,113 @@
+/*
+ * sync test runner
+ * Copyright 2015-2016 Collabora Ltd.
+ *
+ * Based on the implementation from the Android Open Source Project,
+ *
+ * Copyright 2012 Google, Inc
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <errno.h>
+#include <string.h>
+
+#include "../kselftest.h"
+#include "synctest.h"
+
+static int run_test(int (*test)(void), char *name)
+{
+ int result;
+ pid_t childpid;
+ int ret;
+
+ fflush(stdout);
+ childpid = fork();
+
+ if (childpid) {
+ waitpid(childpid, &result, 0);
+ if (WIFEXITED(result)) {
+ ret = WEXITSTATUS(result);
+ if (!ret)
+ ksft_test_result_pass("[RUN]\t%s\n", name);
+ else
+ ksft_test_result_fail("[RUN]\t%s\n", name);
+ return ret;
+ }
+ return 1;
+ }
+
+ exit(test());
+}
+
+static void sync_api_supported(void)
+{
+ struct stat sbuf;
+ int ret;
+
+ ret = stat("/sys/kernel/debug/sync/sw_sync", &sbuf);
+ if (!ret)
+ return;
+
+ if (errno == ENOENT)
+ ksft_exit_skip("Sync framework not supported by kernel\n");
+
+ if (errno == EACCES)
+ ksft_exit_skip("Run Sync test as root.\n");
+
+ ksft_exit_fail_msg("stat failed on /sys/kernel/debug/sync/sw_sync: %s",
+ strerror(errno));
+}
+
+int main(void)
+{
+ int err;
+
+ ksft_print_header();
+
+ sync_api_supported();
+
+ ksft_print_msg("[RUN]\tTesting sync framework\n");
+
+ RUN_TEST(test_alloc_timeline);
+ RUN_TEST(test_alloc_fence);
+ RUN_TEST(test_alloc_fence_negative);
+
+ RUN_TEST(test_fence_one_timeline_wait);
+ RUN_TEST(test_fence_one_timeline_merge);
+ RUN_TEST(test_fence_merge_same_fence);
+ RUN_TEST(test_fence_multi_timeline_wait);
+ RUN_TEST(test_stress_two_threads_shared_timeline);
+ RUN_TEST(test_consumer_stress_multi_producer_single_consumer);
+ RUN_TEST(test_merge_stress_random_merge);
+
+ err = ksft_get_fail_cnt();
+ if (err)
+ ksft_exit_fail_msg("%d out of %d sync tests failed\n",
+ err, ksft_test_num());
+
+ /* need this return to keep gcc happy */
+ return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/sync/sync_wait.c b/tools/testing/selftests/sync/sync_wait.c
new file mode 100644
index 000000000..d69b752f6
--- /dev/null
+++ b/tools/testing/selftests/sync/sync_wait.c
@@ -0,0 +1,91 @@
+/*
+ * sync fence wait tests
+ * Copyright 2015-2016 Collabora Ltd.
+ *
+ * Based on the implementation from the Android Open Source Project,
+ *
+ * Copyright 2012 Google, Inc
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "sync.h"
+#include "sw_sync.h"
+#include "synctest.h"
+
+int test_fence_multi_timeline_wait(void)
+{
+ int timelineA, timelineB, timelineC;
+ int fenceA, fenceB, fenceC, merged;
+ int valid, active, signaled, ret;
+
+ timelineA = sw_sync_timeline_create();
+ timelineB = sw_sync_timeline_create();
+ timelineC = sw_sync_timeline_create();
+
+ fenceA = sw_sync_fence_create(timelineA, "fenceA", 5);
+ fenceB = sw_sync_fence_create(timelineB, "fenceB", 5);
+ fenceC = sw_sync_fence_create(timelineC, "fenceC", 5);
+
+ merged = sync_merge("mergeFence", fenceB, fenceA);
+ merged = sync_merge("mergeFence", fenceC, merged);
+
+ valid = sw_sync_fence_is_valid(merged);
+ ASSERT(valid, "Failure merging fence from various timelines\n");
+
+ /* Confirm fence isn't signaled */
+ active = sync_fence_count_with_status(merged, FENCE_STATUS_ACTIVE);
+ ASSERT(active == 3, "Fence signaled too early!\n");
+
+ ret = sync_wait(merged, 0);
+ ASSERT(ret == 0,
+ "Failure waiting on fence until timeout\n");
+
+ ret = sw_sync_timeline_inc(timelineA, 5);
+ active = sync_fence_count_with_status(merged, FENCE_STATUS_ACTIVE);
+ signaled = sync_fence_count_with_status(merged, FENCE_STATUS_SIGNALED);
+ ASSERT(active == 2 && signaled == 1,
+ "Fence did not signal properly!\n");
+
+ ret = sw_sync_timeline_inc(timelineB, 5);
+ active = sync_fence_count_with_status(merged, FENCE_STATUS_ACTIVE);
+ signaled = sync_fence_count_with_status(merged, FENCE_STATUS_SIGNALED);
+ ASSERT(active == 1 && signaled == 2,
+ "Fence did not signal properly!\n");
+
+ ret = sw_sync_timeline_inc(timelineC, 5);
+ active = sync_fence_count_with_status(merged, FENCE_STATUS_ACTIVE);
+ signaled = sync_fence_count_with_status(merged, FENCE_STATUS_SIGNALED);
+ ASSERT(active == 0 && signaled == 3,
+ "Fence did not signal properly!\n");
+
+ /* confirm you can successfully wait */
+ ret = sync_wait(merged, 100);
+ ASSERT(ret > 0, "Failure waiting on signaled fence\n");
+
+ sw_sync_fence_destroy(merged);
+ sw_sync_fence_destroy(fenceC);
+ sw_sync_fence_destroy(fenceB);
+ sw_sync_fence_destroy(fenceA);
+ sw_sync_timeline_destroy(timelineC);
+ sw_sync_timeline_destroy(timelineB);
+ sw_sync_timeline_destroy(timelineA);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/sync/synctest.h b/tools/testing/selftests/sync/synctest.h
new file mode 100644
index 000000000..90a8e5369
--- /dev/null
+++ b/tools/testing/selftests/sync/synctest.h
@@ -0,0 +1,67 @@
+/*
+ * sync tests
+ * Copyright 2015-2016 Collabora Ltd.
+ *
+ * Based on the implementation from the Android Open Source Project,
+ *
+ * Copyright 2012 Google, Inc
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef SELFTESTS_SYNCTEST_H
+#define SELFTESTS_SYNCTEST_H
+
+#include <stdio.h>
+#include "../kselftest.h"
+
+#define ASSERT(cond, msg) do { \
+ if (!(cond)) { \
+ ksft_print_msg("[ERROR]\t%s", (msg)); \
+ return 1; \
+ } \
+} while (0)
+
+#define RUN_TEST(x) run_test((x), #x)
+
+/* Allocation tests */
+int test_alloc_timeline(void);
+int test_alloc_fence(void);
+int test_alloc_fence_negative(void);
+
+/* Fence tests with one timeline */
+int test_fence_one_timeline_wait(void);
+int test_fence_one_timeline_merge(void);
+
+/* Fence merge tests */
+int test_fence_merge_same_fence(void);
+
+/* Fence wait tests */
+int test_fence_multi_timeline_wait(void);
+
+/* Stress test - parallelism */
+int test_stress_two_threads_shared_timeline(void);
+
+/* Stress test - consumer */
+int test_consumer_stress_multi_producer_single_consumer(void);
+
+/* Stress test - merging */
+int test_merge_stress_random_merge(void);
+
+#endif
diff --git a/tools/testing/selftests/sysctl/Makefile b/tools/testing/selftests/sysctl/Makefile
new file mode 100644
index 000000000..95c320b35
--- /dev/null
+++ b/tools/testing/selftests/sysctl/Makefile
@@ -0,0 +1,12 @@
+# Makefile for sysctl selftests.
+# Expects kernel.sysctl_writes_strict=1.
+
+# No binaries, but make sure arg-less "make" doesn't trigger "run_tests".
+all:
+
+TEST_PROGS := sysctl.sh
+
+include ../lib.mk
+
+# Nothing to clean up.
+clean:
diff --git a/tools/testing/selftests/sysctl/config b/tools/testing/selftests/sysctl/config
new file mode 100644
index 000000000..6ca14800d
--- /dev/null
+++ b/tools/testing/selftests/sysctl/config
@@ -0,0 +1 @@
+CONFIG_TEST_SYSCTL=y
diff --git a/tools/testing/selftests/sysctl/sysctl.sh b/tools/testing/selftests/sysctl/sysctl.sh
new file mode 100755
index 000000000..584eb8ea7
--- /dev/null
+++ b/tools/testing/selftests/sysctl/sysctl.sh
@@ -0,0 +1,780 @@
+#!/bin/bash
+# Copyright (C) 2017 Luis R. Rodriguez <mcgrof@kernel.org>
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or at your option any
+# later version; or, when distributed separately from the Linux kernel or
+# when incorporated into other software packages, subject to the following
+# license:
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of copyleft-next (version 0.3.1 or later) as published
+# at http://copyleft-next.org/.
+
+# This performs a series tests against the proc sysctl interface.
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+TEST_NAME="sysctl"
+TEST_DRIVER="test_${TEST_NAME}"
+TEST_DIR=$(dirname $0)
+TEST_FILE=$(mktemp)
+
+# This represents
+#
+# TEST_ID:TEST_COUNT:ENABLED
+#
+# TEST_ID: is the test id number
+# TEST_COUNT: number of times we should run the test
+# ENABLED: 1 if enabled, 0 otherwise
+#
+# Once these are enabled please leave them as-is. Write your own test,
+# we have tons of space.
+ALL_TESTS="0001:1:1"
+ALL_TESTS="$ALL_TESTS 0002:1:1"
+ALL_TESTS="$ALL_TESTS 0003:1:1"
+ALL_TESTS="$ALL_TESTS 0004:1:1"
+ALL_TESTS="$ALL_TESTS 0005:3:1"
+
+test_modprobe()
+{
+ if [ ! -d $DIR ]; then
+ echo "$0: $DIR not present" >&2
+ echo "You must have the following enabled in your kernel:" >&2
+ cat $TEST_DIR/config >&2
+ exit $ksft_skip
+ fi
+}
+
+function allow_user_defaults()
+{
+ if [ -z $DIR ]; then
+ DIR="/sys/module/test_sysctl/"
+ fi
+ if [ -z $DEFAULT_NUM_TESTS ]; then
+ DEFAULT_NUM_TESTS=50
+ fi
+ if [ -z $SYSCTL ]; then
+ SYSCTL="/proc/sys/debug/test_sysctl"
+ fi
+ if [ -z $PROD_SYSCTL ]; then
+ PROD_SYSCTL="/proc/sys"
+ fi
+ if [ -z $WRITES_STRICT ]; then
+ WRITES_STRICT="${PROD_SYSCTL}/kernel/sysctl_writes_strict"
+ fi
+}
+
+function check_production_sysctl_writes_strict()
+{
+ echo -n "Checking production write strict setting ... "
+ if [ ! -e ${WRITES_STRICT} ]; then
+ echo "FAIL, but skip in case of old kernel" >&2
+ else
+ old_strict=$(cat ${WRITES_STRICT})
+ if [ "$old_strict" = "1" ]; then
+ echo "ok"
+ else
+ echo "FAIL, strict value is 0 but force to 1 to continue" >&2
+ echo "1" > ${WRITES_STRICT}
+ fi
+ fi
+
+ if [ -z $PAGE_SIZE ]; then
+ PAGE_SIZE=$(getconf PAGESIZE)
+ fi
+ if [ -z $MAX_DIGITS ]; then
+ MAX_DIGITS=$(($PAGE_SIZE/8))
+ fi
+ if [ -z $INT_MAX ]; then
+ INT_MAX=$(getconf INT_MAX)
+ fi
+ if [ -z $UINT_MAX ]; then
+ UINT_MAX=$(getconf UINT_MAX)
+ fi
+}
+
+test_reqs()
+{
+ uid=$(id -u)
+ if [ $uid -ne 0 ]; then
+ echo $msg must be run as root >&2
+ exit $ksft_skip
+ fi
+
+ if ! which perl 2> /dev/null > /dev/null; then
+ echo "$0: You need perl installed"
+ exit $ksft_skip
+ fi
+ if ! which getconf 2> /dev/null > /dev/null; then
+ echo "$0: You need getconf installed"
+ exit $ksft_skip
+ fi
+ if ! which diff 2> /dev/null > /dev/null; then
+ echo "$0: You need diff installed"
+ exit $ksft_skip
+ fi
+}
+
+function load_req_mod()
+{
+ if [ ! -d $DIR ]; then
+ if ! modprobe -q -n $TEST_DRIVER; then
+ echo "$0: module $TEST_DRIVER not found [SKIP]"
+ exit $ksft_skip
+ fi
+ modprobe $TEST_DRIVER
+ if [ $? -ne 0 ]; then
+ exit
+ fi
+ fi
+}
+
+reset_vals()
+{
+ VAL=""
+ TRIGGER=$(basename ${TARGET})
+ case "$TRIGGER" in
+ int_0001)
+ VAL="60"
+ ;;
+ int_0002)
+ VAL="1"
+ ;;
+ uint_0001)
+ VAL="314"
+ ;;
+ string_0001)
+ VAL="(none)"
+ ;;
+ *)
+ ;;
+ esac
+ echo -n $VAL > $TARGET
+}
+
+set_orig()
+{
+ if [ ! -z $TARGET ]; then
+ echo "${ORIG}" > "${TARGET}"
+ fi
+}
+
+set_test()
+{
+ echo "${TEST_STR}" > "${TARGET}"
+}
+
+verify()
+{
+ local seen
+ seen=$(cat "$1")
+ if [ "${seen}" != "${TEST_STR}" ]; then
+ return 1
+ fi
+ return 0
+}
+
+verify_diff_w()
+{
+ echo "$TEST_STR" | diff -q -w -u - $1
+ return $?
+}
+
+test_rc()
+{
+ if [[ $rc != 0 ]]; then
+ echo "Failed test, return value: $rc" >&2
+ exit $rc
+ fi
+}
+
+test_finish()
+{
+ set_orig
+ rm -f "${TEST_FILE}"
+
+ if [ ! -z ${old_strict} ]; then
+ echo ${old_strict} > ${WRITES_STRICT}
+ fi
+ exit $rc
+}
+
+run_numerictests()
+{
+ echo "== Testing sysctl behavior against ${TARGET} =="
+
+ rc=0
+
+ echo -n "Writing test file ... "
+ echo "${TEST_STR}" > "${TEST_FILE}"
+ if ! verify "${TEST_FILE}"; then
+ echo "FAIL" >&2
+ exit 1
+ else
+ echo "ok"
+ fi
+
+ echo -n "Checking sysctl is not set to test value ... "
+ if verify "${TARGET}"; then
+ echo "FAIL" >&2
+ exit 1
+ else
+ echo "ok"
+ fi
+
+ echo -n "Writing sysctl from shell ... "
+ set_test
+ if ! verify "${TARGET}"; then
+ echo "FAIL" >&2
+ exit 1
+ else
+ echo "ok"
+ fi
+
+ echo -n "Resetting sysctl to original value ... "
+ set_orig
+ if verify "${TARGET}"; then
+ echo "FAIL" >&2
+ exit 1
+ else
+ echo "ok"
+ fi
+
+ # Now that we've validated the sanity of "set_test" and "set_orig",
+ # we can use those functions to set starting states before running
+ # specific behavioral tests.
+
+ echo -n "Writing entire sysctl in single write ... "
+ set_orig
+ dd if="${TEST_FILE}" of="${TARGET}" bs=4096 2>/dev/null
+ if ! verify "${TARGET}"; then
+ echo "FAIL" >&2
+ rc=1
+ else
+ echo "ok"
+ fi
+
+ echo -n "Writing middle of sysctl after synchronized seek ... "
+ set_test
+ dd if="${TEST_FILE}" of="${TARGET}" bs=1 seek=1 skip=1 2>/dev/null
+ if ! verify "${TARGET}"; then
+ echo "FAIL" >&2
+ rc=1
+ else
+ echo "ok"
+ fi
+
+ echo -n "Writing beyond end of sysctl ... "
+ set_orig
+ dd if="${TEST_FILE}" of="${TARGET}" bs=20 seek=2 2>/dev/null
+ if verify "${TARGET}"; then
+ echo "FAIL" >&2
+ rc=1
+ else
+ echo "ok"
+ fi
+
+ echo -n "Writing sysctl with multiple long writes ... "
+ set_orig
+ (perl -e 'print "A" x 50;'; echo "${TEST_STR}") | \
+ dd of="${TARGET}" bs=50 2>/dev/null
+ if verify "${TARGET}"; then
+ echo "FAIL" >&2
+ rc=1
+ else
+ echo "ok"
+ fi
+ test_rc
+}
+
+# Your test must accept digits 3 and 4 to use this
+run_limit_digit()
+{
+ echo -n "Checking ignoring spaces up to PAGE_SIZE works on write ..."
+ reset_vals
+
+ LIMIT=$((MAX_DIGITS -1))
+ TEST_STR="3"
+ (perl -e 'print " " x '$LIMIT';'; echo "${TEST_STR}") | \
+ dd of="${TARGET}" 2>/dev/null
+
+ if ! verify "${TARGET}"; then
+ echo "FAIL" >&2
+ rc=1
+ else
+ echo "ok"
+ fi
+ test_rc
+
+ echo -n "Checking passing PAGE_SIZE of spaces fails on write ..."
+ reset_vals
+
+ LIMIT=$((MAX_DIGITS))
+ TEST_STR="4"
+ (perl -e 'print " " x '$LIMIT';'; echo "${TEST_STR}") | \
+ dd of="${TARGET}" 2>/dev/null
+
+ if verify "${TARGET}"; then
+ echo "FAIL" >&2
+ rc=1
+ else
+ echo "ok"
+ fi
+ test_rc
+}
+
+# You are using an int
+run_limit_digit_int()
+{
+ echo -n "Testing INT_MAX works ..."
+ reset_vals
+ TEST_STR="$INT_MAX"
+ echo -n $TEST_STR > $TARGET
+
+ if ! verify "${TARGET}"; then
+ echo "FAIL" >&2
+ rc=1
+ else
+ echo "ok"
+ fi
+ test_rc
+
+ echo -n "Testing INT_MAX + 1 will fail as expected..."
+ reset_vals
+ let TEST_STR=$INT_MAX+1
+ echo -n $TEST_STR > $TARGET 2> /dev/null
+
+ if verify "${TARGET}"; then
+ echo "FAIL" >&2
+ rc=1
+ else
+ echo "ok"
+ fi
+ test_rc
+
+ echo -n "Testing negative values will work as expected..."
+ reset_vals
+ TEST_STR="-3"
+ echo -n $TEST_STR > $TARGET 2> /dev/null
+ if ! verify "${TARGET}"; then
+ echo "FAIL" >&2
+ rc=1
+ else
+ echo "ok"
+ fi
+ test_rc
+}
+
+# You used an int array
+run_limit_digit_int_array()
+{
+ echo -n "Testing array works as expected ... "
+ TEST_STR="4 3 2 1"
+ echo -n $TEST_STR > $TARGET
+
+ if ! verify_diff_w "${TARGET}"; then
+ echo "FAIL" >&2
+ rc=1
+ else
+ echo "ok"
+ fi
+ test_rc
+
+ echo -n "Testing skipping trailing array elements works ... "
+ # Do not reset_vals, carry on the values from the last test.
+ # If we only echo in two digits the last two are left intact
+ TEST_STR="100 101"
+ echo -n $TEST_STR > $TARGET
+ # After we echo in, to help diff we need to set on TEST_STR what
+ # we expect the result to be.
+ TEST_STR="100 101 2 1"
+
+ if ! verify_diff_w "${TARGET}"; then
+ echo "FAIL" >&2
+ rc=1
+ else
+ echo "ok"
+ fi
+ test_rc
+
+ echo -n "Testing PAGE_SIZE limit on array works ... "
+ # Do not reset_vals, carry on the values from the last test.
+ # Even if you use an int array, you are still restricted to
+ # MAX_DIGITS, this is a known limitation. Test limit works.
+ LIMIT=$((MAX_DIGITS -1))
+ TEST_STR="9"
+ (perl -e 'print " " x '$LIMIT';'; echo "${TEST_STR}") | \
+ dd of="${TARGET}" 2>/dev/null
+
+ TEST_STR="9 101 2 1"
+ if ! verify_diff_w "${TARGET}"; then
+ echo "FAIL" >&2
+ rc=1
+ else
+ echo "ok"
+ fi
+ test_rc
+
+ echo -n "Testing exceeding PAGE_SIZE limit fails as expected ... "
+ # Do not reset_vals, carry on the values from the last test.
+ # Now go over limit.
+ LIMIT=$((MAX_DIGITS))
+ TEST_STR="7"
+ (perl -e 'print " " x '$LIMIT';'; echo "${TEST_STR}") | \
+ dd of="${TARGET}" 2>/dev/null
+
+ TEST_STR="7 101 2 1"
+ if verify_diff_w "${TARGET}"; then
+ echo "FAIL" >&2
+ rc=1
+ else
+ echo "ok"
+ fi
+ test_rc
+}
+
+# You are using an unsigned int
+run_limit_digit_uint()
+{
+ echo -n "Testing UINT_MAX works ..."
+ reset_vals
+ TEST_STR="$UINT_MAX"
+ echo -n $TEST_STR > $TARGET
+
+ if ! verify "${TARGET}"; then
+ echo "FAIL" >&2
+ rc=1
+ else
+ echo "ok"
+ fi
+ test_rc
+
+ echo -n "Testing UINT_MAX + 1 will fail as expected..."
+ reset_vals
+ TEST_STR=$(($UINT_MAX+1))
+ echo -n $TEST_STR > $TARGET 2> /dev/null
+
+ if verify "${TARGET}"; then
+ echo "FAIL" >&2
+ rc=1
+ else
+ echo "ok"
+ fi
+ test_rc
+
+ echo -n "Testing negative values will not work as expected ..."
+ reset_vals
+ TEST_STR="-3"
+ echo -n $TEST_STR > $TARGET 2> /dev/null
+
+ if verify "${TARGET}"; then
+ echo "FAIL" >&2
+ rc=1
+ else
+ echo "ok"
+ fi
+ test_rc
+}
+
+run_stringtests()
+{
+ echo -n "Writing entire sysctl in short writes ... "
+ set_orig
+ dd if="${TEST_FILE}" of="${TARGET}" bs=1 2>/dev/null
+ if ! verify "${TARGET}"; then
+ echo "FAIL" >&2
+ rc=1
+ else
+ echo "ok"
+ fi
+
+ echo -n "Writing middle of sysctl after unsynchronized seek ... "
+ set_test
+ dd if="${TEST_FILE}" of="${TARGET}" bs=1 seek=1 2>/dev/null
+ if verify "${TARGET}"; then
+ echo "FAIL" >&2
+ rc=1
+ else
+ echo "ok"
+ fi
+
+ echo -n "Checking sysctl maxlen is at least $MAXLEN ... "
+ set_orig
+ perl -e 'print "A" x ('"${MAXLEN}"'-2), "B";' | \
+ dd of="${TARGET}" bs="${MAXLEN}" 2>/dev/null
+ if ! grep -q B "${TARGET}"; then
+ echo "FAIL" >&2
+ rc=1
+ else
+ echo "ok"
+ fi
+
+ echo -n "Checking sysctl keeps original string on overflow append ... "
+ set_orig
+ perl -e 'print "A" x ('"${MAXLEN}"'-1), "B";' | \
+ dd of="${TARGET}" bs=$(( MAXLEN - 1 )) 2>/dev/null
+ if grep -q B "${TARGET}"; then
+ echo "FAIL" >&2
+ rc=1
+ else
+ echo "ok"
+ fi
+
+ echo -n "Checking sysctl stays NULL terminated on write ... "
+ set_orig
+ perl -e 'print "A" x ('"${MAXLEN}"'-1), "B";' | \
+ dd of="${TARGET}" bs="${MAXLEN}" 2>/dev/null
+ if grep -q B "${TARGET}"; then
+ echo "FAIL" >&2
+ rc=1
+ else
+ echo "ok"
+ fi
+
+ echo -n "Checking sysctl stays NULL terminated on overwrite ... "
+ set_orig
+ perl -e 'print "A" x ('"${MAXLEN}"'-1), "BB";' | \
+ dd of="${TARGET}" bs=$(( $MAXLEN + 1 )) 2>/dev/null
+ if grep -q B "${TARGET}"; then
+ echo "FAIL" >&2
+ rc=1
+ else
+ echo "ok"
+ fi
+
+ test_rc
+}
+
+sysctl_test_0001()
+{
+ TARGET="${SYSCTL}/int_0001"
+ reset_vals
+ ORIG=$(cat "${TARGET}")
+ TEST_STR=$(( $ORIG + 1 ))
+
+ run_numerictests
+ run_limit_digit
+}
+
+sysctl_test_0002()
+{
+ TARGET="${SYSCTL}/string_0001"
+ reset_vals
+ ORIG=$(cat "${TARGET}")
+ TEST_STR="Testing sysctl"
+ # Only string sysctls support seeking/appending.
+ MAXLEN=65
+
+ run_numerictests
+ run_stringtests
+}
+
+sysctl_test_0003()
+{
+ TARGET="${SYSCTL}/int_0002"
+ reset_vals
+ ORIG=$(cat "${TARGET}")
+ TEST_STR=$(( $ORIG + 1 ))
+
+ run_numerictests
+ run_limit_digit
+ run_limit_digit_int
+}
+
+sysctl_test_0004()
+{
+ TARGET="${SYSCTL}/uint_0001"
+ reset_vals
+ ORIG=$(cat "${TARGET}")
+ TEST_STR=$(( $ORIG + 1 ))
+
+ run_numerictests
+ run_limit_digit
+ run_limit_digit_uint
+}
+
+sysctl_test_0005()
+{
+ TARGET="${SYSCTL}/int_0003"
+ reset_vals
+ ORIG=$(cat "${TARGET}")
+
+ run_limit_digit_int_array
+}
+
+list_tests()
+{
+ echo "Test ID list:"
+ echo
+ echo "TEST_ID x NUM_TEST"
+ echo "TEST_ID: Test ID"
+ echo "NUM_TESTS: Number of recommended times to run the test"
+ echo
+ echo "0001 x $(get_test_count 0001) - tests proc_dointvec_minmax()"
+ echo "0002 x $(get_test_count 0002) - tests proc_dostring()"
+ echo "0003 x $(get_test_count 0003) - tests proc_dointvec()"
+ echo "0004 x $(get_test_count 0004) - tests proc_douintvec()"
+ echo "0005 x $(get_test_count 0005) - tests proc_douintvec() array"
+}
+
+test_reqs
+
+usage()
+{
+ NUM_TESTS=$(grep -o ' ' <<<"$ALL_TESTS" | grep -c .)
+ let NUM_TESTS=$NUM_TESTS+1
+ MAX_TEST=$(printf "%04d\n" $NUM_TESTS)
+ echo "Usage: $0 [ -t <4-number-digit> ] | [ -w <4-number-digit> ] |"
+ echo " [ -s <4-number-digit> ] | [ -c <4-number-digit> <test- count>"
+ echo " [ all ] [ -h | --help ] [ -l ]"
+ echo ""
+ echo "Valid tests: 0001-$MAX_TEST"
+ echo ""
+ echo " all Runs all tests (default)"
+ echo " -t Run test ID the number amount of times is recommended"
+ echo " -w Watch test ID run until it runs into an error"
+ echo " -c Run test ID once"
+ echo " -s Run test ID x test-count number of times"
+ echo " -l List all test ID list"
+ echo " -h|--help Help"
+ echo
+ echo "If an error every occurs execution will immediately terminate."
+ echo "If you are adding a new test try using -w <test-ID> first to"
+ echo "make sure the test passes a series of tests."
+ echo
+ echo Example uses:
+ echo
+ echo "$TEST_NAME.sh -- executes all tests"
+ echo "$TEST_NAME.sh -t 0002 -- Executes test ID 0002 number of times is recomended"
+ echo "$TEST_NAME.sh -w 0002 -- Watch test ID 0002 run until an error occurs"
+ echo "$TEST_NAME.sh -s 0002 -- Run test ID 0002 once"
+ echo "$TEST_NAME.sh -c 0002 3 -- Run test ID 0002 three times"
+ echo
+ list_tests
+ exit 1
+}
+
+function test_num()
+{
+ re='^[0-9]+$'
+ if ! [[ $1 =~ $re ]]; then
+ usage
+ fi
+}
+
+function get_test_count()
+{
+ test_num $1
+ TEST_DATA=$(echo $ALL_TESTS | awk '{print $'$1'}')
+ LAST_TWO=${TEST_DATA#*:*}
+ echo ${LAST_TWO%:*}
+}
+
+function get_test_enabled()
+{
+ test_num $1
+ TEST_DATA=$(echo $ALL_TESTS | awk '{print $'$1'}')
+ echo ${TEST_DATA#*:*:}
+}
+
+function run_all_tests()
+{
+ for i in $ALL_TESTS ; do
+ TEST_ID=${i%:*:*}
+ ENABLED=$(get_test_enabled $TEST_ID)
+ TEST_COUNT=$(get_test_count $TEST_ID)
+ if [[ $ENABLED -eq "1" ]]; then
+ test_case $TEST_ID $TEST_COUNT
+ fi
+ done
+}
+
+function watch_log()
+{
+ if [ $# -ne 3 ]; then
+ clear
+ fi
+ date
+ echo "Running test: $2 - run #$1"
+}
+
+function watch_case()
+{
+ i=0
+ while [ 1 ]; do
+
+ if [ $# -eq 1 ]; then
+ test_num $1
+ watch_log $i ${TEST_NAME}_test_$1
+ ${TEST_NAME}_test_$1
+ else
+ watch_log $i all
+ run_all_tests
+ fi
+ let i=$i+1
+ done
+}
+
+function test_case()
+{
+ NUM_TESTS=$DEFAULT_NUM_TESTS
+ if [ $# -eq 2 ]; then
+ NUM_TESTS=$2
+ fi
+
+ i=0
+ while [ $i -lt $NUM_TESTS ]; do
+ test_num $1
+ watch_log $i ${TEST_NAME}_test_$1 noclear
+ RUN_TEST=${TEST_NAME}_test_$1
+ $RUN_TEST
+ let i=$i+1
+ done
+}
+
+function parse_args()
+{
+ if [ $# -eq 0 ]; then
+ run_all_tests
+ else
+ if [[ "$1" = "all" ]]; then
+ run_all_tests
+ elif [[ "$1" = "-w" ]]; then
+ shift
+ watch_case $@
+ elif [[ "$1" = "-t" ]]; then
+ shift
+ test_num $1
+ test_case $1 $(get_test_count $1)
+ elif [[ "$1" = "-c" ]]; then
+ shift
+ test_num $1
+ test_num $2
+ test_case $1 $2
+ elif [[ "$1" = "-s" ]]; then
+ shift
+ test_case $1 1
+ elif [[ "$1" = "-l" ]]; then
+ list_tests
+ elif [[ "$1" = "-h" || "$1" = "--help" ]]; then
+ usage
+ else
+ usage
+ fi
+ fi
+}
+
+test_reqs
+allow_user_defaults
+check_production_sysctl_writes_strict
+test_modprobe
+load_req_mod
+
+trap "test_finish" EXIT
+
+parse_args $@
+
+exit 0
diff --git a/tools/testing/selftests/tc-testing/.gitignore b/tools/testing/selftests/tc-testing/.gitignore
new file mode 100644
index 000000000..7a60b85e1
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/.gitignore
@@ -0,0 +1,2 @@
+__pycache__/
+*.pyc
diff --git a/tools/testing/selftests/tc-testing/README b/tools/testing/selftests/tc-testing/README
new file mode 100644
index 000000000..49a6f8c3f
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/README
@@ -0,0 +1,247 @@
+tdc - Linux Traffic Control (tc) unit testing suite
+
+Author: Lucas Bates - lucasb@mojatatu.com
+
+tdc is a Python script to load tc unit tests from a separate JSON file and
+execute them inside a network namespace dedicated to the task.
+
+
+REQUIREMENTS
+------------
+
+* Minimum Python version of 3.4. Earlier 3.X versions may work but are not
+ guaranteed.
+
+* The kernel must have network namespace support
+
+* The kernel must have veth support available, as a veth pair is created
+ prior to running the tests.
+
+* The kernel must have the appropriate infrastructure enabled to run all tdc
+ unit tests. See the config file in this directory for minimum required
+ features. As new tests will be added, config options list will be updated.
+
+* All tc-related features being tested must be built in or available as
+ modules. To check what is required in current setup run:
+ ./tdc.py -c
+
+ Note:
+ In the current release, tdc run will abort due to a failure in setup or
+ teardown commands - which includes not being able to run a test simply
+ because the kernel did not support a specific feature. (This will be
+ handled in a future version - the current workaround is to run the tests
+ on specific test categories that your kernel supports)
+
+
+BEFORE YOU RUN
+--------------
+
+The path to the tc executable that will be most commonly tested can be defined
+in the tdc_config.py file. Find the 'TC' entry in the NAMES dictionary and
+define the path.
+
+If you need to test a different tc executable on the fly, you can do so by
+using the -p option when running tdc:
+ ./tdc.py -p /path/to/tc
+
+
+RUNNING TDC
+-----------
+
+To use tdc, root privileges are required. This is because the
+commands being tested must be run as root. The code that enforces
+execution by root uid has been moved into a plugin (see PLUGIN
+ARCHITECTURE, below).
+
+If nsPlugin is linked, all tests are executed inside a network
+namespace to prevent conflicts within the host.
+
+Running tdc without any arguments will run all tests. Refer to the section
+on command line arguments for more information, or run:
+ ./tdc.py -h
+
+tdc will list the test names as they are being run, and print a summary in
+TAP (Test Anything Protocol) format when they are done. If tests fail,
+output captured from the failing test will be printed immediately following
+the failed test in the TAP output.
+
+
+OVERVIEW OF TDC EXECUTION
+-------------------------
+
+One run of tests is considered a "test suite" (this will be refined in the
+future). A test suite has one or more test cases in it.
+
+A test case has four stages:
+
+ - setup
+ - execute
+ - verify
+ - teardown
+
+The setup and teardown stages can run zero or more commands. The setup
+stage does some setup if the test needs it. The teardown stage undoes
+the setup and returns the system to a "neutral" state so any other test
+can be run next. These two stages require any commands run to return
+success, but do not otherwise verify the results.
+
+The execute and verify stages each run one command. The execute stage
+tests the return code against one or more acceptable values. The
+verify stage checks the return code for success, and also compares
+the stdout with a regular expression.
+
+Each of the commands in any stage will run in a shell instance.
+
+
+USER-DEFINED CONSTANTS
+----------------------
+
+The tdc_config.py file contains multiple values that can be altered to suit
+your needs. Any value in the NAMES dictionary can be altered without affecting
+the tests to be run. These values are used in the tc commands that will be
+executed as part of the test. More will be added as test cases require.
+
+Example:
+ $TC qdisc add dev $DEV1 ingress
+
+The NAMES values are used to substitute into the commands in the test cases.
+
+
+COMMAND LINE ARGUMENTS
+----------------------
+
+Run tdc.py -h to see the full list of available arguments.
+
+usage: tdc.py [-h] [-p PATH] [-D DIR [DIR ...]] [-f FILE [FILE ...]]
+ [-c [CATG [CATG ...]]] [-e ID [ID ...]] [-l] [-s] [-i] [-v] [-N]
+ [-d DEVICE] [-P] [-n] [-V]
+
+Linux TC unit tests
+
+optional arguments:
+ -h, --help show this help message and exit
+ -p PATH, --path PATH The full path to the tc executable to use
+ -v, --verbose Show the commands that are being run
+ -N, --notap Suppress tap results for command under test
+ -d DEVICE, --device DEVICE
+ Execute the test case in flower category
+ -P, --pause Pause execution just before post-suite stage
+
+selection:
+ select which test cases: files plus directories; filtered by categories
+ plus testids
+
+ -D DIR [DIR ...], --directory DIR [DIR ...]
+ Collect tests from the specified directory(ies)
+ (default [tc-tests])
+ -f FILE [FILE ...], --file FILE [FILE ...]
+ Run tests from the specified file(s)
+ -c [CATG [CATG ...]], --category [CATG [CATG ...]]
+ Run tests only from the specified category/ies, or if
+ no category/ies is/are specified, list known
+ categories.
+ -e ID [ID ...], --execute ID [ID ...]
+ Execute the specified test cases with specified IDs
+
+action:
+ select action to perform on selected test cases
+
+ -l, --list List all test cases, or those only within the
+ specified category
+ -s, --show Display the selected test cases
+ -i, --id Generate ID numbers for new test cases
+
+netns:
+ options for nsPlugin (run commands in net namespace)
+
+ -n, --namespace
+ Run commands in namespace as specified in tdc_config.py
+
+valgrind:
+ options for valgrindPlugin (run command under test under Valgrind)
+
+ -V, --valgrind Run commands under valgrind
+
+
+PLUGIN ARCHITECTURE
+-------------------
+
+There is now a plugin architecture, and some of the functionality that
+was in the tdc.py script has been moved into the plugins.
+
+The plugins are in the directory plugin-lib. The are executed from
+directory plugins. Put symbolic links from plugins to plugin-lib,
+and name them according to the order you want them to run.
+
+Example:
+
+bjb@bee:~/work/tc-testing$ ls -l plugins
+total 4
+lrwxrwxrwx 1 bjb bjb 27 Oct 4 16:12 10-rootPlugin.py -> ../plugin-lib/rootPlugin.py
+lrwxrwxrwx 1 bjb bjb 25 Oct 12 17:55 20-nsPlugin.py -> ../plugin-lib/nsPlugin.py
+-rwxr-xr-x 1 bjb bjb 0 Sep 29 15:56 __init__.py
+
+The plugins are a subclass of TdcPlugin, defined in TdcPlugin.py and
+must be called "SubPlugin" so tdc can find them. They are
+distinguished from each other in the python program by their module
+name.
+
+This base class supplies "hooks" to run extra functions. These hooks are as follows:
+
+pre- and post-suite
+pre- and post-case
+pre- and post-execute stage
+adjust-command (runs in all stages and receives the stage name)
+
+The pre-suite hook receives the number of tests and an array of test ids.
+This allows you to dump out the list of skipped tests in the event of a
+failure during setup or teardown stage.
+
+The pre-case hook receives the ordinal number and test id of the current test.
+
+The adjust-command hook receives the stage id (see list below) and the
+full command to be executed. This allows for last-minute adjustment
+of the command.
+
+The stages are identified by the following strings:
+
+ - pre (pre-suite)
+ - setup
+ - command
+ - verify
+ - teardown
+ - post (post-suite)
+
+
+To write a plugin, you need to inherit from TdcPlugin in
+TdcPlugin.py. To use the plugin, you have to put the
+implementation file in plugin-lib, and add a symbolic link to it from
+plugins. It will be detected at run time and invoked at the
+appropriate times. There are a few examples in the plugin-lib
+directory:
+
+ - rootPlugin.py:
+ implements the enforcement of running as root
+ - nsPlugin.py:
+ sets up a network namespace and runs all commands in that namespace
+ - valgrindPlugin.py
+ runs each command in the execute stage under valgrind,
+ and checks for leaks.
+ This plugin will output an extra test for each test in the test file,
+ one is the existing output as to whether the test passed or failed,
+ and the other is a test whether the command leaked memory or not.
+ (This one is a preliminary version, it may not work quite right yet,
+ but the overall template is there and it should only need tweaks.)
+
+
+ACKNOWLEDGEMENTS
+----------------
+
+Thanks to:
+
+Jamal Hadi Salim, for providing valuable test cases
+Keara Leibovitz, who wrote the CLI test driver that I used as a base for the
+ first version of the tc testing suite. This work was presented at
+ Netdev 1.2 Tokyo in October 2016.
+Samir Hussain, for providing help while I dove into Python for the first time
+ and being a second eye for this code.
diff --git a/tools/testing/selftests/tc-testing/TODO.txt b/tools/testing/selftests/tc-testing/TODO.txt
new file mode 100644
index 000000000..c40698557
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/TODO.txt
@@ -0,0 +1,31 @@
+tc Testing Suite To-Do list:
+
+- Determine what tc features are supported in the kernel. If features are not
+ present, prevent the related categories from running.
+
+- Add support for multiple versions of tc to run successively
+
+- Improve error messages when tdc aborts its run. Partially done - still
+ need to better handle problems in pre- and post-suite.
+
+- Use python logger module for debug/verbose output
+
+- Allow tdc to write its results to file.
+ Maybe use python logger module for this too.
+
+- A better implementation of the "hooks". Currently, every plugin
+ will attempt to run a function at every hook point. Could be
+ changed so that plugin __init__ methods will register functions to
+ be run in the various predefined times. Then if a plugin does not
+ require action at a specific point, no penalty will be paid for
+ trying to run a function that will do nothing.
+
+- Proper exception handling - make an exception class and use it
+
+- a TestCase class, for easier testcase handling, searching, comparison
+
+- a TestSuite class
+ and a way to configure a test suite,
+ to automate running multiple "test suites" with different requirements
+
+- super simple test case example using ls, touch, etc
diff --git a/tools/testing/selftests/tc-testing/TdcPlugin.py b/tools/testing/selftests/tc-testing/TdcPlugin.py
new file mode 100644
index 000000000..3ee9a6dac
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/TdcPlugin.py
@@ -0,0 +1,74 @@
+#!/usr/bin/env python3
+
+class TdcPlugin:
+ def __init__(self):
+ super().__init__()
+ print(' -- {}.__init__'.format(self.sub_class))
+
+ def pre_suite(self, testcount, testidlist):
+ '''run commands before test_runner goes into a test loop'''
+ self.testcount = testcount
+ self.testidlist = testidlist
+ if self.args.verbose > 1:
+ print(' -- {}.pre_suite'.format(self.sub_class))
+
+ def post_suite(self, index):
+ '''run commands after test_runner completes the test loop
+ index is the last ordinal number of test that was attempted'''
+ if self.args.verbose > 1:
+ print(' -- {}.post_suite'.format(self.sub_class))
+
+ def pre_case(self, test_ordinal, testid):
+ '''run commands before test_runner does one test'''
+ if self.args.verbose > 1:
+ print(' -- {}.pre_case'.format(self.sub_class))
+ self.args.testid = testid
+ self.args.test_ordinal = test_ordinal
+
+ def post_case(self):
+ '''run commands after test_runner does one test'''
+ if self.args.verbose > 1:
+ print(' -- {}.post_case'.format(self.sub_class))
+
+ def pre_execute(self):
+ '''run command before test-runner does the execute step'''
+ if self.args.verbose > 1:
+ print(' -- {}.pre_execute'.format(self.sub_class))
+
+ def post_execute(self):
+ '''run command after test-runner does the execute step'''
+ if self.args.verbose > 1:
+ print(' -- {}.post_execute'.format(self.sub_class))
+
+ def adjust_command(self, stage, command):
+ '''adjust the command'''
+ if self.args.verbose > 1:
+ print(' -- {}.adjust_command {}'.format(self.sub_class, stage))
+
+ # if stage == 'pre':
+ # pass
+ # elif stage == 'setup':
+ # pass
+ # elif stage == 'execute':
+ # pass
+ # elif stage == 'verify':
+ # pass
+ # elif stage == 'teardown':
+ # pass
+ # elif stage == 'post':
+ # pass
+ # else:
+ # pass
+
+ return command
+
+ def add_args(self, parser):
+ '''Get the plugin args from the command line'''
+ self.argparser = parser
+ return self.argparser
+
+ def check_args(self, args, remaining):
+ '''Check that the args are set correctly'''
+ self.args = args
+ if self.args.verbose > 1:
+ print(' -- {}.check_args'.format(self.sub_class))
diff --git a/tools/testing/selftests/tc-testing/bpf/Makefile b/tools/testing/selftests/tc-testing/bpf/Makefile
new file mode 100644
index 000000000..dc92eb271
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/bpf/Makefile
@@ -0,0 +1,29 @@
+# SPDX-License-Identifier: GPL-2.0
+
+APIDIR := ../../../../include/uapi
+TEST_GEN_FILES = action.o
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+CLANG ?= clang
+LLC ?= llc
+PROBE := $(shell $(LLC) -march=bpf -mcpu=probe -filetype=null /dev/null 2>&1)
+
+ifeq ($(PROBE),)
+ CPU ?= probe
+else
+ CPU ?= generic
+endif
+
+CLANG_SYS_INCLUDES := $(shell $(CLANG) -v -E - </dev/null 2>&1 \
+ | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }')
+
+CLANG_FLAGS = -I. -I$(APIDIR) \
+ $(CLANG_SYS_INCLUDES) \
+ -Wno-compare-distinct-pointer-types
+
+$(OUTPUT)/%.o: %.c
+ $(CLANG) $(CLANG_FLAGS) \
+ -O2 -target bpf -emit-llvm -c $< -o - | \
+ $(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@
diff --git a/tools/testing/selftests/tc-testing/bpf/action.c b/tools/testing/selftests/tc-testing/bpf/action.c
new file mode 100644
index 000000000..c32b99b80
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/bpf/action.c
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Copyright (c) 2018 Davide Caratti, Red Hat inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#include <linux/bpf.h>
+#include <linux/pkt_cls.h>
+
+__attribute__((section("action-ok"),used)) int action_ok(struct __sk_buff *s)
+{
+ return TC_ACT_OK;
+}
+
+__attribute__((section("action-ko"),used)) int action_ko(struct __sk_buff *s)
+{
+ s->data = 0x0;
+ return TC_ACT_OK;
+}
+
+char _license[] __attribute__((section("license"),used)) = "GPL";
diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config
new file mode 100644
index 000000000..203302065
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/config
@@ -0,0 +1,48 @@
+CONFIG_NET_SCHED=y
+
+#
+# Queueing/Scheduling
+#
+CONFIG_NET_SCH_PRIO=m
+CONFIG_NET_SCH_INGRESS=m
+
+#
+# Classification
+#
+CONFIG_NET_CLS=y
+CONFIG_NET_CLS_FW=m
+CONFIG_NET_CLS_U32=m
+CONFIG_CLS_U32_PERF=y
+CONFIG_CLS_U32_MARK=y
+CONFIG_NET_EMATCH=y
+CONFIG_NET_EMATCH_STACK=32
+CONFIG_NET_EMATCH_CMP=m
+CONFIG_NET_EMATCH_NBYTE=m
+CONFIG_NET_EMATCH_U32=m
+CONFIG_NET_EMATCH_META=m
+CONFIG_NET_EMATCH_TEXT=m
+CONFIG_NET_EMATCH_IPSET=m
+CONFIG_NET_EMATCH_IPT=m
+CONFIG_NET_CLS_ACT=y
+CONFIG_NET_ACT_POLICE=m
+CONFIG_NET_ACT_GACT=m
+CONFIG_GACT_PROB=y
+CONFIG_NET_ACT_MIRRED=m
+CONFIG_NET_ACT_SAMPLE=m
+CONFIG_NET_ACT_IPT=m
+CONFIG_NET_ACT_NAT=m
+CONFIG_NET_ACT_PEDIT=m
+CONFIG_NET_ACT_SIMP=m
+CONFIG_NET_ACT_SKBEDIT=m
+CONFIG_NET_ACT_CSUM=m
+CONFIG_NET_ACT_VLAN=m
+CONFIG_NET_ACT_BPF=m
+CONFIG_NET_ACT_CONNMARK=m
+CONFIG_NET_ACT_SKBMOD=m
+CONFIG_NET_ACT_IFE=m
+CONFIG_NET_ACT_TUNNEL_KEY=m
+CONFIG_NET_IFE_SKBMARK=m
+CONFIG_NET_IFE_SKBPRIO=m
+CONFIG_NET_IFE_SKBTCINDEX=m
+CONFIG_NET_CLS_IND=y
+CONFIG_NET_SCH_FIFO=y
diff --git a/tools/testing/selftests/tc-testing/creating-plugins/AddingPlugins.txt b/tools/testing/selftests/tc-testing/creating-plugins/AddingPlugins.txt
new file mode 100644
index 000000000..c18f88d09
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/creating-plugins/AddingPlugins.txt
@@ -0,0 +1,104 @@
+tdc - Adding plugins for tdc
+
+Author: Brenda J. Butler - bjb@mojatatu.com
+
+ADDING PLUGINS
+--------------
+
+A new plugin should be written in python as a class that inherits from TdcPlugin.
+There are some examples in plugin-lib.
+
+The plugin can be used to add functionality to the test framework,
+such as:
+
+- adding commands to be run before and/or after the test suite
+- adding commands to be run before and/or after the test cases
+- adding commands to be run before and/or after the execute phase of the test cases
+- ability to alter the command to be run in any phase:
+ pre (the pre-suite stage)
+ prepare
+ execute
+ verify
+ teardown
+ post (the post-suite stage)
+- ability to add to the command line args, and use them at run time
+
+
+The functions in the class should follow the following interfaces:
+
+ def __init__(self)
+ def pre_suite(self, testcount, testidlist) # see "PRE_SUITE" below
+ def post_suite(self, ordinal) # see "SKIPPING" below
+ def pre_case(self, test_ordinal, testid) # see "PRE_CASE" below
+ def post_case(self)
+ def pre_execute(self)
+ def post_execute(self)
+ def adjust_command(self, stage, command) # see "ADJUST" below
+ def add_args(self, parser) # see "ADD_ARGS" below
+ def check_args(self, args, remaining) # see "CHECK_ARGS" below
+
+
+PRE_SUITE
+
+This method takes a testcount (number of tests to be run) and
+testidlist (array of test ids for tests that will be run). This is
+useful for various things, including when an exception occurs and the
+rest of the tests must be skipped. The info is stored in the object,
+and the post_suite method can refer to it when dumping the "skipped"
+TAP output. The tdc.py script will do that for the test suite as
+defined in the test case, but if the plugin is being used to run extra
+tests on each test (eg, check for memory leaks on associated
+co-processes) then that other tap output can be generated in the
+post-suite method using this info passed in to the pre_suite method.
+
+
+SKIPPING
+
+The post_suite method will receive the ordinal number of the last
+test to be attempted. It can use this info when outputting
+the TAP output for the extra test cases.
+
+
+PRE_CASE
+
+The pre_case method will receive the ordinal number of the test
+and the test id. Useful for outputing the extra test results.
+
+
+ADJUST
+
+The adjust_command method receives a string representing
+the execution stage and a string which is the actual command to be
+executed. The plugin can adjust the command, based on the stage of
+execution.
+
+The stages are represented by the following strings:
+
+ 'pre'
+ 'setup'
+ 'command'
+ 'verify'
+ 'teardown'
+ 'post'
+
+The adjust_command method must return the adjusted command so tdc
+can use it.
+
+
+ADD_ARGS
+
+The add_args method receives the argparser object and can add
+arguments to it. Care should be taken that the new arguments do not
+conflict with any from tdc.py or from other plugins that will be used
+concurrently.
+
+The add_args method should return the argparser object.
+
+
+CHECK_ARGS
+
+The check_args method is so that the plugin can do validation on
+the args, if needed. If there is a problem, and Exception should
+be raised, with a string that explains the problem.
+
+eg: raise Exception('plugin xxx, arg -y is wrong, fix it')
diff --git a/tools/testing/selftests/tc-testing/creating-testcases/AddingTestCases.txt b/tools/testing/selftests/tc-testing/creating-testcases/AddingTestCases.txt
new file mode 100644
index 000000000..17b267ded
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/creating-testcases/AddingTestCases.txt
@@ -0,0 +1,100 @@
+tdc - Adding test cases for tdc
+
+Author: Lucas Bates - lucasb@mojatatu.com
+
+ADDING TEST CASES
+-----------------
+
+User-defined tests should be added by defining a separate JSON file. This
+will help prevent conflicts when updating the repository. Refer to
+template.json for the required JSON format for test cases.
+
+Include the 'id' field, but do not assign a value. Running tdc with the -i
+option will generate a unique ID for that test case.
+
+tdc will recursively search the 'tc-tests' subdirectory (or the
+directories named with the -D option) for .json files. Any test case
+files you create in these directories will automatically be included.
+If you wish to store your custom test cases elsewhere, be sure to run
+tdc with the -f argument and the path to your file, or the -D argument
+and the path to your directory(ies).
+
+Be aware of required escape characters in the JSON data - particularly
+when defining the match pattern. Refer to the supplied json test files
+for examples when in doubt. The match pattern is written in json, and
+will be used by python. So the match pattern will be a python regular
+expression, but should be written using json syntax.
+
+
+TEST CASE STRUCTURE
+-------------------
+
+Each test case has required data:
+
+id: A unique alphanumeric value to identify a particular test case
+name: Descriptive name that explains the command under test
+category: A list of single-word descriptions covering what the command
+ under test is testing. Example: filter, actions, u32, gact, etc.
+setup: The list of commands required to ensure the command under test
+ succeeds. For example: if testing a filter, the command to create
+ the qdisc would appear here.
+ This list can be empty.
+ Each command can be a string to be executed, or a list consisting
+ of a string which is a command to be executed, followed by 1 or
+ more acceptable exit codes for this command.
+ If only a string is given for the command, then an exit code of 0
+ will be expected.
+cmdUnderTest: The tc command being tested itself.
+expExitCode: The code returned by the command under test upon its termination.
+ tdc will compare this value against the actual returned value.
+verifyCmd: The tc command to be run to verify successful execution.
+ For example: if the command under test creates a gact action,
+ verifyCmd should be "$TC actions show action gact"
+matchPattern: A regular expression to be applied against the output of the
+ verifyCmd to prove the command under test succeeded. This pattern
+ should be as specific as possible so that a false positive is not
+ matched.
+matchCount: How many times the regex in matchPattern should match. A value
+ of 0 is acceptable.
+teardown: The list of commands to clean up after the test is completed.
+ The environment should be returned to the same state as when
+ this test was started: qdiscs deleted, actions flushed, etc.
+ This list can be empty.
+ Each command can be a string to be executed, or a list consisting
+ of a string which is a command to be executed, followed by 1 or
+ more acceptable exit codes for this command.
+ If only a string is given for the command, then an exit code of 0
+ will be expected.
+
+
+SETUP/TEARDOWN ERRORS
+---------------------
+
+If an error is detected during the setup/teardown process, execution of the
+tests will immediately stop with an error message and the namespace in which
+the tests are run will be destroyed. This is to prevent inaccurate results
+in the test cases. tdc will output a series of TAP results for the skipped
+tests.
+
+Repeated failures of the setup/teardown may indicate a problem with the test
+case, or possibly even a bug in one of the commands that are not being tested.
+
+It's possible to include acceptable exit codes with the setup/teardown command
+so that it doesn't halt the script for an error that doesn't matter. Turn the
+individual command into a list, with the command being first, followed by all
+acceptable exit codes for the command.
+
+Example:
+
+A pair of setup commands. The first can have exit code 0, 1 or 255, the
+second must have exit code 0.
+
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action reclassify index 65536"
+ ],
diff --git a/tools/testing/selftests/tc-testing/creating-testcases/example.json b/tools/testing/selftests/tc-testing/creating-testcases/example.json
new file mode 100644
index 000000000..5ec501200
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/creating-testcases/example.json
@@ -0,0 +1,55 @@
+[
+ {
+ "id": "1f",
+ "name": "simple test to test framework",
+ "category": [
+ "example"
+ ],
+ "setup": [
+ "mkdir mytest"
+ ],
+ "cmdUnderTest": "touch mytest/blorfl",
+ "expExitCode": "0",
+ "verifyCmd": "ls mytest/* | grep '[b]lorfl'",
+ "matchPattern": "orfl",
+ "matchCount": "1",
+ "teardown": [
+ "rm -rf mytest"
+ ]
+ },
+ {
+ "id": "2f",
+ "name": "simple test, no need for verify",
+ "category": [
+ "example"
+ ],
+ "setup": [
+ "mkdir mytest",
+ "touch mytest/blorfl"
+ ],
+ "cmdUnderTest": "ls mytest/blorfl",
+ "expExitCode": "0",
+ "verifyCmd": "/bin/true",
+ "matchPattern": " ",
+ "matchCount": "0",
+ "teardown": [
+ "rm -rf mytest"
+ ]
+ },
+ {
+ "id": "3f",
+ "name": "simple test, no need for setup or teardown (or verify)",
+ "category": [
+ "example"
+ ],
+ "setup": [
+ ],
+ "cmdUnderTest": "ip l l lo",
+ "expExitCode": "0",
+ "verifyCmd": "/bin/true",
+ "matchPattern": " ",
+ "matchCount": "0",
+ "teardown": [
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/creating-testcases/template.json b/tools/testing/selftests/tc-testing/creating-testcases/template.json
new file mode 100644
index 000000000..8b99b86d6
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/creating-testcases/template.json
@@ -0,0 +1,51 @@
+[
+ {
+ "id": "",
+ "name": "",
+ "category": [
+ "",
+ ""
+ ],
+ "setup": [
+ ""
+ ],
+ "cmdUnderTest": "",
+ "expExitCode": "",
+ "verifyCmd": "",
+ "matchPattern": "",
+ "matchCount": "",
+ "teardown": [
+ ""
+ ]
+ },
+ {
+ "id": "",
+ "name": "",
+ "category": [
+ "",
+ ""
+ ],
+ "setup": [
+ "",
+ [
+ "",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "",
+ "expExitCode": "",
+ "verifyCmd": "",
+ "matchPattern": "",
+ "matchCount": "",
+ "teardown": [
+ "",
+ [
+ "",
+ 0,
+ 255
+ ]
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/plugin-lib/README-PLUGINS b/tools/testing/selftests/tc-testing/plugin-lib/README-PLUGINS
new file mode 100644
index 000000000..aa8a26697
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/plugin-lib/README-PLUGINS
@@ -0,0 +1,27 @@
+tdc.py will look for plugins in a directory plugins off the cwd.
+Make a set of numbered symbolic links from there to the actual plugins.
+Eg:
+
+tdc.py
+plugin-lib/
+plugins/
+ __init__.py
+ 10-rootPlugin.py -> ../plugin-lib/rootPlugin.py
+ 20-valgrindPlugin.py -> ../plugin-lib/valgrindPlugin.py
+ 30-nsPlugin.py -> ../plugin-lib/nsPlugin.py
+
+
+tdc.py will find them and use them.
+
+
+rootPlugin
+ Check if the uid is root. If not, bail out.
+
+valgrindPlugin
+ Run the command under test with valgrind, and produce an extra set of TAP results for the memory tests.
+ This plugin will write files to the cwd, called vgnd-xxx.log. These will contain
+ the valgrind output for test xxx. Any file matching the glob 'vgnd-*.log' will be
+ deleted at the end of the run.
+
+nsPlugin
+ Run all the commands in a network namespace.
diff --git a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py
new file mode 100644
index 000000000..a194b1af2
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py
@@ -0,0 +1,141 @@
+import os
+import signal
+from string import Template
+import subprocess
+import time
+from TdcPlugin import TdcPlugin
+
+from tdc_config import *
+
+class SubPlugin(TdcPlugin):
+ def __init__(self):
+ self.sub_class = 'ns/SubPlugin'
+ super().__init__()
+
+ def pre_suite(self, testcount, testidlist):
+ '''run commands before test_runner goes into a test loop'''
+ super().pre_suite(testcount, testidlist)
+
+ if self.args.namespace:
+ self._ns_create()
+
+ def post_suite(self, index):
+ '''run commands after test_runner goes into a test loop'''
+ super().post_suite(index)
+ if self.args.verbose:
+ print('{}.post_suite'.format(self.sub_class))
+
+ if self.args.namespace:
+ self._ns_destroy()
+
+ def add_args(self, parser):
+ super().add_args(parser)
+ self.argparser_group = self.argparser.add_argument_group(
+ 'netns',
+ 'options for nsPlugin(run commands in net namespace)')
+ self.argparser_group.add_argument(
+ '-n', '--namespace', action='store_true',
+ help='Run commands in namespace')
+ return self.argparser
+
+ def adjust_command(self, stage, command):
+ super().adjust_command(stage, command)
+ cmdform = 'list'
+ cmdlist = list()
+
+ if not self.args.namespace:
+ return command
+
+ if self.args.verbose:
+ print('{}.adjust_command'.format(self.sub_class))
+
+ if not isinstance(command, list):
+ cmdform = 'str'
+ cmdlist = command.split()
+ else:
+ cmdlist = command
+ if stage == 'setup' or stage == 'execute' or stage == 'verify' or stage == 'teardown':
+ if self.args.verbose:
+ print('adjust_command: stage is {}; inserting netns stuff in command [{}] list [{}]'.format(stage, command, cmdlist))
+ cmdlist.insert(0, self.args.NAMES['NS'])
+ cmdlist.insert(0, 'exec')
+ cmdlist.insert(0, 'netns')
+ cmdlist.insert(0, 'ip')
+ else:
+ pass
+
+ if cmdform == 'str':
+ command = ' '.join(cmdlist)
+ else:
+ command = cmdlist
+
+ if self.args.verbose:
+ print('adjust_command: return command [{}]'.format(command))
+ return command
+
+ def _ns_create(self):
+ '''
+ Create the network namespace in which the tests will be run and set up
+ the required network devices for it.
+ '''
+ if self.args.namespace:
+ cmd = 'ip netns add {}'.format(self.args.NAMES['NS'])
+ self._exec_cmd('pre', cmd)
+ cmd = 'ip link add $DEV0 type veth peer name $DEV1'
+ self._exec_cmd('pre', cmd)
+ cmd = 'ip link set $DEV1 netns {}'.format(self.args.NAMES['NS'])
+ self._exec_cmd('pre', cmd)
+ cmd = 'ip link set $DEV0 up'
+ self._exec_cmd('pre', cmd)
+ cmd = 'ip -n {} link set $DEV1 up'.format(self.args.NAMES['NS'])
+ self._exec_cmd('pre', cmd)
+ if self.args.device:
+ cmd = 'ip link set $DEV2 netns {}'.format(self.args.NAMES['NS'])
+ self._exec_cmd('pre', cmd)
+ cmd = 'ip -n {} link set $DEV2 up'.format(self.args.NAMES['NS'])
+ self._exec_cmd('pre', cmd)
+
+ def _ns_destroy(self):
+ '''
+ Destroy the network namespace for testing (and any associated network
+ devices as well)
+ '''
+ if self.args.namespace:
+ cmd = 'ip netns delete {}'.format(self.args.NAMES['NS'])
+ self._exec_cmd('post', cmd)
+
+ def _exec_cmd(self, stage, command):
+ '''
+ Perform any required modifications on an executable command, then run
+ it in a subprocess and return the results.
+ '''
+ if '$' in command:
+ command = self._replace_keywords(command)
+
+ self.adjust_command(stage, command)
+ if self.args.verbose:
+ print('_exec_cmd: command "{}"'.format(command))
+ proc = subprocess.Popen(command,
+ shell=True,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ env=ENVIR)
+ (rawout, serr) = proc.communicate()
+
+ if proc.returncode != 0 and len(serr) > 0:
+ foutput = serr.decode("utf-8")
+ else:
+ foutput = rawout.decode("utf-8")
+
+ proc.stdout.close()
+ proc.stderr.close()
+ return proc, foutput
+
+ def _replace_keywords(self, cmd):
+ """
+ For a given executable command, substitute any known
+ variables contained within NAMES with the correct values
+ """
+ tcmd = Template(cmd)
+ subcmd = tcmd.safe_substitute(self.args.NAMES)
+ return subcmd
diff --git a/tools/testing/selftests/tc-testing/plugin-lib/rootPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/rootPlugin.py
new file mode 100644
index 000000000..e36775bd4
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/plugin-lib/rootPlugin.py
@@ -0,0 +1,19 @@
+import os
+import sys
+from TdcPlugin import TdcPlugin
+
+from tdc_config import *
+
+
+class SubPlugin(TdcPlugin):
+ def __init__(self):
+ self.sub_class = 'root/SubPlugin'
+ super().__init__()
+
+ def pre_suite(self, testcount, testidlist):
+ # run commands before test_runner goes into a test loop
+ super().pre_suite(testcount, testidlist)
+
+ if os.geteuid():
+ print('This script must be run with root privileges', file=sys.stderr)
+ exit(1)
diff --git a/tools/testing/selftests/tc-testing/plugin-lib/valgrindPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/valgrindPlugin.py
new file mode 100644
index 000000000..477a7bd7d
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/plugin-lib/valgrindPlugin.py
@@ -0,0 +1,142 @@
+'''
+run the command under test, under valgrind and collect memory leak info
+as a separate test.
+'''
+
+
+import os
+import re
+import signal
+from string import Template
+import subprocess
+import time
+from TdcPlugin import TdcPlugin
+
+from tdc_config import *
+
+def vp_extract_num_from_string(num_as_string_maybe_with_commas):
+ return int(num_as_string_maybe_with_commas.replace(',',''))
+
+class SubPlugin(TdcPlugin):
+ def __init__(self):
+ self.sub_class = 'valgrind/SubPlugin'
+ self.tap = ''
+ super().__init__()
+
+ def pre_suite(self, testcount, testidlist):
+ '''run commands before test_runner goes into a test loop'''
+ super().pre_suite(testcount, testidlist)
+ if self.args.verbose > 1:
+ print('{}.pre_suite'.format(self.sub_class))
+ if self.args.valgrind:
+ self._add_to_tap('1..{}\n'.format(self.testcount))
+
+ def post_suite(self, index):
+ '''run commands after test_runner goes into a test loop'''
+ super().post_suite(index)
+ self._add_to_tap('\n|---\n')
+ if self.args.verbose > 1:
+ print('{}.post_suite'.format(self.sub_class))
+ print('{}'.format(self.tap))
+ if self.args.verbose < 4:
+ subprocess.check_output('rm -f vgnd-*.log', shell=True)
+
+ def add_args(self, parser):
+ super().add_args(parser)
+ self.argparser_group = self.argparser.add_argument_group(
+ 'valgrind',
+ 'options for valgrindPlugin (run command under test under Valgrind)')
+
+ self.argparser_group.add_argument(
+ '-V', '--valgrind', action='store_true',
+ help='Run commands under valgrind')
+
+ return self.argparser
+
+ def adjust_command(self, stage, command):
+ super().adjust_command(stage, command)
+ cmdform = 'list'
+ cmdlist = list()
+
+ if not self.args.valgrind:
+ return command
+
+ if self.args.verbose > 1:
+ print('{}.adjust_command'.format(self.sub_class))
+
+ if not isinstance(command, list):
+ cmdform = 'str'
+ cmdlist = command.split()
+ else:
+ cmdlist = command
+
+ if stage == 'execute':
+ if self.args.verbose > 1:
+ print('adjust_command: stage is {}; inserting valgrind stuff in command [{}] list [{}]'.
+ format(stage, command, cmdlist))
+ cmdlist.insert(0, '--track-origins=yes')
+ cmdlist.insert(0, '--show-leak-kinds=definite,indirect')
+ cmdlist.insert(0, '--leak-check=full')
+ cmdlist.insert(0, '--log-file=vgnd-{}.log'.format(self.args.testid))
+ cmdlist.insert(0, '-v') # ask for summary of non-leak errors
+ cmdlist.insert(0, ENVIR['VALGRIND_BIN'])
+ else:
+ pass
+
+ if cmdform == 'str':
+ command = ' '.join(cmdlist)
+ else:
+ command = cmdlist
+
+ if self.args.verbose > 1:
+ print('adjust_command: return command [{}]'.format(command))
+ return command
+
+ def post_execute(self):
+ if not self.args.valgrind:
+ return
+
+ self.definitely_lost_re = re.compile(
+ r'definitely lost:\s+([,0-9]+)\s+bytes in\s+([,0-9]+)\sblocks', re.MULTILINE | re.DOTALL)
+ self.indirectly_lost_re = re.compile(
+ r'indirectly lost:\s+([,0-9]+)\s+bytes in\s+([,0-9]+)\s+blocks', re.MULTILINE | re.DOTALL)
+ self.possibly_lost_re = re.compile(
+ r'possibly lost:\s+([,0-9]+)bytes in\s+([,0-9]+)\s+blocks', re.MULTILINE | re.DOTALL)
+ self.non_leak_error_re = re.compile(
+ r'ERROR SUMMARY:\s+([,0-9]+) errors from\s+([,0-9]+)\s+contexts', re.MULTILINE | re.DOTALL)
+
+ def_num = 0
+ ind_num = 0
+ pos_num = 0
+ nle_num = 0
+
+ # what about concurrent test runs? Maybe force them to be in different directories?
+ with open('vgnd-{}.log'.format(self.args.testid)) as vfd:
+ content = vfd.read()
+ def_mo = self.definitely_lost_re.search(content)
+ ind_mo = self.indirectly_lost_re.search(content)
+ pos_mo = self.possibly_lost_re.search(content)
+ nle_mo = self.non_leak_error_re.search(content)
+
+ if def_mo:
+ def_num = int(def_mo.group(2))
+ if ind_mo:
+ ind_num = int(ind_mo.group(2))
+ if pos_mo:
+ pos_num = int(pos_mo.group(2))
+ if nle_mo:
+ nle_num = int(nle_mo.group(1))
+
+ mem_results = ''
+ if (def_num > 0) or (ind_num > 0) or (pos_num > 0) or (nle_num > 0):
+ mem_results += 'not '
+
+ mem_results += 'ok {} - {}-mem # {}\n'.format(
+ self.args.test_ordinal, self.args.testid, 'memory leak check')
+ self._add_to_tap(mem_results)
+ if mem_results.startswith('not '):
+ print('{}'.format(content))
+ self._add_to_tap(content)
+
+ def _add_to_tap(self, more_tap_output):
+ self.tap += more_tap_output
diff --git a/tools/testing/selftests/tc-testing/plugins/__init__.py b/tools/testing/selftests/tc-testing/plugins/__init__.py
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/plugins/__init__.py
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json
new file mode 100644
index 000000000..1a9b282dd
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json
@@ -0,0 +1,294 @@
+[
+ {
+ "id": "d959",
+ "name": "Add cBPF action with valid bytecode",
+ "category": [
+ "actions",
+ "bpf"
+ ],
+ "setup": [
+ [
+ "$TC action flush action bpf",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' index 100",
+ "expExitCode": "0",
+ "verifyCmd": "$TC action get action bpf index 100",
+ "matchPattern": "action order [0-9]*: bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' default-action pipe.*index 100 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC action flush action bpf"
+ ]
+ },
+ {
+ "id": "f84a",
+ "name": "Add cBPF action with invalid bytecode",
+ "category": [
+ "actions",
+ "bpf"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action bpf",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC action add action bpf bytecode '4,40 0 0 12,31 0 1 2048,6 0 0 262144,6 0 0 0' index 100",
+ "expExitCode": "255",
+ "verifyCmd": "$TC action get action bpf index 100",
+ "matchPattern": "action order [0-9]*: bpf bytecode '4,40 0 0 12,31 0 1 2048,6 0 0 262144,6 0 0 0' default-action pipe.*index 100 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action bpf"
+ ]
+ },
+ {
+ "id": "e939",
+ "name": "Add eBPF action with valid object-file",
+ "category": [
+ "actions",
+ "bpf"
+ ],
+ "setup": [
+ "make -C bpf",
+ [
+ "$TC action flush action bpf",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC action add action bpf object-file $EBPFDIR/action.o section action-ok index 667",
+ "expExitCode": "0",
+ "verifyCmd": "$TC action get action bpf index 667",
+ "matchPattern": "action order [0-9]*: bpf action.o:\\[action-ok\\] id [0-9]* tag [0-9a-f]{16}( jited)? default-action pipe.*index 667 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC action flush action bpf",
+ "make -C bpf clean"
+ ]
+ },
+ {
+ "id": "282d",
+ "name": "Add eBPF action with invalid object-file",
+ "category": [
+ "actions",
+ "bpf"
+ ],
+ "setup": [
+ "make -C bpf",
+ [
+ "$TC action flush action bpf",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC action add action bpf object-file $EBPFDIR/action.o section action-ko index 667",
+ "expExitCode": "255",
+ "verifyCmd": "$TC action get action bpf index 667",
+ "matchPattern": "action order [0-9]*: bpf action.o:\\[action-ko\\] id [0-9].*index 667 ref",
+ "matchCount": "0",
+ "teardown": [
+ [
+ "$TC action flush action bpf",
+ 0,
+ 1,
+ 255
+ ],
+ "make -C bpf clean"
+ ]
+ },
+ {
+ "id": "d819",
+ "name": "Replace cBPF bytecode and action control",
+ "category": [
+ "actions",
+ "bpf"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action bpf",
+ 0,
+ 1,
+ 255
+ ],
+ [
+ "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' index 555",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC action replace action bpf bytecode '4,40 0 0 12,21 0 1 2054,6 0 0 262144,6 0 0 0' drop index 555",
+ "expExitCode": "0",
+ "verifyCmd": "$TC action get action bpf index 555",
+ "matchPattern": "action order [0-9]*: bpf bytecode '4,40 0 0 12,21 0 1 2054,6 0 0 262144,6 0 0 0' default-action drop.*index 555 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC action flush action bpf"
+ ]
+ },
+ {
+ "id": "6ae3",
+ "name": "Delete cBPF action ",
+ "category": [
+ "actions",
+ "bpf"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action bpf",
+ 0,
+ 1,
+ 255
+ ],
+ [
+ "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' index 444",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC action delete action bpf index 444",
+ "expExitCode": "0",
+ "verifyCmd": "$TC action get action bpf index 444",
+ "matchPattern": "action order [0-9]*: bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' default-action pipe.*index 444 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC action flush action bpf"
+ ]
+ },
+ {
+ "id": "3e0d",
+ "name": "List cBPF actions",
+ "category": [
+ "actions",
+ "bpf"
+ ],
+ "setup": [
+ [
+ "$TC action flush action bpf",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' ok index 101",
+ "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2054,6 0 0 262144,6 0 0 0' drop index 102",
+ "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 33024,6 0 0 262144,6 0 0 0' continue index 103"
+ ],
+ "cmdUnderTest": "$TC action list action bpf",
+ "expExitCode": "0",
+ "verifyCmd": "$TC action list action bpf",
+ "matchPattern": "action order [0-9]*: bpf bytecode",
+ "matchCount": "3",
+ "teardown": [
+ "$TC actions flush action bpf"
+ ]
+ },
+ {
+ "id": "55ce",
+ "name": "Flush BPF actions",
+ "category": [
+ "actions",
+ "bpf"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action bpf",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' ok index 101",
+ "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2054,6 0 0 262144,6 0 0 0' drop index 102",
+ "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 33024,6 0 0 262144,6 0 0 0' continue index 103"
+ ],
+ "cmdUnderTest": "$TC action flush action bpf",
+ "expExitCode": "0",
+ "verifyCmd": "$TC action list action bpf",
+ "matchPattern": "action order [0-9]*: bpf bytecode",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action bpf"
+ ]
+ },
+ {
+ "id": "ccc3",
+ "name": "Add cBPF action with duplicate index",
+ "category": [
+ "actions",
+ "bpf"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action bpf",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' index 4294967295"
+ ],
+ "cmdUnderTest": "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2054,6 0 0 262144,6 0 0 0' index 4294967295",
+ "expExitCode": "255",
+ "verifyCmd": "$TC action get action bpf index 4294967295",
+ "matchPattern": "action order [0-9]*: bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' default-action pipe.*index 4294967295",
+ "matchCount": "1",
+ "teardown": [
+ "$TC action flush action bpf"
+ ]
+ },
+ {
+ "id": "89c7",
+ "name": "Add cBPF action with invalid index",
+ "category": [
+ "actions",
+ "bpf"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action bpf",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2054,6 0 0 262144,6 0 0 0' index 4294967296 cookie 12345",
+ "expExitCode": "255",
+ "verifyCmd": "$TC action ls action bpf",
+ "matchPattern": "action order [0-9]*: bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' default-action pipe.*cookie 12345",
+ "matchCount": "0",
+ "teardown": [
+ "$TC action flush action bpf"
+ ]
+ },
+ {
+ "id": "7ab9",
+ "name": "Add cBPF action with cookie",
+ "category": [
+ "actions",
+ "bpf"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action bpf",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2054,6 0 0 262144,6 0 0 0' cookie d0d0d0d0d0d0d0d0",
+ "expExitCode": "0",
+ "verifyCmd": "$TC action list action bpf",
+ "matchPattern": "action order [0-9]*: bpf.*cookie d0d0d0d0d0d0d0",
+ "matchCount": "1",
+ "teardown": [
+ "$TC action flush action bpf"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/connmark.json b/tools/testing/selftests/tc-testing/tc-tests/actions/connmark.json
new file mode 100644
index 000000000..13147a1f5
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/connmark.json
@@ -0,0 +1,291 @@
+[
+ {
+ "id": "2002",
+ "name": "Add valid connmark action with defaults",
+ "category": [
+ "actions",
+ "connmark"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action connmark",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action connmark",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action connmark",
+ "matchPattern": "action order [0-9]+: connmark zone 0 pipe",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action connmark"
+ ]
+ },
+ {
+ "id": "56a5",
+ "name": "Add valid connmark action with control pass",
+ "category": [
+ "actions",
+ "connmark"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action connmark",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action connmark pass index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action connmark index 1",
+ "matchPattern": "action order [0-9]+: connmark zone 0 pass.*index 1 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action connmark"
+ ]
+ },
+ {
+ "id": "7c66",
+ "name": "Add valid connmark action with control drop",
+ "category": [
+ "actions",
+ "connmark"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action connmark",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action connmark drop index 100",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action connmark index 100",
+ "matchPattern": "action order [0-9]+: connmark zone 0 drop.*index 100 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action connmark"
+ ]
+ },
+ {
+ "id": "a913",
+ "name": "Add valid connmark action with control pipe",
+ "category": [
+ "actions",
+ "connmark"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action connmark",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action connmark pipe index 455",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action connmark index 455",
+ "matchPattern": "action order [0-9]+: connmark zone 0 pipe.*index 455 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action connmark"
+ ]
+ },
+ {
+ "id": "bdd8",
+ "name": "Add valid connmark action with control reclassify",
+ "category": [
+ "actions",
+ "connmark"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action connmark",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action connmark reclassify index 7",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action connmark",
+ "matchPattern": "action order [0-9]+: connmark zone 0 reclassify.*index 7 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action connmark"
+ ]
+ },
+ {
+ "id": "b8be",
+ "name": "Add valid connmark action with control continue",
+ "category": [
+ "actions",
+ "connmark"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action connmark",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action connmark continue index 17",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action connmark",
+ "matchPattern": "action order [0-9]+: connmark zone 0 continue.*index 17 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action connmark"
+ ]
+ },
+ {
+ "id": "d8a6",
+ "name": "Add valid connmark action with control jump",
+ "category": [
+ "actions",
+ "connmark"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action connmark",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action connmark jump 10 index 17",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action connmark",
+ "matchPattern": "action order [0-9]+: connmark zone 0 jump 10.*index 17 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action connmark"
+ ]
+ },
+ {
+ "id": "aae8",
+ "name": "Add valid connmark action with zone argument",
+ "category": [
+ "actions",
+ "connmark"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action connmark",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action connmark zone 100 pipe index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action connmark index 1",
+ "matchPattern": "action order [0-9]+: connmark zone 100 pipe.*index 1 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action connmark"
+ ]
+ },
+ {
+ "id": "2f0b",
+ "name": "Add valid connmark action with invalid zone argument",
+ "category": [
+ "actions",
+ "connmark"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action connmark",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action connmark zone 65536 reclassify index 21",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action connmark index 1",
+ "matchPattern": "action order [0-9]+: connmark zone 65536 reclassify.*index 21 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action connmark"
+ ]
+ },
+ {
+ "id": "9305",
+ "name": "Add connmark action with unsupported argument",
+ "category": [
+ "actions",
+ "connmark"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action connmark",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action connmark zone 655 unsupp_arg pass index 2",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action connmark index 2",
+ "matchPattern": "action order [0-9]+: connmark zone 655 unsupp_arg pass.*index 2 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action connmark"
+ ]
+ },
+ {
+ "id": "71ca",
+ "name": "Add valid connmark action and replace it",
+ "category": [
+ "actions",
+ "connmark"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action connmark",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action connmark zone 777 pass index 555"
+ ],
+ "cmdUnderTest": "$TC actions replace action connmark zone 555 reclassify index 555",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action connmark index 555",
+ "matchPattern": "action order [0-9]+: connmark zone 555 reclassify.*index 555 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action connmark"
+ ]
+ },
+ {
+ "id": "5f8f",
+ "name": "Add valid connmark action with cookie",
+ "category": [
+ "actions",
+ "connmark"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action connmark",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action connmark zone 555 pipe index 5 cookie aabbccddeeff112233445566778800a1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action connmark index 5",
+ "matchPattern": "action order [0-9]+: connmark zone 555 pipe.*index 5 ref.*cookie aabbccddeeff112233445566778800a1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action connmark"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json b/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json
new file mode 100644
index 000000000..a022792d3
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json
@@ -0,0 +1,504 @@
+[
+ {
+ "id": "6d84",
+ "name": "Add csum iph action",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum iph index 800",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action csum index 800",
+ "matchPattern": "action order [0-9]*: csum \\(iph\\) action pass.*index 800 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "1862",
+ "name": "Add csum ip4h action",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum ip4h index 7",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action csum index 7",
+ "matchPattern": "action order [0-9]*: csum \\(iph\\) action pass.*index 7 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "15c6",
+ "name": "Add csum ipv4h action",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum ipv4h index 1122",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action csum index 1122",
+ "matchPattern": "action order [0-9]*: csum \\(iph\\) action pass.*index 1122 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "bf47",
+ "name": "Add csum icmp action",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum icmp index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action csum index 1",
+ "matchPattern": "action order [0-9]*: csum \\(icmp\\) action pass.*index 1 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "cc1d",
+ "name": "Add csum igmp action",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum igmp index 999",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action csum index 999",
+ "matchPattern": "action order [0-9]*: csum \\(igmp\\) action pass.*index 999 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "bccc",
+ "name": "Add csum foobar action",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum foobar index 1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions ls action csum",
+ "matchPattern": "action order [0-9]*: csum \\(foobar\\) action pass.*index 1 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "3bb4",
+ "name": "Add csum tcp action",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum tcp index 9999",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action csum index 9999",
+ "matchPattern": "action order [0-9]*: csum \\(tcp\\) action pass.*index 9999 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "759c",
+ "name": "Add csum udp action",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum udp index 334455",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action csum index 334455",
+ "matchPattern": "action order [0-9]*: csum \\(udp\\) action pass.*index 334455 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "bdb6",
+ "name": "Add csum udp xor iph action",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum udp xor iph index 3",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions ls action csum",
+ "matchPattern": "action order [0-9]*: csum \\(udp xor iph\\) action pass.*index 3 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "c220",
+ "name": "Add csum udplite action",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum udplite continue index 3",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action csum index 3",
+ "matchPattern": "action order [0-9]*: csum \\(udplite\\) action continue.*index 3 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "8993",
+ "name": "Add csum sctp action",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum sctp index 777",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action csum index 777",
+ "matchPattern": "action order [0-9]*: csum \\(sctp\\) action pass.*index 777 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "b138",
+ "name": "Add csum ip & icmp action",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum ip and icmp pipe index 123",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action csum index 123",
+ "matchPattern": "action order [0-9]*: csum \\(iph, icmp\\) action pipe.*index 123 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "eeda",
+ "name": "Add csum ip & sctp action",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum ipv4h sctp continue index 2",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action csum index 2",
+ "matchPattern": "action order [0-9]*: csum \\(iph, sctp\\) action continue.*index 2 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "0017",
+ "name": "Add csum udp or tcp action",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum udp or tcp continue index 27",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action csum index 27",
+ "matchPattern": "action order [0-9]*: csum \\(tcp, udp\\) action continue.*index 27 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "b10b",
+ "name": "Add all 7 csum actions",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum icmp ip4h sctp igmp udplite udp tcp index 7",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action csum index 7",
+ "matchPattern": "action order [0-9]*: csum \\(iph, icmp, igmp, tcp, udp, udplite, sctp\\).*index 7 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "ce92",
+ "name": "Add csum udp action with cookie",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum udp pipe index 7 cookie 12345678",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action csum index 7",
+ "matchPattern": "action order [0-9]*: csum \\(udp\\) action pipe.*index 7.*cookie 12345678",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "912f",
+ "name": "Add csum icmp action with large cookie",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum icmp pipe index 17 cookie aabbccddeeff1122",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action csum index 17",
+ "matchPattern": "action order [0-9]*: csum \\(icmp\\) action pipe.*index 17.*cookie aabbccddeeff1122",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "879b",
+ "name": "Add batch of 32 csum tcp actions",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum tcp continue index \\$i \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\"",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action csum",
+ "matchPattern": "^[ \t]+index [0-9]* ref",
+ "matchCount": "32",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "b4e9",
+ "name": "Delete batch of 32 csum actions",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ],
+ "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum tcp continue index \\$i \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\""
+ ],
+ "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum index \\$i \\\"; args=\"\\$args\\$cmd\"; done && $TC actions del \\$args\"",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action csum",
+ "matchPattern": "^[ \t]+index [0-9]+ ref",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "0015",
+ "name": "Add batch of 32 csum tcp actions with large cookies",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum tcp continue index \\$i cookie aaabbbcccdddeee \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\"",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action csum",
+ "matchPattern": "^[ \t]+index [0-9]* ref",
+ "matchCount": "32",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "989e",
+ "name": "Delete batch of 32 csum actions with large cookies",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ],
+ "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum tcp continue index \\$i cookie aaabbbcccdddeee \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\""
+ ],
+ "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum index \\$i \\\"; args=\"\\$args\\$cmd\"; done && $TC actions del \\$args\"",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action csum",
+ "matchPattern": "^[ \t]+index [0-9]+ ref",
+ "matchCount": "0",
+ "teardown": []
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json b/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json
new file mode 100644
index 000000000..68c91023c
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json
@@ -0,0 +1,540 @@
+[
+ {
+ "id": "e89a",
+ "name": "Add valid pass action",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action pass index 8",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "action order [0-9]*: gact action pass.*index 8 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "a02c",
+ "name": "Add valid pipe action",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action pipe index 6",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "action order [0-9]*: gact action pipe.*index 6 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "feef",
+ "name": "Add valid reclassify action",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action reclassify index 5",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "action order [0-9]*: gact action reclassify.*index 5 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "8a7a",
+ "name": "Add valid drop action",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action drop index 30",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "action order [0-9]*: gact action drop.*index 30 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "9a52",
+ "name": "Add valid continue action",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action continue index 432",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "action order [0-9]*: gact action continue.*index 432 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "d700",
+ "name": "Add invalid action",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action pump index 386",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "action order [0-9]*: gact action.*index 386 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "9215",
+ "name": "Add action with duplicate index",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action pipe index 15"
+ ],
+ "cmdUnderTest": "$TC actions add action drop index 15",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "action order [0-9]*: gact action drop.*index 15 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "798e",
+ "name": "Add action with index exceeding 32-bit maximum",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action drop index 4294967296",
+ "expExitCode": "255",
+ "verifyCmd": "actions list action gact",
+ "matchPattern": "action order [0-9]*: gact action drop.*index 4294967296 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "22be",
+ "name": "Add action with index at 32-bit maximum",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action drop index 4294967295",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "action order [0-9]*: gact action drop.*index 4294967295 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "ac2a",
+ "name": "List actions",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action reclassify index 101",
+ "$TC actions add action reclassify index 102",
+ "$TC actions add action reclassify index 103",
+ "$TC actions add action reclassify index 104",
+ "$TC actions add action reclassify index 105"
+ ],
+ "cmdUnderTest": "$TC actions list action gact",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "action order [0-9]*: gact action reclassify",
+ "matchCount": "5",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "3edf",
+ "name": "Flush gact actions",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ "$TC actions add action reclassify index 101",
+ "$TC actions add action reclassify index 102",
+ "$TC actions add action reclassify index 103",
+ "$TC actions add action reclassify index 104",
+ "$TC actions add action reclassify index 105"
+ ],
+ "cmdUnderTest": "$TC actions flush action gact",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "action order [0-9]*: gact action reclassify",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "63ec",
+ "name": "Delete pass action",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action pass index 1"
+ ],
+ "cmdUnderTest": "$TC actions del action gact index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "action order [0-9]*: gact action pass.*index 1 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "46be",
+ "name": "Delete pipe action",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action pipe index 9"
+ ],
+ "cmdUnderTest": "$TC actions del action gact index 9",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "action order [0-9]*: gact action pipe.*index 9 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "2e08",
+ "name": "Delete reclassify action",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action reclassify index 65536"
+ ],
+ "cmdUnderTest": "$TC actions del action gact index 65536",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "action order [0-9]*: gact action reclassify.*index 65536 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "99c4",
+ "name": "Delete drop action",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action drop index 16"
+ ],
+ "cmdUnderTest": "$TC actions del action gact index 16",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "action order [0-9]*: gact action drop.*index 16 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "fb6b",
+ "name": "Delete continue action",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action continue index 32"
+ ],
+ "cmdUnderTest": "$TC actions del action gact index 32",
+ "expExitCode": "0",
+ "verifyCmd": "actions list action gact",
+ "matchPattern": "action order [0-9]*: gact action continue.*index 32 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "0eb3",
+ "name": "Delete non-existent action",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions del action gact index 2",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "action order [0-9]*: gact action",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "f02c",
+ "name": "Replace gact action",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action drop index 10",
+ "$TC actions add action drop index 12"
+ ],
+ "cmdUnderTest": "$TC actions replace action ok index 12",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action gact",
+ "matchPattern": "action order [0-9]*: gact action pass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "525f",
+ "name": "Get gact action by index",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action drop index 3900800700"
+ ],
+ "cmdUnderTest": "$TC actions get action gact index 3900800700",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action gact index 3900800700",
+ "matchPattern": "index 3900800700",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "1021",
+ "name": "Add batch of 32 gact pass actions",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action pass index \\$i \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\"",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "^[ \t]+index [0-9]+ ref",
+ "matchCount": "32",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "da7a",
+ "name": "Add batch of 32 gact continue actions with cookie",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action continue index \\$i cookie aabbccddeeff112233445566778800a1 \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\"",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "^[ \t]+index [0-9]+ ref",
+ "matchCount": "32",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "8aa3",
+ "name": "Delete batch of 32 gact continue actions",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ],
+ "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action continue index \\$i \\\"; args=\\\"\\$args\\$cmd\\\"; done && $TC actions add \\$args\""
+ ],
+ "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action gact index \\$i \\\"; args=\"\\$args\\$cmd\"; done && $TC actions del \\$args\"",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "^[ \t]+index [0-9]+ ref",
+ "matchCount": "0",
+ "teardown": []
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json b/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json
new file mode 100644
index 000000000..0da3545ca
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json
@@ -0,0 +1,1064 @@
+[
+ {
+ "id": "7682",
+ "name": "Create valid ife encode action with mark and pass control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode allow mark pass index 2",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 2",
+ "matchPattern": "action order [0-9]*: ife encode action pass.*type 0[xX]ED3E.*allow mark.*index 2",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "ef47",
+ "name": "Create valid ife encode action with mark and pipe control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode use mark 10 pipe index 2",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 2",
+ "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]ED3E.*use mark.*index 2",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "df43",
+ "name": "Create valid ife encode action with mark and continue control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode allow mark continue index 2",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 2",
+ "matchPattern": "action order [0-9]*: ife encode action continue.*type 0[xX]ED3E.*allow mark.*index 2",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "e4cf",
+ "name": "Create valid ife encode action with mark and drop control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode use mark 789 drop index 2",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 2",
+ "matchPattern": "action order [0-9]*: ife encode action drop.*type 0[xX]ED3E.*use mark 789.*index 2",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "ccba",
+ "name": "Create valid ife encode action with mark and reclassify control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode use mark 656768 reclassify index 2",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 2",
+ "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0[xX]ED3E.*use mark 656768.*index 2",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "a1cf",
+ "name": "Create valid ife encode action with mark and jump control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode use mark 65 jump 1 index 2",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 2",
+ "matchPattern": "action order [0-9]*: ife encode action jump 1.*type 0[xX]ED3E.*use mark 65.*index 2",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "cb3d",
+ "name": "Create valid ife encode action with mark value at 32-bit maximum",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode use mark 4294967295 reclassify index 90",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 90",
+ "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0[xX]ED3E.*use mark 4294967295.*index 90",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "1efb",
+ "name": "Create ife encode action with mark value exceeding 32-bit maximum",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode use mark 4294967295999 pipe index 90",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action ife index 90",
+ "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]ED3E.*use mark 4294967295999.*index 90",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "95ed",
+ "name": "Create valid ife encode action with prio and pass control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode allow prio pass index 9",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 9",
+ "matchPattern": "action order [0-9]*: ife encode action pass.*type 0[xX]ED3E.*allow prio.*index 9",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "aa17",
+ "name": "Create valid ife encode action with prio and pipe control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode use prio 7 pipe index 9",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 9",
+ "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]ED3E.*use prio 7.*index 9",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "74c7",
+ "name": "Create valid ife encode action with prio and continue control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode use prio 3 continue index 9",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 9",
+ "matchPattern": "action order [0-9]*: ife encode action continue.*type 0[xX]ED3E.*use prio 3.*index 9",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "7a97",
+ "name": "Create valid ife encode action with prio and drop control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode allow prio drop index 9",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 9",
+ "matchPattern": "action order [0-9]*: ife encode action drop.*type 0[xX]ED3E.*allow prio.*index 9",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "f66b",
+ "name": "Create valid ife encode action with prio and reclassify control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode use prio 998877 reclassify index 9",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 9",
+ "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0[xX]ED3E.*use prio 998877.*index 9",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "3056",
+ "name": "Create valid ife encode action with prio and jump control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode use prio 998877 jump 10 index 9",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 9",
+ "matchPattern": "action order [0-9]*: ife encode action jump 10.*type 0[xX]ED3E.*use prio 998877.*index 9",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "7dd3",
+ "name": "Create valid ife encode action with prio value at 32-bit maximum",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode use prio 4294967295 reclassify index 99",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 99",
+ "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0[xX]ED3E.*use prio 4294967295.*index 99",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "2ca1",
+ "name": "Create ife encode action with prio value exceeding 32-bit maximum",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode use prio 4294967298 pipe index 99",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action ife index 99",
+ "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]ED3E.*use prio 4294967298.*index 99",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "05bb",
+ "name": "Create valid ife encode action with tcindex and pass control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode allow tcindex pass index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 1",
+ "matchPattern": "action order [0-9]*: ife encode action pass.*type 0[xX]ED3E.*allow tcindex.*index 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "ce65",
+ "name": "Create valid ife encode action with tcindex and pipe control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode use tcindex 111 pipe index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 1",
+ "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]ED3E.*use tcindex 111.*index 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "09cd",
+ "name": "Create valid ife encode action with tcindex and continue control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode use tcindex 1 continue index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 1",
+ "matchPattern": "action order [0-9]*: ife encode action continue.*type 0[xX]ED3E.*use tcindex 1.*index 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "8eb5",
+ "name": "Create valid ife encode action with tcindex and continue control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode use tcindex 1 continue index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 1",
+ "matchPattern": "action order [0-9]*: ife encode action continue.*type 0[xX]ED3E.*use tcindex 1.*index 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "451a",
+ "name": "Create valid ife encode action with tcindex and drop control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode allow tcindex drop index 77",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 77",
+ "matchPattern": "action order [0-9]*: ife encode action drop.*type 0[xX]ED3E.*allow tcindex.*index 77",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "d76c",
+ "name": "Create valid ife encode action with tcindex and reclassify control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode allow tcindex reclassify index 77",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 77",
+ "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0[xX]ED3E.*allow tcindex.*index 77",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "e731",
+ "name": "Create valid ife encode action with tcindex and jump control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode allow tcindex jump 999 index 77",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 77",
+ "matchPattern": "action order [0-9]*: ife encode action jump 999.*type 0[xX]ED3E.*allow tcindex.*index 77",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "b7b8",
+ "name": "Create valid ife encode action with tcindex value at 16-bit maximum",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode use tcindex 65535 pass index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 1",
+ "matchPattern": "action order [0-9]*: ife encode action pass.*type 0[xX]ED3E.*use tcindex 65535.*index 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "d0d8",
+ "name": "Create ife encode action with tcindex value exceeding 16-bit maximum",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode use tcindex 65539 pipe index 1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action ife index 1",
+ "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]ED3E.*use tcindex 65539.*index 1",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "2a9c",
+ "name": "Create valid ife encode action with mac src parameter",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode allow mark src 00:11:22:33:44:55 pipe index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 1",
+ "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]ED3E.*allow mark src 00:11:22:33:44:55.*index 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "cf5c",
+ "name": "Create valid ife encode action with mac dst parameter",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode use prio 9876 dst 00:11:22:33:44:55 reclassify index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 1",
+ "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0[xX]ED3E.*use prio 9876 dst 00:11:22:33:44:55.*index 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "2353",
+ "name": "Create valid ife encode action with mac src and mac dst parameters",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode allow tcindex src 00:aa:bb:cc:dd:ee dst 00:11:22:33:44:55 pass index 11",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 11",
+ "matchPattern": "action order [0-9]*: ife encode action pass.*type 0[xX]ED3E.*allow tcindex dst 00:11:22:33:44:55 src 00:aa:bb:cc:dd:ee .*index 11",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "552c",
+ "name": "Create valid ife encode action with mark and type parameters",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode use mark 7 type 0xfefe pass index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 1",
+ "matchPattern": "action order [0-9]*: ife encode action pass.*type 0[xX]FEFE.*use mark 7.*index 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "0421",
+ "name": "Create valid ife encode action with prio and type parameters",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode use prio 444 type 0xabba pipe index 21",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 21",
+ "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]ABBA.*use prio 444.*index 21",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "4017",
+ "name": "Create valid ife encode action with tcindex and type parameters",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode use tcindex 5000 type 0xabcd reclassify index 21",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 21",
+ "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0[xX]ABCD.*use tcindex 5000.*index 21",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "fac3",
+ "name": "Create valid ife encode action with index at 32-bit maximum",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode allow mark pass index 4294967295",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 4294967295",
+ "matchPattern": "action order [0-9]*: ife encode action pass.*type 0[xX]ED3E.*allow mark.*index 4294967295",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "7c25",
+ "name": "Create valid ife decode action with pass control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife decode pass index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 1",
+ "matchPattern": "action order [0-9]*: ife decode action pass.*type 0(x0)?.*allow mark allow tcindex allow prio.*index 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "dccb",
+ "name": "Create valid ife decode action with pipe control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife decode pipe index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 1",
+ "matchPattern": "action order [0-9]*: ife decode action pipe.*type 0(x0)?.*allow mark allow tcindex allow prio.*index 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "7bb9",
+ "name": "Create valid ife decode action with continue control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife decode continue index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 1",
+ "matchPattern": "action order [0-9]*: ife decode action continue.*type 0(x0)?.*allow mark allow tcindex allow prio.*index 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "d9ad",
+ "name": "Create valid ife decode action with drop control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife decode drop index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 1",
+ "matchPattern": "action order [0-9]*: ife decode action drop.*type 0(x0)?.*allow mark allow tcindex allow prio.*index 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "219f",
+ "name": "Create valid ife decode action with reclassify control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife decode reclassify index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 1",
+ "matchPattern": "action order [0-9]*: ife decode action reclassify.*type 0(x0)?.*allow mark allow tcindex allow prio.*index 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "8f44",
+ "name": "Create valid ife decode action with jump control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife decode jump 10 index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 1",
+ "matchPattern": "action order [0-9]*: ife decode action jump 10.*type 0(x0)?.*allow mark allow tcindex allow prio.*index 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "56cf",
+ "name": "Create ife encode action with index exceeding 32-bit maximum",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode allow mark pass index 4294967295999",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action ife index 4294967295999",
+ "matchPattern": "action order [0-9]*: ife encode action pass.*type 0[xX]ED3E.*allow mark.*index 4294967295999",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "ee94",
+ "name": "Create ife encode action with invalid control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode allow mark kuka index 4",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action ife index 4",
+ "matchPattern": "action order [0-9]*: ife encode action kuka.*type 0[xX]ED3E.*allow mark.*index 4",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "b330",
+ "name": "Create ife encode action with cookie",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode allow prio pipe index 4 cookie aabbccddeeff112233445566778800a1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 4",
+ "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]ED3E.*allow prio.*index 4.*cookie aabbccddeeff112233445566778800a1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "bbc0",
+ "name": "Create ife encode action with invalid argument",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode allow foo pipe index 4",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action ife index 4",
+ "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]ED3E.*allow foo.*index 4",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "d54a",
+ "name": "Create ife encode action with invalid type argument",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode allow prio type 70000 pipe index 4",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action ife index 4",
+ "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]11170.*allow prio.*index 4",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "7ee0",
+ "name": "Create ife encode action with invalid mac src argument",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode allow prio src 00:11:22:33:44:pp pipe index 4",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action ife index 4",
+ "matchPattern": "action order [0-9]*: ife encode action pipe.*allow prio.*index 4",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "0a7d",
+ "name": "Create ife encode action with invalid mac dst argument",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode allow prio dst 00.111-22:33:44:aa pipe index 4",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action ife index 4",
+ "matchPattern": "action order [0-9]*: ife encode action pipe.*allow prio.*index 4",
+ "matchCount": "0",
+ "teardown": []
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json
new file mode 100644
index 000000000..db49fd0f8
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json
@@ -0,0 +1,438 @@
+[
+ {
+ "id": "5124",
+ "name": "Add mirred mirror to egress action",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mirred egress mirror index 1 dev lo",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action mirred",
+ "matchPattern": "action order [0-9]*: mirred \\(Egress Mirror to device lo\\).*index 1 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "6fb4",
+ "name": "Add mirred redirect to egress action",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mirred egress redirect index 2 dev lo action pipe",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action mirred",
+ "matchPattern": "action order [0-9]*: mirred \\(Egress Redirect to device lo\\).*index 2 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action mirred",
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "ba38",
+ "name": "Get mirred actions",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action mirred egress mirror index 1 dev lo",
+ "$TC actions add action mirred egress redirect index 2 dev lo"
+ ],
+ "cmdUnderTest": "$TC actions show action mirred",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action mirred",
+ "matchPattern": "[Mirror|Redirect] to device lo",
+ "matchCount": "2",
+ "teardown": [
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "d7c0",
+ "name": "Add invalid mirred direction",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mirred inbound mirror index 20 dev lo",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action mirred",
+ "matchPattern": "action order [0-9]*: mirred \\(.*to device lo\\).*index 20 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "e213",
+ "name": "Add invalid mirred action",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mirred egress remirror index 20 dev lo",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action mirred",
+ "matchPattern": "action order [0-9]*: mirred \\(Egress.*to device lo\\).*index 20 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "2d89",
+ "name": "Add mirred action with invalid device",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mirred egress mirror index 20 dev eltoh",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action mirred",
+ "matchPattern": "action order [0-9]*: mirred \\(.*to device eltoh\\).*index 20 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "300b",
+ "name": "Add mirred action with duplicate index",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action mirred egress redirect index 15 dev lo"
+ ],
+ "cmdUnderTest": "$TC actions add action mirred egress mirror index 15 dev lo",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action mirred",
+ "matchPattern": "action order [0-9]*: mirred \\(.*to device lo\\).*index 15 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "8917",
+ "name": "Add mirred mirror action with control pass",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo pass index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action mirred index 1",
+ "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) pass.*index 1 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "1054",
+ "name": "Add mirred mirror action with control pipe",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo pipe index 15",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action mirred index 15",
+ "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) pipe.*index 15 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "9887",
+ "name": "Add mirred mirror action with control continue",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo continue index 15",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action mirred index 15",
+ "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) continue.*index 15 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "e4aa",
+ "name": "Add mirred mirror action with control reclassify",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo reclassify index 150",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action mirred index 150",
+ "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) reclassify.*index 150 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "ece9",
+ "name": "Add mirred mirror action with control drop",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo drop index 99",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action mirred index 99",
+ "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) drop.*index 99 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "0031",
+ "name": "Add mirred mirror action with control jump",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo jump 10 index 99",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action mirred index 99",
+ "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) jump 10.*index 99 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "407c",
+ "name": "Add mirred mirror action with cookie",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo reclassify cookie aa11bb22cc33dd44ee55",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action mirred",
+ "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) reclassify.*cookie aa11bb22cc33dd44ee55",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "8b69",
+ "name": "Add mirred mirror action with index at 32-bit maximum",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo pipe index 4294967295",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action mirred index 4294967295",
+ "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) pipe.*index 4294967295",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "3f66",
+ "name": "Add mirred mirror action with index exceeding 32-bit maximum",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo pipe index 429496729555",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action mirred index 429496729555",
+ "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) pipe.*index 429496729555",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "a70e",
+ "name": "Delete mirred mirror action",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action mirred egress mirror index 5 dev lo"
+ ],
+ "cmdUnderTest": "$TC actions del action mirred index 5",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action mirred",
+ "matchPattern": "action order [0-9]*: mirred \\(Egress Mirror to device lo\\).*index 5 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "3fb3",
+ "name": "Delete mirred redirect action",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action mirred egress redirect index 5 dev lo"
+ ],
+ "cmdUnderTest": "$TC actions del action mirred index 5",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action mirred",
+ "matchPattern": "action order [0-9]*: mirred \\(Egress Redirect to device lo\\).*index 5 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action mirred"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/nat.json b/tools/testing/selftests/tc-testing/tc-tests/actions/nat.json
new file mode 100644
index 000000000..0080dc2fd
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/nat.json
@@ -0,0 +1,593 @@
+[
+ {
+ "id": "7565",
+ "name": "Add nat action on ingress with default control action",
+ "category": [
+ "actions",
+ "nat"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action nat",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action nat ingress 192.168.1.1 200.200.200.1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action nat",
+ "matchPattern": "action order [0-9]+: nat ingress 192.168.1.1/32 200.200.200.1 pass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action nat"
+ ]
+ },
+ {
+ "id": "fd79",
+ "name": "Add nat action on ingress with pipe control action",
+ "category": [
+ "actions",
+ "nat"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action nat",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action nat ingress 1.1.1.1 2.2.2.1 pipe index 77",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action nat index 77",
+ "matchPattern": "action order [0-9]+: nat ingress 1.1.1.1/32 2.2.2.1 pipe.*index 77 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action nat"
+ ]
+ },
+ {
+ "id": "eab9",
+ "name": "Add nat action on ingress with continue control action",
+ "category": [
+ "actions",
+ "nat"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action nat",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action nat ingress 192.168.10.10 192.168.20.20 continue index 1000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action nat index 1000",
+ "matchPattern": "action order [0-9]+: nat ingress 192.168.10.10/32 192.168.20.20 continue.*index 1000 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action nat"
+ ]
+ },
+ {
+ "id": "c53a",
+ "name": "Add nat action on ingress with reclassify control action",
+ "category": [
+ "actions",
+ "nat"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action nat",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action nat ingress 192.168.10.10 192.168.20.20 reclassify index 1000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action nat index 1000",
+ "matchPattern": "action order [0-9]+: nat ingress 192.168.10.10/32 192.168.20.20 reclassify.*index 1000 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action nat"
+ ]
+ },
+ {
+ "id": "76c9",
+ "name": "Add nat action on ingress with jump control action",
+ "category": [
+ "actions",
+ "nat"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action nat",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action nat ingress 12.18.10.10 12.18.20.20 jump 10 index 22",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action nat index 22",
+ "matchPattern": "action order [0-9]+: nat ingress 12.18.10.10/32 12.18.20.20 jump 10.*index 22 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action nat"
+ ]
+ },
+ {
+ "id": "24c6",
+ "name": "Add nat action on ingress with drop control action",
+ "category": [
+ "actions",
+ "nat"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action nat",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action nat ingress 1.18.1.1 1.18.2.2 drop index 722",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action nat index 722",
+ "matchPattern": "action order [0-9]+: nat ingress 1.18.1.1/32 1.18.2.2 drop.*index 722 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action nat"
+ ]
+ },
+ {
+ "id": "2120",
+ "name": "Add nat action on ingress with maximum index value",
+ "category": [
+ "actions",
+ "nat"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action nat",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action nat ingress 1.18.1.1 1.18.2.2 index 4294967295",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action nat index 4294967295",
+ "matchPattern": "action order [0-9]+: nat ingress 1.18.1.1/32 1.18.2.2 pass.*index 4294967295 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action nat"
+ ]
+ },
+ {
+ "id": "3e9d",
+ "name": "Add nat action on ingress with invalid index value",
+ "category": [
+ "actions",
+ "nat"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action nat",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action nat ingress 1.18.1.1 1.18.2.2 index 4294967295555",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action nat index 4294967295555",
+ "matchPattern": "action order [0-9]+: nat ingress 1.18.1.1/32 1.18.2.2 pass.*index 4294967295555 ref",
+ "matchCount": "0",
+ "teardown": [
+ [
+ "$TC actions flush action nat",
+ 0,
+ 1,
+ 255
+ ]
+ ]
+ },
+ {
+ "id": "f6c9",
+ "name": "Add nat action on ingress with invalid IP address",
+ "category": [
+ "actions",
+ "nat"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action nat",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action nat ingress 1.1.1.1 1.1888.2.2 index 7",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action nat index 7",
+ "matchPattern": "action order [0-9]+: nat ingress 1.1.1.1/32 1.1888.2.2 pass.*index 7 ref",
+ "matchCount": "0",
+ "teardown": [
+ [
+ "$TC actions flush action nat",
+ 0,
+ 1,
+ 255
+ ]
+ ]
+ },
+ {
+ "id": "be25",
+ "name": "Add nat action on ingress with invalid argument",
+ "category": [
+ "actions",
+ "nat"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action nat",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action nat ingress 1.1.1.1 1.18.2.2 another_arg index 12",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action nat index 12",
+ "matchPattern": "action order [0-9]+: nat ingress 1.1.1.1/32 1.18.2.2 pass.*another_arg.*index 12 ref",
+ "matchCount": "0",
+ "teardown": [
+ [
+ "$TC actions flush action nat",
+ 0,
+ 1,
+ 255
+ ]
+ ]
+ },
+ {
+ "id": "a7bd",
+ "name": "Add nat action on ingress with DEFAULT IP address",
+ "category": [
+ "actions",
+ "nat"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action nat",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action nat ingress default 10.10.10.1 index 12",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action nat index 12",
+ "matchPattern": "action order [0-9]+: nat ingress 0.0.0.0/32 10.10.10.1 pass.*index 12 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action nat"
+ ]
+ },
+ {
+ "id": "ee1e",
+ "name": "Add nat action on ingress with ANY IP address",
+ "category": [
+ "actions",
+ "nat"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action nat",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action nat ingress any 10.10.10.1 index 12",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action nat index 12",
+ "matchPattern": "action order [0-9]+: nat ingress 0.0.0.0/32 10.10.10.1 pass.*index 12 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action nat"
+ ]
+ },
+ {
+ "id": "1de8",
+ "name": "Add nat action on ingress with ALL IP address",
+ "category": [
+ "actions",
+ "nat"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action nat",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action nat ingress all 10.10.10.1 index 12",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action nat index 12",
+ "matchPattern": "action order [0-9]+: nat ingress 0.0.0.0/32 10.10.10.1 pass.*index 12 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action nat"
+ ]
+ },
+ {
+ "id": "8dba",
+ "name": "Add nat action on egress with default control action",
+ "category": [
+ "actions",
+ "nat"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action nat",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action nat egress 10.10.10.1 20.20.20.1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action nat",
+ "matchPattern": "action order [0-9]+: nat egress 10.10.10.1/32 20.20.20.1 pass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action nat"
+ ]
+ },
+ {
+ "id": "19a7",
+ "name": "Add nat action on egress with pipe control action",
+ "category": [
+ "actions",
+ "nat"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action nat",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action nat egress 10.10.10.1 20.20.20.1 pipe",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action nat",
+ "matchPattern": "action order [0-9]+: nat egress 10.10.10.1/32 20.20.20.1 pipe",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action nat"
+ ]
+ },
+ {
+ "id": "f1d9",
+ "name": "Add nat action on egress with continue control action",
+ "category": [
+ "actions",
+ "nat"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action nat",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action nat egress 10.10.10.1 20.20.20.1 continue",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action nat",
+ "matchPattern": "action order [0-9]+: nat egress 10.10.10.1/32 20.20.20.1 continue",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action nat"
+ ]
+ },
+ {
+ "id": "6d4a",
+ "name": "Add nat action on egress with reclassify control action",
+ "category": [
+ "actions",
+ "nat"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action nat",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action nat egress 10.10.10.1 20.20.20.1 reclassify",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action nat",
+ "matchPattern": "action order [0-9]+: nat egress 10.10.10.1/32 20.20.20.1 reclassify",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action nat"
+ ]
+ },
+ {
+ "id": "b313",
+ "name": "Add nat action on egress with jump control action",
+ "category": [
+ "actions",
+ "nat"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action nat",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action nat egress 10.10.10.1 20.20.20.1 jump 777",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action nat",
+ "matchPattern": "action order [0-9]+: nat egress 10.10.10.1/32 20.20.20.1 jump 777",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action nat"
+ ]
+ },
+ {
+ "id": "d9fc",
+ "name": "Add nat action on egress with drop control action",
+ "category": [
+ "actions",
+ "nat"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action nat",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action nat egress 10.10.10.1 20.20.20.1 drop",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action nat",
+ "matchPattern": "action order [0-9]+: nat egress 10.10.10.1/32 20.20.20.1 drop",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action nat"
+ ]
+ },
+ {
+ "id": "a895",
+ "name": "Add nat action on egress with DEFAULT IP address",
+ "category": [
+ "actions",
+ "nat"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action nat",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action nat egress default 20.20.20.1 pipe index 10",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action nat index 10",
+ "matchPattern": "action order [0-9]+: nat egress 0.0.0.0/32 20.20.20.1 pipe.*index 10 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action nat"
+ ]
+ },
+ {
+ "id": "2572",
+ "name": "Add nat action on egress with ANY IP address",
+ "category": [
+ "actions",
+ "nat"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action nat",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action nat egress any 20.20.20.1 pipe index 10",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action nat index 10",
+ "matchPattern": "action order [0-9]+: nat egress 0.0.0.0/32 20.20.20.1 pipe.*index 10 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action nat"
+ ]
+ },
+ {
+ "id": "37f3",
+ "name": "Add nat action on egress with ALL IP address",
+ "category": [
+ "actions",
+ "nat"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action nat",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action nat egress all 20.20.20.1 pipe index 10",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action nat index 10",
+ "matchPattern": "action order [0-9]+: nat egress 0.0.0.0/32 20.20.20.1 pipe.*index 10 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action nat"
+ ]
+ },
+ {
+ "id": "6054",
+ "name": "Add nat action on egress with cookie",
+ "category": [
+ "actions",
+ "nat"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action nat",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action nat egress all 20.20.20.1 pipe index 10 cookie aa1bc2d3eeff112233445566778800a1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action nat index 10",
+ "matchPattern": "action order [0-9]+: nat egress 0.0.0.0/32 20.20.20.1 pipe.*index 10 ref.*cookie aa1bc2d3eeff112233445566778800a1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action nat"
+ ]
+ },
+ {
+ "id": "79d6",
+ "name": "Add nat action on ingress with cookie",
+ "category": [
+ "actions",
+ "nat"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action nat",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action nat ingress 192.168.1.1 10.10.10.1 reclassify index 1 cookie 112233445566778899aabbccddeeff11",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action nat index 1",
+ "matchPattern": "action order [0-9]+: nat ingress 192.168.1.1/32 10.10.10.1 reclassify.*index 1 ref.*cookie 112233445566778899aabbccddeeff11",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action nat"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json
new file mode 100644
index 000000000..30f9b54bd
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json
@@ -0,0 +1,719 @@
+[
+ {
+ "id": "49aa",
+ "name": "Add valid basic police action",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 1kbit burst 10k index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action police",
+ "matchPattern": "action order [0-9]*: police 0x1 rate 1Kbit burst 10Kb",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "3abe",
+ "name": "Add police action with duplicate index",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action police rate 4Mbit burst 120k index 9"
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 8kbit burst 24k index 9",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions ls action police",
+ "matchPattern": "action order [0-9]*: police 0x9",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "49fa",
+ "name": "Add valid police action with mtu",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 90kbit burst 10k mtu 1k index 98",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action police index 98",
+ "matchPattern": "action order [0-9]*: police 0x62 rate 90Kbit burst 10Kb mtu 1Kb",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "7943",
+ "name": "Add valid police action with peakrate",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 90kbit burst 10k mtu 2kb peakrate 100kbit index 3",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action police",
+ "matchPattern": "action order [0-9]*: police 0x3 rate 90Kbit burst 10Kb mtu 2Kb peakrate 100Kbit",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "055e",
+ "name": "Add police action with peakrate and no mtu",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 5kbit burst 6kb peakrate 10kbit index 9",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions ls action police",
+ "matchPattern": "action order [0-9]*: police 0x9 rate 5Kb burst 10Kb",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "f057",
+ "name": "Add police action with valid overhead",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 1mbit burst 100k overhead 64 index 64",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action police index 64",
+ "matchPattern": "action order [0-9]*: police 0x40 rate 1Mbit burst 100Kb mtu 2Kb action reclassify overhead 64b",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "7ffb",
+ "name": "Add police action with ethernet linklayer type",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 2mbit burst 200k linklayer ethernet index 8",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions show action police",
+ "matchPattern": "action order [0-9]*: police 0x8 rate 2Mbit burst 200Kb mtu 2Kb action reclassify overhead 0b",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "3dda",
+ "name": "Add police action with atm linklayer type",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 2mbit burst 200k linklayer atm index 8",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions show action police",
+ "matchPattern": "action order [0-9]*: police 0x8 rate 2Mbit burst 200Kb mtu 2Kb action reclassify overhead 0b linklayer atm",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "551b",
+ "name": "Add police actions with conform-exceed control continue/drop",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 3mbit burst 250k conform-exceed continue/drop index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action police index 1",
+ "matchPattern": "action order [0-9]*: police 0x1 rate 3Mbit burst 250Kb mtu 2Kb action continue/drop",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "0c70",
+ "name": "Add police actions with conform-exceed control pass/reclassify",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 3mbit burst 250k conform-exceed pass/reclassify index 4",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action police",
+ "matchPattern": "action order [0-9]*: police 0x4 rate 3Mbit burst 250Kb mtu 2Kb action pass/reclassify",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "d946",
+ "name": "Add police actions with conform-exceed control pass/pipe",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 3mbit burst 250k conform-exceed pass/pipe index 5",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action police",
+ "matchPattern": "action order [0-9]*: police 0x5 rate 3Mbit burst 250Kb mtu 2Kb action pass/pipe",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "ddd6",
+ "name": "Add police action with invalid rate value",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 3tb burst 250k conform-exceed pass/pipe index 5",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions ls action police",
+ "matchPattern": "action order [0-9]*: police 0x5 rate 3Tb burst 250Kb mtu 2Kb action pass/pipe",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "f61c",
+ "name": "Add police action with invalid burst value",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 3kbit burst 250P conform-exceed pass/pipe index 5",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions ls action police",
+ "matchPattern": "action order [0-9]*: police 0x5 rate 3Kbit burst 250Pb mtu 2Kb action pass/pipe",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "6aaf",
+ "name": "Add police actions with conform-exceed control pass/pipe [with numeric values]",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 3mbit burst 250k conform-exceed 0/3 index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action police index 1",
+ "matchPattern": "action order [0-9]*: police 0x1 rate 3Mbit burst 250Kb mtu 2Kb action pass/pipe",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "29b1",
+ "name": "Add police actions with conform-exceed control <invalid>/drop",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 3mbit burst 250k conform-exceed 10/drop index 1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions ls action police",
+ "matchPattern": "action order [0-9]*: police 0x1 rate 3Mbit burst 250Kb mtu 2Kb action ",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "c26f",
+ "name": "Add police action with invalid peakrate value",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 90kbit burst 10k mtu 2kb peakrate 100T index 1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions ls action police",
+ "matchPattern": "action order [0-9]*: police 0x1 rate 90Kbit burst 10Kb mtu 2Kb peakrate 100Tbit",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "db04",
+ "name": "Add police action with invalid mtu value",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 10kbit burst 10k mtu 2Pbit index 1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions ls action police",
+ "matchPattern": "action order [0-9]*: police 0x1 rate 10Kbit burst 1Kb mtu 2Pb",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "f3c9",
+ "name": "Add police action with cookie",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 10mbit burst 10k index 1 cookie a1b1c1d1e1f12233bb",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action police index 1",
+ "matchPattern": "action order [0-9]*: police 0x1 rate 10Mbit burst 10Kb mtu 2Kb.*cookie a1b1c1d1e1f12233bb",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "d190",
+ "name": "Add police action with maximum index",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 10mbit burst 10k index 4294967295",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action police index 4294967295",
+ "matchPattern": "action order [0-9]*: police 0xffffffff rate 10Mbit burst 10Kb mtu 2Kb",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "336e",
+ "name": "Delete police action",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action police rate 5mbit burst 2m index 12"
+ ],
+ "cmdUnderTest": "$TC actions delete action police index 12",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action police",
+ "matchPattern": "action order [0-9]*: police 0xc rate 5Mb burst 2Mb",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "77fa",
+ "name": "Get single police action from many actions",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action police rate 1mbit burst 100k index 1",
+ "$TC actions add action police rate 2mbit burst 200k index 2",
+ "$TC actions add action police rate 3mbit burst 300k index 3",
+ "$TC actions add action police rate 4mbit burst 400k index 4",
+ "$TC actions add action police rate 5mbit burst 500k index 5",
+ "$TC actions add action police rate 6mbit burst 600k index 6",
+ "$TC actions add action police rate 7mbit burst 700k index 7",
+ "$TC actions add action police rate 8mbit burst 800k index 8"
+ ],
+ "cmdUnderTest": "$TC actions get action police index 4",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action police index 4",
+ "matchPattern": "action order [0-9]*: police 0x4 rate 4Mbit burst 400Kb",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "aa43",
+ "name": "Get single police action without specifying index",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action police rate 1mbit burst 100k index 1"
+ ],
+ "cmdUnderTest": "$TC actions get action police",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action police",
+ "matchPattern": "action order [0-9]*: police",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "858b",
+ "name": "List police actions",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action police rate 1mbit burst 100k index 1",
+ "$TC actions add action police rate 2mbit burst 200k index 2",
+ "$TC actions add action police rate 3mbit burst 300k index 3",
+ "$TC actions add action police rate 4mbit burst 400k index 4",
+ "$TC actions add action police rate 5mbit burst 500k index 5",
+ "$TC actions add action police rate 6mbit burst 600k index 6",
+ "$TC actions add action police rate 7mbit burst 700k index 7",
+ "$TC actions add action police rate 8mbit burst 800k index 8"
+ ],
+ "cmdUnderTest": "$TC actions list action police",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action police",
+ "matchPattern": "action order [0-9]*: police 0x[1-8] rate [1-8]Mbit burst [1-8]00Kb",
+ "matchCount": "8",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "1c3a",
+ "name": "Flush police actions",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ "$TC actions add action police rate 1mbit burst 100k index 1",
+ "$TC actions add action police rate 2mbit burst 200k index 2",
+ "$TC actions add action police rate 3mbit burst 300k index 3",
+ "$TC actions add action police rate 4mbit burst 400k index 4",
+ "$TC actions add action police rate 5mbit burst 500k index 5",
+ "$TC actions add action police rate 6mbit burst 600k index 6",
+ "$TC actions add action police rate 7mbit burst 700k index 7",
+ "$TC actions add action police rate 8mbit burst 800k index 8"
+ ],
+ "cmdUnderTest": "$TC actions flush action police",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action police",
+ "matchPattern": "action order [0-9]*: police",
+ "matchCount": "0",
+ "teardown": [
+ ""
+ ]
+ },
+ {
+ "id": "7326",
+ "name": "Add police action with control continue",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 7mbit burst 1m continue index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action police index 1",
+ "matchPattern": "action order [0-9]*: police 0x1 rate 7Mbit burst 1024Kb mtu 2Kb action continue",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "34fa",
+ "name": "Add police action with control drop",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 7mbit burst 1m drop index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action police",
+ "matchPattern": "action order [0-9]*: police 0x1 rate 7Mbit burst 1024Kb mtu 2Kb action drop",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "8dd5",
+ "name": "Add police action with control ok",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 7mbit burst 1m ok index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action police",
+ "matchPattern": "action order [0-9]*: police 0x1 rate 7Mbit burst 1024Kb mtu 2Kb action pass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "b9d1",
+ "name": "Add police action with control reclassify",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 7mbit burst 1m reclassify index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action police index 1",
+ "matchPattern": "action order [0-9]*: police 0x1 rate 7Mbit burst 1024Kb mtu 2Kb action reclassify",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "c534",
+ "name": "Add police action with control pipe",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 7mbit burst 1m pipe index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action police",
+ "matchPattern": "action order [0-9]*: police 0x1 rate 7Mbit burst 1024Kb mtu 2Kb action pipe",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json b/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json
new file mode 100644
index 000000000..618def9bd
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json
@@ -0,0 +1,612 @@
+[
+ {
+ "id": "9784",
+ "name": "Add valid sample action with mandatory arguments",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample rate 10 group 1 index 2",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action sample index 2",
+ "matchPattern": "action order [0-9]+: sample rate 1/10 group 1.*index 2 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action sample"
+ ]
+ },
+ {
+ "id": "5c91",
+ "name": "Add valid sample action with mandatory arguments and continue control action",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample rate 700 group 2 continue index 2",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action sample index 2",
+ "matchPattern": "action order [0-9]+: sample rate 1/700 group 2 continue.*index 2 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action sample"
+ ]
+ },
+ {
+ "id": "334b",
+ "name": "Add valid sample action with mandatory arguments and drop control action",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample rate 10000 group 11 drop index 22",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action sample",
+ "matchPattern": "action order [0-9]+: sample rate 1/10000 group 11 drop.*index 22 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action sample"
+ ]
+ },
+ {
+ "id": "da69",
+ "name": "Add valid sample action with mandatory arguments and reclassify control action",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample rate 20000 group 72 reclassify index 100",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action sample",
+ "matchPattern": "action order [0-9]+: sample rate 1/20000 group 72 reclassify.*index 100 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action sample"
+ ]
+ },
+ {
+ "id": "13ce",
+ "name": "Add valid sample action with mandatory arguments and pipe control action",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample rate 20 group 2 pipe index 100",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action sample",
+ "matchPattern": "action order [0-9]+: sample rate 1/20 group 2 pipe.*index 100 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action sample"
+ ]
+ },
+ {
+ "id": "1886",
+ "name": "Add valid sample action with mandatory arguments and jump control action",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample rate 700 group 25 jump 4 index 200",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action sample index 200",
+ "matchPattern": "action order [0-9]+: sample rate 1/700 group 25 jump 4.*index 200 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action sample"
+ ]
+ },
+ {
+ "id": "7571",
+ "name": "Add sample action with invalid rate",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample rate 0 group 1 index 2",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action sample index 2",
+ "matchPattern": "action order [0-9]+: sample rate 1/0 group 1.*index 2 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action sample"
+ ]
+ },
+ {
+ "id": "b6d4",
+ "name": "Add sample action with mandatory arguments and invalid control action",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample rate 200000 group 52 foo index 1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action sample",
+ "matchPattern": "action order [0-9]+: sample rate 1/200000 group 52 foo.*index 1 ref",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "a874",
+ "name": "Add invalid sample action without mandatory arguments",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample index 1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action sample",
+ "matchPattern": "action order [0-9]+: sample.*index 1 ref",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "ac01",
+ "name": "Add invalid sample action without mandatory argument rate",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample group 10 index 1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action sample",
+ "matchPattern": "action order [0-9]+: sample.*group 10.*index 1 ref",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "4203",
+ "name": "Add invalid sample action without mandatory argument group",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample rate 100 index 10",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action sample index 10",
+ "matchPattern": "action order [0-9]+: sample rate 1/100.*index 10 ref",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "14a7",
+ "name": "Add invalid sample action without mandatory argument group",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample rate 100 index 10",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action sample index 10",
+ "matchPattern": "action order [0-9]+: sample rate 1/100.*index 10 ref",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "8f2e",
+ "name": "Add valid sample action with trunc argument",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample rate 1024 group 4 trunc 1024 index 10",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action sample index 10",
+ "matchPattern": "action order [0-9]+: sample rate 1/1024 group 4 trunc_size 1024 pipe.*index 10 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action sample"
+ ]
+ },
+ {
+ "id": "45f8",
+ "name": "Add sample action with maximum rate argument",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample rate 4294967295 group 4 index 10",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action sample index 10",
+ "matchPattern": "action order [0-9]+: sample rate 1/4294967295 group 4 pipe.*index 10 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action sample"
+ ]
+ },
+ {
+ "id": "ad0c",
+ "name": "Add sample action with maximum trunc argument",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample rate 16000 group 4 trunc 4294967295 index 10",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action sample index 10",
+ "matchPattern": "action order [0-9]+: sample rate 1/16000 group 4 trunc_size 4294967295 pipe.*index 10 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action sample"
+ ]
+ },
+ {
+ "id": "83a9",
+ "name": "Add sample action with maximum group argument",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample rate 4294 group 4294967295 index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action sample index 1",
+ "matchPattern": "action order [0-9]+: sample rate 1/4294 group 4294967295 pipe.*index 1 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action sample"
+ ]
+ },
+ {
+ "id": "ed27",
+ "name": "Add sample action with invalid rate argument",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample rate 4294967296 group 4 index 10",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action sample index 10",
+ "matchPattern": "action order [0-9]+: sample rate 1/4294967296 group 4 pipe.*index 10 ref",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "2eae",
+ "name": "Add sample action with invalid group argument",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample rate 4098 group 5294967299 continue index 1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action sample index 1",
+ "matchPattern": "action order [0-9]+: sample rate 1/4098 group 5294967299 continue.*index 1 ref",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "6ff3",
+ "name": "Add sample action with invalid trunc size",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample rate 1024 group 4 trunc 112233445566 index 11",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action sample index 11",
+ "matchPattern": "action order [0-9]+: sample rate 1/1024 group 4 trunc_size 112233445566.*index 11 ref",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "2b2a",
+ "name": "Add sample action with invalid index",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample rate 1024 group 4 index 5294967299",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action sample index 5294967299",
+ "matchPattern": "action order [0-9]+: sample rate 1/1024 group 4 pipe.*index 5294967299 ref",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "dee2",
+ "name": "Add sample action with maximum allowed index",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample rate 1024 group 4 index 4294967295",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action sample index 4294967295",
+ "matchPattern": "action order [0-9]+: sample rate 1/1024 group 4 pipe.*index 4294967295 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action sample"
+ ]
+ },
+ {
+ "id": "560e",
+ "name": "Add sample action with cookie",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample rate 1024 group 4 index 45 cookie aabbccdd",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action sample index 45",
+ "matchPattern": "action order [0-9]+: sample rate 1/1024 group 4 pipe.*index 45.*cookie aabbccdd",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action sample"
+ ]
+ },
+ {
+ "id": "704a",
+ "name": "Replace existing sample action with new rate argument",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action sample rate 1024 group 4 index 4"
+ ],
+ "cmdUnderTest": "$TC actions replace action sample rate 2048 group 4 index 4",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action sample",
+ "matchPattern": "action order [0-9]+: sample rate 1/2048 group 4 pipe.*index 4",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action sample"
+ ]
+ },
+ {
+ "id": "60eb",
+ "name": "Replace existing sample action with new group argument",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action sample rate 1024 group 4 index 4"
+ ],
+ "cmdUnderTest": "$TC actions replace action sample rate 1024 group 7 index 4",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action sample",
+ "matchPattern": "action order [0-9]+: sample rate 1/1024 group 7 pipe.*index 4",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action sample"
+ ]
+ },
+ {
+ "id": "2cce",
+ "name": "Replace existing sample action with new trunc argument",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action sample rate 1024 group 4 trunc 48 index 4"
+ ],
+ "cmdUnderTest": "$TC actions replace action sample rate 1024 group 7 trunc 64 index 4",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action sample",
+ "matchPattern": "action order [0-9]+: sample rate 1/1024 group 7 trunc_size 64 pipe.*index 4",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action sample"
+ ]
+ },
+ {
+ "id": "59d1",
+ "name": "Replace existing sample action with new control argument",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action sample rate 1024 group 4 reclassify index 4"
+ ],
+ "cmdUnderTest": "$TC actions replace action sample rate 1024 group 7 pipe index 4",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action sample",
+ "matchPattern": "action order [0-9]+: sample rate 1/1024 group 7 pipe.*index 4",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action sample"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json b/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json
new file mode 100644
index 000000000..e89a7aa40
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json
@@ -0,0 +1,130 @@
+[
+ {
+ "id": "b078",
+ "name": "Add simple action",
+ "category": [
+ "actions",
+ "simple"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action simple",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action simple sdata \"A triumph\" index 60",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action simple",
+ "matchPattern": "action order [0-9]*: Simple <A triumph>.*index 60 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action simple"
+ ]
+ },
+ {
+ "id": "6d4c",
+ "name": "Add simple action with duplicate index",
+ "category": [
+ "actions",
+ "simple"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action simple",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action simple sdata \"Aruba\" index 4"
+ ],
+ "cmdUnderTest": "$TC actions add action simple sdata \"Jamaica\" index 4",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action simple",
+ "matchPattern": "action order [0-9]*: Simple <Jamaica>.*ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action simple"
+ ]
+ },
+ {
+ "id": "2542",
+ "name": "List simple actions",
+ "category": [
+ "actions",
+ "simple"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action simple",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action simple sdata \"Rock\"",
+ "$TC actions add action simple sdata \"Paper\"",
+ "$TC actions add action simple sdata \"Scissors\" index 98"
+ ],
+ "cmdUnderTest": "$TC actions list action simple",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action simple",
+ "matchPattern": "action order [0-9]*: Simple <[A-Z][a-z]*>",
+ "matchCount": "3",
+ "teardown": [
+ "$TC actions flush action simple"
+ ]
+ },
+ {
+ "id": "ea67",
+ "name": "Delete simple action",
+ "category": [
+ "actions",
+ "simple"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action simple",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action simple sdata \"Blinkenlights\" index 1"
+ ],
+ "cmdUnderTest": "$TC actions delete action simple index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action simple",
+ "matchPattern": "action order [0-9]*: Simple <Blinkenlights>.*index 1 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action simple"
+ ]
+ },
+ {
+ "id": "8ff1",
+ "name": "Flush simple actions",
+ "category": [
+ "actions",
+ "simple"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action simple",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action simple sdata \"Kirk\"",
+ "$TC actions add action simple sdata \"Spock\" index 50",
+ "$TC actions add action simple sdata \"McCoy\" index 9"
+ ],
+ "cmdUnderTest": "$TC actions flush action simple",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action simple",
+ "matchPattern": "action order [0-9]*: Simple <[A-Z][a-z]*>",
+ "matchCount": "0",
+ "teardown": [
+ ""
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json b/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json
new file mode 100644
index 000000000..5aaf593b9
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json
@@ -0,0 +1,488 @@
+[
+ {
+ "id": "6236",
+ "name": "Add skbedit action with valid mark",
+ "category": [
+ "actions",
+ "skbedit"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbedit mark 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action skbedit",
+ "matchPattern": "action order [0-9]*: skbedit mark 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "407b",
+ "name": "Add skbedit action with invalid mark",
+ "category": [
+ "actions",
+ "skbedit"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbedit mark 666777888999",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action skbedit",
+ "matchPattern": "action order [0-9]*: skbedit mark",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "081d",
+ "name": "Add skbedit action with priority",
+ "category": [
+ "actions",
+ "skbedit"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbedit prio 99",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action skbedit",
+ "matchPattern": "action order [0-9]*: skbedit priority :99",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "cc37",
+ "name": "Add skbedit action with invalid priority",
+ "category": [
+ "actions",
+ "skbedit"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbedit prio foo",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action skbedit",
+ "matchPattern": "action order [0-9]*: skbedit priority",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "3c95",
+ "name": "Add skbedit action with queue_mapping",
+ "category": [
+ "actions",
+ "skbedit"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbedit queue_mapping 909",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action skbedit",
+ "matchPattern": "action order [0-9]*: skbedit queue_mapping 909",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "985c",
+ "name": "Add skbedit action with invalid queue_mapping",
+ "category": [
+ "actions",
+ "skbedit"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbedit queue_mapping 67000",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action skbedit",
+ "matchPattern": "action order [0-9]*: skbedit queue_mapping",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "224f",
+ "name": "Add skbedit action with ptype host",
+ "category": [
+ "actions",
+ "skbedit"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbedit ptype host",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action skbedit",
+ "matchPattern": "action order [0-9]*: skbedit ptype host",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "d1a3",
+ "name": "Add skbedit action with ptype otherhost",
+ "category": [
+ "actions",
+ "skbedit"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbedit ptype otherhost",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action skbedit",
+ "matchPattern": "action order [0-9]*: skbedit ptype otherhost",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "b9c6",
+ "name": "Add skbedit action with invalid ptype",
+ "category": [
+ "actions",
+ "skbedit"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbedit ptype openair",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action skbedit",
+ "matchPattern": "action order [0-9]*: skbedit ptype openair",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "464a",
+ "name": "Add skbedit action with control pipe",
+ "category": [
+ "actions",
+ "skbedit"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbedit ptype host pipe index 11",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action skbedit index 11",
+ "matchPattern": "action order [0-9]*: skbedit ptype host pipe.*index 11 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "212f",
+ "name": "Add skbedit action with control reclassify",
+ "category": [
+ "actions",
+ "skbedit"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbedit mark 56789 reclassify index 90",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action skbedit index 90",
+ "matchPattern": "action order [0-9]*: skbedit mark 56789 reclassify.*index 90 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "0651",
+ "name": "Add skbedit action with control pass",
+ "category": [
+ "actions",
+ "skbedit"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbedit queue_mapping 3 pass index 271",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action skbedit index 271",
+ "matchPattern": "action order [0-9]*: skbedit queue_mapping 3 pass.*index 271 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "cc53",
+ "name": "Add skbedit action with control drop",
+ "category": [
+ "actions",
+ "skbedit"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbedit queue_mapping 3 drop index 271",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action skbedit index 271",
+ "matchPattern": "action order [0-9]*: skbedit queue_mapping 3 drop.*index 271 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "ec16",
+ "name": "Add skbedit action with control jump",
+ "category": [
+ "actions",
+ "skbedit"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbedit priority 8 jump 9 index 2",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action skbedit index 2",
+ "matchPattern": "action order [0-9]*: skbedit priority :8 jump 9.*index 2 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "db54",
+ "name": "Add skbedit action with control continue",
+ "category": [
+ "actions",
+ "skbedit"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbedit priority 16 continue index 32",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action skbedit index 32",
+ "matchPattern": "action order [0-9]*: skbedit priority :16 continue.*index 32 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "1055",
+ "name": "Add skbedit action with cookie",
+ "category": [
+ "actions",
+ "skbedit"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbedit priority 16 continue index 32 cookie deadbeef",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action skbedit index 32",
+ "matchPattern": "action order [0-9]*: skbedit priority :16 continue.*index 32 ref.*cookie deadbeef",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "5172",
+ "name": "List skbedit actions",
+ "category": [
+ "actions",
+ "skbedit"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbedit",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action skbedit ptype otherhost",
+ "$TC actions add action skbedit ptype broadcast",
+ "$TC actions add action skbedit mark 59",
+ "$TC actions add action skbedit mark 409"
+ ],
+ "cmdUnderTest": "$TC actions list action skbedit",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action skbedit",
+ "matchPattern": "action order [0-9]*: skbedit",
+ "matchCount": "4",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "a6d6",
+ "name": "Add skbedit action with index",
+ "category": [
+ "actions",
+ "skbedit"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbedit mark 808 index 4040404040",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action skbedit",
+ "matchPattern": "index 4040404040",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "38f3",
+ "name": "Delete skbedit action",
+ "category": [
+ "actions",
+ "skbedit"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbedit",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action skbedit mark 42 index 9009"
+ ],
+ "cmdUnderTest": "$TC actions del action skbedit index 9009",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action skbedit",
+ "matchPattern": "action order [0-9]*: skbedit mark 42",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "ce97",
+ "name": "Flush skbedit actions",
+ "category": [
+ "actions",
+ "skbedit"
+ ],
+ "setup": [
+ "$TC actions add action skbedit mark 500",
+ "$TC actions add action skbedit mark 501",
+ "$TC actions add action skbedit mark 502",
+ "$TC actions add action skbedit mark 503",
+ "$TC actions add action skbedit mark 504",
+ "$TC actions add action skbedit mark 505",
+ "$TC actions add action skbedit mark 506"
+ ],
+ "cmdUnderTest": "$TC actions flush action skbedit",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action skbedit",
+ "matchPattern": "action order [0-9]*: skbedit",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json b/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json
new file mode 100644
index 000000000..fe3326e93
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json
@@ -0,0 +1,396 @@
+[
+ {
+ "id": "7d50",
+ "name": "Add skbmod action to set destination mac",
+ "category": [
+ "actions",
+ "skbmod"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbmod",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbmod set dmac 11:22:33:44:55:66 index 5",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action skbmod",
+ "matchPattern": "action order [0-9]*: skbmod pipe set dmac 11:22:33:44:55:66\\s+index 5",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbmod"
+ ]
+ },
+ {
+ "id": "9b29",
+ "name": "Add skbmod action to set source mac",
+ "category": [
+ "actions",
+ "skbmod"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbmod",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbmod set smac 77:88:99:AA:BB:CC index 7",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action skbmod index 7",
+ "matchPattern": "action order [0-9]*: skbmod pipe set smac 77:88:99:aa:bb:cc\\s+index 7",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbmod"
+ ]
+ },
+ {
+ "id": "1724",
+ "name": "Add skbmod action with invalid mac",
+ "category": [
+ "actions",
+ "skbmod"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbmod",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbmod set smac 00:44:55:44:55",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions ls action skbmod",
+ "matchPattern": "action order [0-9]*: skbmod pipe set smac 00:44:55:44:55",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action skbmod"
+ ]
+ },
+ {
+ "id": "3cf1",
+ "name": "Add skbmod action with valid etype",
+ "category": [
+ "actions",
+ "skbmod"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbmod",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbmod set etype 0xfefe",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action skbmod",
+ "matchPattern": "action order [0-9]*: skbmod pipe set etype 0xFEFE",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbmod"
+ ]
+ },
+ {
+ "id": "a749",
+ "name": "Add skbmod action with invalid etype",
+ "category": [
+ "actions",
+ "skbmod"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbmod",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbmod set etype 0xfefef",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions ls action skbmod",
+ "matchPattern": "action order [0-9]*: skbmod pipe set etype 0xFEFEF",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action skbmod"
+ ]
+ },
+ {
+ "id": "bfe6",
+ "name": "Add skbmod action to swap mac",
+ "category": [
+ "actions",
+ "skbmod"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbmod",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbmod swap mac",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action skbmod index 1",
+ "matchPattern": "action order [0-9]*: skbmod pipe swap mac",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbmod"
+ ]
+ },
+ {
+ "id": "839b",
+ "name": "Add skbmod action with control pipe",
+ "category": [
+ "actions",
+ "skbmod"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbmod",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbmod swap mac pipe",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action skbmod",
+ "matchPattern": "action order [0-9]*: skbmod pipe swap mac",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbmod"
+ ]
+ },
+ {
+ "id": "c167",
+ "name": "Add skbmod action with control reclassify",
+ "category": [
+ "actions",
+ "skbmod"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbmod",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbmod set etype 0xbeef reclassify",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action skbmod",
+ "matchPattern": "action order [0-9]*: skbmod reclassify set etype 0xBEEF",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbmod"
+ ]
+ },
+ {
+ "id": "0c2f",
+ "name": "Add skbmod action with control drop",
+ "category": [
+ "actions",
+ "skbmod"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbmod",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbmod set etype 0x0001 drop",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action skbmod index 1",
+ "matchPattern": "action order [0-9]*: skbmod drop set etype 0x1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbmod"
+ ]
+ },
+ {
+ "id": "d113",
+ "name": "Add skbmod action with control continue",
+ "category": [
+ "actions",
+ "skbmod"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbmod",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbmod set etype 0x1 continue",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action skbmod",
+ "matchPattern": "action order [0-9]*: skbmod continue set etype 0x1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbmod"
+ ]
+ },
+ {
+ "id": "7242",
+ "name": "Add skbmod action with control pass",
+ "category": [
+ "actions",
+ "skbmod"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbmod",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbmod set smac 00:00:00:00:00:01 pass",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action skbmod",
+ "matchPattern": "action order [0-9]*: skbmod pass set smac 00:00:00:00:00:01",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbmod"
+ ]
+ },
+ {
+ "id": "6046",
+ "name": "Add skbmod action with control reclassify and cookie",
+ "category": [
+ "actions",
+ "skbmod"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbmod",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbmod set smac 00:01:02:03:04:01 reclassify index 1 cookie ddeeffaabb11cc22",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action skbmod index 1",
+ "matchPattern": "action order [0-9]*: skbmod reclassify set smac 00:01:02:03:04:01.*index 1 ref.*cookie ddeeffaabb11cc22",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbmod"
+ ]
+ },
+ {
+ "id": "58cb",
+ "name": "List skbmod actions",
+ "category": [
+ "actions",
+ "skbmod"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbmod",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action skbmod set etype 0x0001",
+ "$TC actions add action skbmod set etype 0x0011",
+ "$TC actions add action skbmod set etype 0x0021",
+ "$TC actions add action skbmod set etype 0x0031",
+ "$TC actions add action skbmod set etype 0x0041"
+ ],
+ "cmdUnderTest": "$TC actions ls action skbmod",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action skbmod",
+ "matchPattern": "action order [0-9]*: skbmod",
+ "matchCount": "5",
+ "teardown": [
+ "$TC actions flush action skbmod"
+ ]
+ },
+ {
+ "id": "9aa8",
+ "name": "Get a single skbmod action from a list",
+ "category": [
+ "actions",
+ "skbmod"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbmod",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action skbmod set etype 0x0001",
+ "$TC actions add action skbmod set etype 0x0011",
+ "$TC actions add action skbmod set etype 0x0021",
+ "$TC actions add action skbmod set etype 0x0031",
+ "$TC actions add action skbmod set etype 0x0041"
+ ],
+ "cmdUnderTest": "$TC actions ls action skbmod",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action skbmod index 4",
+ "matchPattern": "action order [0-9]*: skbmod pipe set etype 0x31",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbmod"
+ ]
+ },
+ {
+ "id": "e93a",
+ "name": "Delete an skbmod action",
+ "category": [
+ "actions",
+ "skbmod"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbmod",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action skbmod set etype 0x1111 index 909"
+ ],
+ "cmdUnderTest": "$TC actions del action skbmod index 909",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action skbmod",
+ "matchPattern": "action order [0-9]*: skbmod pipe set etype 0x1111\\s+index 909",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action skbmod"
+ ]
+ },
+ {
+ "id": "40c2",
+ "name": "Flush skbmod actions",
+ "category": [
+ "actions",
+ "skbmod"
+ ],
+ "setup": [
+ "$TC actions add action skbmod set etype 0x0001",
+ "$TC actions add action skbmod set etype 0x0011",
+ "$TC actions add action skbmod set etype 0x0021",
+ "$TC actions add action skbmod set etype 0x0031",
+ "$TC actions add action skbmod set etype 0x0041"
+ ],
+ "cmdUnderTest": "$TC actions flush action skbmod",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action skbmod",
+ "matchPattern": "action order [0-9]*: skbmod",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action skbmod"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json b/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json
new file mode 100644
index 000000000..e7e15a733
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json
@@ -0,0 +1,888 @@
+[
+ {
+ "id": "2b11",
+ "name": "Add tunnel_key set action with mandatory parameters",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 20.20.20.2 id 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action tunnel_key",
+ "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 10.10.10.1.*dst_ip 20.20.20.2.*key_id 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action tunnel_key"
+ ]
+ },
+ {
+ "id": "dc6b",
+ "name": "Add tunnel_key set action with missing mandatory src_ip parameter",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action tunnel_key set dst_ip 20.20.20.2 id 100",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action tunnel_key",
+ "matchPattern": "action order [0-9]+: tunnel_key set.*dst_ip 20.20.20.2.*key_id 100",
+ "matchCount": "0",
+ "teardown": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ]
+ },
+ {
+ "id": "7f25",
+ "name": "Add tunnel_key set action with missing mandatory dst_ip parameter",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 10.10.10.1 id 100",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action tunnel_key",
+ "matchPattern": "action order [0-9]+: tunnel_key set.*src_ip 10.10.10.1.*key_id 100",
+ "matchCount": "0",
+ "teardown": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ]
+ },
+ {
+ "id": "a5e0",
+ "name": "Add tunnel_key set action with invalid src_ip parameter",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 300.168.100.1 dst_ip 192.168.200.1 id 7 index 1",
+ "expExitCode": "1",
+ "verifyCmd": "$TC actions get action tunnel_key index 1",
+ "matchPattern": "action order [0-9]+: tunnel_key set.*src_ip 300.168.100.1.*dst_ip 192.168.200.1.*key_id 7.*index 1 ref",
+ "matchCount": "0",
+ "teardown": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ]
+ },
+ {
+ "id": "eaa8",
+ "name": "Add tunnel_key set action with invalid dst_ip parameter",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 192.168.100.1 dst_ip 192.168.800.1 id 10 index 11",
+ "expExitCode": "1",
+ "verifyCmd": "$TC actions get action tunnel_key index 11",
+ "matchPattern": "action order [0-9]+: tunnel_key set.*src_ip 192.168.100.1.*dst_ip 192.168.800.1.*key_id 10.*index 11 ref",
+ "matchCount": "0",
+ "teardown": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ]
+ },
+ {
+ "id": "3b09",
+ "name": "Add tunnel_key set action with invalid id parameter",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 112233445566778899 index 1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action tunnel_key index 1",
+ "matchPattern": "action order [0-9]+: tunnel_key set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 112233445566778899.*index 1 ref",
+ "matchCount": "0",
+ "teardown": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ]
+ },
+ {
+ "id": "9625",
+ "name": "Add tunnel_key set action with invalid dst_port parameter",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 11 dst_port 998877 index 1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action tunnel_key index 1",
+ "matchPattern": "action order [0-9]+: tunnel_key set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 11.*dst_port 998877.*index 1 ref",
+ "matchCount": "0",
+ "teardown": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ]
+ },
+ {
+ "id": "05af",
+ "name": "Add tunnel_key set action with optional dst_port parameter",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 192.168.100.1 dst_ip 192.168.200.1 id 789 dst_port 4000 index 10",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action tunnel_key index 10",
+ "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 192.168.100.1.*dst_ip 192.168.200.1.*key_id 789.*dst_port 4000.*index 10 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action tunnel_key"
+ ]
+ },
+ {
+ "id": "da80",
+ "name": "Add tunnel_key set action with index at 32-bit maximum",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 11 index 4294967295",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action tunnel_key index 4294967295",
+ "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*id 11.*index 4294967295 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action tunnel_key"
+ ]
+ },
+ {
+ "id": "d407",
+ "name": "Add tunnel_key set action with index exceeding 32-bit maximum",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 11 index 4294967295678",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action tunnel_key index 4294967295678",
+ "matchPattern": "action order [0-9]+: tunnel_key set.*index 4294967295678 ref",
+ "matchCount": "0",
+ "teardown": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ]
+ },
+ {
+ "id": "5cba",
+ "name": "Add tunnel_key set action with id value at 32-bit maximum",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 4294967295 index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action tunnel_key index 1",
+ "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 4294967295.*index 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action tunnel_key"
+ ]
+ },
+ {
+ "id": "e84a",
+ "name": "Add tunnel_key set action with id value exceeding 32-bit maximum",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42949672955 index 1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action tunnel_key index 4294967295",
+ "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42949672955.*index 1",
+ "matchCount": "0",
+ "teardown": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ]
+ },
+ {
+ "id": "9c19",
+ "name": "Add tunnel_key set action with dst_port value at 16-bit maximum",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 429 dst_port 65535 index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action tunnel_key index 1",
+ "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 429.*dst_port 65535.*index 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action tunnel_key"
+ ]
+ },
+ {
+ "id": "3bd9",
+ "name": "Add tunnel_key set action with dst_port value exceeding 16-bit maximum",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 429 dst_port 65535789 index 1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action tunnel_key index 1",
+ "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 429.*dst_port 65535789.*index 1",
+ "matchCount": "0",
+ "teardown": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ]
+ },
+ {
+ "id": "68e2",
+ "name": "Add tunnel_key unset action",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action tunnel_key unset index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action tunnel_key index 1",
+ "matchPattern": "action order [0-9]+: tunnel_key.*unset.*index 1 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action tunnel_key"
+ ]
+ },
+ {
+ "id": "6192",
+ "name": "Add tunnel_key unset continue action",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action tunnel_key unset continue index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action tunnel_key index 1",
+ "matchPattern": "action order [0-9]+: tunnel_key.*unset continue.*index 1 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action tunnel_key"
+ ]
+ },
+ {
+ "id": "061d",
+ "name": "Add tunnel_key set continue action with cookie",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 192.168.10.1 dst_ip 192.168.20.2 id 123 continue index 1 cookie aa11bb22cc33dd44ee55ff66aa11b1b2",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action tunnel_key index 1",
+ "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 192.168.10.1.*dst_ip 192.168.20.2.*key_id 123.*csum continue.*index 1.*cookie aa11bb22cc33dd44ee55ff66aa11b1b2",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action tunnel_key"
+ ]
+ },
+ {
+ "id": "8acb",
+ "name": "Add tunnel_key set continue action with invalid cookie",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 192.168.10.1 dst_ip 192.168.20.2 id 123 continue index 1 cookie aa11bb22cc33dd44ee55ff66aa11b1b2777888",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action tunnel_key index 1",
+ "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 192.168.10.1.*dst_ip 192.168.20.2.*key_id 123.*csum continue.*index 1.*cookie aa11bb22cc33dd44ee55ff66aa11b1b2777888",
+ "matchCount": "0",
+ "teardown": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ]
+ },
+ {
+ "id": "a07e",
+ "name": "Add tunnel_key action with no set/unset command specified",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action tunnel_key src_ip 10.10.10.1 dst_ip 20.20.20.2 id 1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action tunnel_key index 1",
+ "matchPattern": "action order [0-9]+: tunnel_key.*src_ip 10.10.10.1.*dst_ip 20.20.20.2.*key_id 1",
+ "matchCount": "0",
+ "teardown": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ]
+ },
+ {
+ "id": "b227",
+ "name": "Add tunnel_key action with csum option",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 20.20.20.2 id 1 csum index 99",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action tunnel_key index 99",
+ "matchPattern": "action order [0-9]+: tunnel_key.*src_ip 10.10.10.1.*dst_ip 20.20.20.2.*key_id 1.*csum pipe.*index 99",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action tunnel_key"
+ ]
+ },
+ {
+ "id": "58a7",
+ "name": "Add tunnel_key action with nocsum option",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 10.10.10.2 id 7823 nocsum index 234",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action tunnel_key index 234",
+ "matchPattern": "action order [0-9]+: tunnel_key.*src_ip 10.10.10.1.*dst_ip 10.10.10.2.*key_id 7823.*nocsum pipe.*index 234",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action tunnel_key"
+ ]
+ },
+ {
+ "id": "2575",
+ "name": "Add tunnel_key action with not-supported parameter",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 10.10.10.2 id 7 foobar 999 index 4",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action tunnel_key index 4",
+ "matchPattern": "action order [0-9]+: tunnel_key.*src_ip 10.10.10.1.*dst_ip 10.10.10.2.*key_id 7.*foobar 999.*index 4",
+ "matchCount": "0",
+ "teardown": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ]
+ },
+ {
+ "id": "7a88",
+ "name": "Add tunnel_key action with cookie parameter",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 10.10.10.2 id 7 index 4 cookie aa11bb22cc33dd44ee55ff66aa11b1b2",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action tunnel_key index 4",
+ "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 10.10.10.1.*dst_ip 10.10.10.2.*key_id 7.*csum pipe.*index 4 ref.*cookie aa11bb22cc33dd44ee55ff66aa11b1b2",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action tunnel_key"
+ ]
+ },
+ {
+ "id": "4f20",
+ "name": "Add tunnel_key action with a single geneve option parameter",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 dst_port 6081 geneve_opts 0102:80:00880022 index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action tunnel_key index 1",
+ "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt 0102:80:00880022.*index 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action tunnel_key"
+ ]
+ },
+ {
+ "id": "e33d",
+ "name": "Add tunnel_key action with multiple geneve options parameter",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 dst_port 6081 geneve_opts 0102:80:00880022,0408:42:0040007611223344,0111:02:1020304011223344 index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action tunnel_key index 1",
+ "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt 0102:80:00880022,0408:42:0040007611223344,0111:02:1020304011223344.*index 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action tunnel_key"
+ ]
+ },
+ {
+ "id": "0778",
+ "name": "Add tunnel_key action with invalid class geneve option parameter",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 dst_port 6081 geneve_opts 824212:80:00880022 index 1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action tunnel_key index 1",
+ "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt 824212:80:00880022.*index 1",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action tunnel_key"
+ ]
+ },
+ {
+ "id": "4ae8",
+ "name": "Add tunnel_key action with invalid type geneve option parameter",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 dst_port 6081 geneve_opts 0102:4224:00880022 index 1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action tunnel_key index 1",
+ "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt 0102:4224:00880022.*index 1",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action tunnel_key"
+ ]
+ },
+ {
+ "id": "4039",
+ "name": "Add tunnel_key action with short data length geneve option parameter",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 dst_port 6081 geneve_opts 0102:80:4288 index 1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action tunnel_key index 1",
+ "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt 0102:80:4288.*index 1",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action tunnel_key"
+ ]
+ },
+ {
+ "id": "26a6",
+ "name": "Add tunnel_key action with non-multiple of 4 data length geneve option parameter",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 dst_port 6081 geneve_opts 0102:80:4288428822 index 1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action tunnel_key index 1",
+ "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt 0102:80:4288428822.*index 1",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action tunnel_key"
+ ]
+ },
+ {
+ "id": "f44d",
+ "name": "Add tunnel_key action with incomplete geneve options parameter",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 dst_port 6081 geneve_opts 0102:80:00880022,0408:42: index 1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action tunnel_key index 1",
+ "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt 0102:80:00880022,0408:42:.*index 1",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action tunnel_key"
+ ]
+ },
+ {
+ "id": "7afc",
+ "name": "Replace tunnel_key set action with all parameters",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 20.20.20.2 dst_port 3128 csum id 1 index 1"
+ ],
+ "cmdUnderTest": "$TC actions replace action tunnel_key set src_ip 11.11.11.1 dst_ip 21.21.21.2 dst_port 3129 nocsum id 11 index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action tunnel_key index 1",
+ "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 11.11.11.1.*dst_ip 21.21.21.2.*key_id 11.*dst_port 3129.*nocsum pipe.*index 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action tunnel_key"
+ ]
+ },
+ {
+ "id": "364d",
+ "name": "Replace tunnel_key set action with all parameters and cookie",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 20.20.20.2 dst_port 3128 nocsum id 1 index 1 cookie aabbccddeeff112233445566778800a"
+ ],
+ "cmdUnderTest": "$TC actions replace action tunnel_key set src_ip 11.11.11.1 dst_ip 21.21.21.2 dst_port 3129 id 11 csum reclassify index 1 cookie a1b1c1d1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action tunnel_key index 1",
+ "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 11.11.11.1.*dst_ip 21.21.21.2.*key_id 11.*dst_port 3129.*csum reclassify.*index 1.*cookie a1b1c1d1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action tunnel_key"
+ ]
+ },
+ {
+ "id": "937c",
+ "name": "Fetch all existing tunnel_key actions",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 20.20.20.2 dst_port 3128 nocsum id 1 pipe index 1",
+ "$TC actions add action tunnel_key set src_ip 11.10.10.1 dst_ip 21.20.20.2 dst_port 3129 csum id 2 jump 10 index 2",
+ "$TC actions add action tunnel_key set src_ip 12.10.10.1 dst_ip 22.20.20.2 dst_port 3130 csum id 3 pass index 3",
+ "$TC actions add action tunnel_key set src_ip 13.10.10.1 dst_ip 23.20.20.2 dst_port 3131 nocsum id 4 continue index 4"
+ ],
+ "cmdUnderTest": "$TC actions list action tunnel_key",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action tunnel_key",
+ "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 10.10.10.1.*dst_ip 20.20.20.2.*key_id 1.*dst_port 3128.*nocsum pipe.*index 1.*set.*src_ip 11.10.10.1.*dst_ip 21.20.20.2.*key_id 2.*dst_port 3129.*csum jump 10.*index 2.*set.*src_ip 12.10.10.1.*dst_ip 22.20.20.2.*key_id 3.*dst_port 3130.*csum pass.*index 3.*set.*src_ip 13.10.10.1.*dst_ip 23.20.20.2.*key_id 4.*dst_port 3131.*nocsum continue.*index 4",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action tunnel_key"
+ ]
+ },
+ {
+ "id": "6783",
+ "name": "Flush all existing tunnel_key actions",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 20.20.20.2 dst_port 3128 nocsum id 1 pipe index 1",
+ "$TC actions add action tunnel_key set src_ip 11.10.10.1 dst_ip 21.20.20.2 dst_port 3129 csum id 2 reclassify index 2",
+ "$TC actions add action tunnel_key set src_ip 12.10.10.1 dst_ip 22.20.20.2 dst_port 3130 csum id 3 pass index 3",
+ "$TC actions add action tunnel_key set src_ip 13.10.10.1 dst_ip 23.20.20.2 dst_port 3131 nocsum id 4 continue index 4"
+ ],
+ "cmdUnderTest": "$TC actions flush action tunnel_key",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action tunnel_key",
+ "matchPattern": "action order [0-9]+:.*",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action tunnel_key"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json b/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json
new file mode 100644
index 000000000..69ea09eef
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json
@@ -0,0 +1,692 @@
+[
+ {
+ "id": "6f5a",
+ "name": "Add vlan pop action with pipe opcode",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan pop pipe index 8",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action vlan",
+ "matchPattern": "action order [0-9]+: vlan.*pop.*pipe.*index 8 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "df35",
+ "name": "Add vlan pop action with pass opcode",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan pop pass index 8",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action vlan index 8",
+ "matchPattern": "action order [0-9]+: vlan.*pop.*pass.*index 8 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "b0d4",
+ "name": "Add vlan pop action with drop opcode",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan pop drop index 8",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action vlan index 8",
+ "matchPattern": "action order [0-9]+: vlan.*pop.*drop.*index 8 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "95ee",
+ "name": "Add vlan pop action with reclassify opcode",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan pop reclassify index 8",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action vlan index 8",
+ "matchPattern": "action order [0-9]+: vlan.*pop.*reclassify.*index 8 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "0283",
+ "name": "Add vlan pop action with continue opcode",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan pop continue index 8",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action vlan index 8",
+ "matchPattern": "action order [0-9]+: vlan.*pop.*continue.*index 8 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "b6b9",
+ "name": "Add vlan pop action with jump opcode",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan pop jump 10 index 8",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action vlan",
+ "matchPattern": "action order [0-9]+: vlan.*jump 10.*index 8 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "87c3",
+ "name": "Add vlan pop action with trap opcode",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan pop trap index 8",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action vlan",
+ "matchPattern": "action order [0-9]+: vlan.*pop trap.*index 8 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "a178",
+ "name": "Add vlan pop action with invalid opcode",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan pop foo index 8",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action vlan",
+ "matchPattern": "action order [0-9]+: vlan.*pop.*foo.*index 8 ref",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "ee6f",
+ "name": "Add vlan pop action with index at 32-bit maximum",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan pop index 4294967295",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action vlan",
+ "matchPattern": "action order [0-9]+: vlan.*pop.*index 4294967295 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "0dfa",
+ "name": "Add vlan pop action with index exceeding 32-bit maximum",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan pop reclassify index 429496729599",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action vlan index 429496729599",
+ "matchPattern": "action order [0-9]+: vlan.*pop.reclassify.*index 429496729599",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "2b91",
+ "name": "Add vlan invalid action",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan bad_mode",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action vlan",
+ "matchPattern": "action order [0-9]+: vlan.*bad_mode",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "57fc",
+ "name": "Add vlan push action with invalid protocol type",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan push protocol ABCD",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action vlan",
+ "matchPattern": "action order [0-9]+: vlan.*push",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "3989",
+ "name": "Add vlan push action with default protocol and priority",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan push id 123 index 18",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action vlan index 18",
+ "matchPattern": "action order [0-9]+: vlan.*push id 123 protocol 802.1Q priority 0 pipe.*index 18 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "79dc",
+ "name": "Add vlan push action with protocol 802.1Q and priority 3",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan push id 77 protocol 802.1Q priority 3 continue index 734",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action vlan index 734",
+ "matchPattern": "action order [0-9]+: vlan.*push id 77 protocol 802.1Q priority 3 continue.*index 734 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "4d73",
+ "name": "Add vlan push action with protocol 802.1AD",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan push id 1024 protocol 802.1AD pass index 10000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action vlan index 10000",
+ "matchPattern": "action order [0-9]+: vlan.*push id 1024 protocol 802.1ad priority 0 pass.*index 10000 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "1f4b",
+ "name": "Add vlan push action with maximum 12-bit vlan ID",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan push id 4094 index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action vlan index 1",
+ "matchPattern": "action order [0-9]+: vlan.*push id 4094.*protocol 802.1Q.*priority 0.*index 1 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "1f7b",
+ "name": "Add vlan push action with invalid vlan ID",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan push id 5678 index 1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action vlan",
+ "matchPattern": "action order [0-9]+: vlan.*push id 5678.*index 1 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "fe40",
+ "name": "Add vlan push action with maximum 3-bit IEEE 802.1p priority",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan push id 4 priority 7 reclassify index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action vlan index 1",
+ "matchPattern": "action order [0-9]+: vlan.*push id 4.*protocol 802.1Q.*priority 7.*reclassify.*index 1 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "5d02",
+ "name": "Add vlan push action with invalid IEEE 802.1p priority",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan push id 5 priority 10 index 1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action vlan",
+ "matchPattern": "action order [0-9]+: vlan.*push id 5.*index 1 ref",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "6812",
+ "name": "Add vlan modify action for protocol 802.1Q",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan modify protocol 802.1Q id 5 index 100",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action vlan index 100",
+ "matchPattern": "action order [0-9]+: vlan.*modify id 100 protocol 802.1Q priority 0 pipe.*index 100 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "5a31",
+ "name": "Add vlan modify action for protocol 802.1AD",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan modify protocol 802.1ad id 500 reclassify index 12",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action vlan index 12",
+ "matchPattern": "action order [0-9]+: vlan.*modify id 500 protocol 802.1ad priority 0 reclassify.*index 12 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "3deb",
+ "name": "Replace existing vlan push action with new ID",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action vlan push id 500 pipe index 12"
+ ],
+ "cmdUnderTest": "$TC actions replace action vlan push id 700 pipe index 12",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action vlan index 12",
+ "matchPattern": "action order [0-9]+: vlan.*push id 700 protocol 802.1Q priority 0 pipe.*index 12 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "9e76",
+ "name": "Replace existing vlan push action with new protocol",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action vlan push id 1 protocol 802.1Q pipe index 1"
+ ],
+ "cmdUnderTest": "$TC actions replace action vlan push id 1 protocol 802.1ad pipe index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action vlan index 1",
+ "matchPattern": "action order [0-9]+: vlan.*push id 1 protocol 802.1ad priority 0 pipe.*index 1 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "ede4",
+ "name": "Replace existing vlan push action with new priority",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action vlan push id 1 protocol 802.1Q priority 3 reclassify index 1"
+ ],
+ "cmdUnderTest": "$TC actions replace action vlan push id 1 priority 4 reclassify index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action vlan index 1",
+ "matchPattern": "action order [0-9]+: vlan.*push id 1 protocol 802.1Q priority 4 reclassify.*index 1 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "d413",
+ "name": "Replace existing vlan pop action with new cookie",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action vlan pop continue index 1 cookie 22334455"
+ ],
+ "cmdUnderTest": "$TC actions replace action vlan pop continue index 1 cookie a1b1c2d1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action vlan index 1",
+ "matchPattern": "action order [0-9]+: vlan.*pop continue.*index 1 ref.*cookie a1b1c2d1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "83a4",
+ "name": "Delete vlan pop action",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action vlan pop index 44"
+ ],
+ "cmdUnderTest": "$TC actions del action vlan index 44",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action vlan",
+ "matchPattern": "action order [0-9]+: vlan.*pop.*index 44 ref",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "ed1e",
+ "name": "Delete vlan push action for protocol 802.1Q",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action vlan push id 4094 protocol 802.1Q index 999"
+ ],
+ "cmdUnderTest": "$TC actions del action vlan index 999",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action vlan",
+ "matchPattern": "action order [0-9]+: vlan.*push id 4094 protocol 802.1Q priority 0 pipe.*index 999 ref",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "a2a3",
+ "name": "Flush vlan actions",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action vlan push id 4 protocol 802.1ad index 10",
+ "$TC actions add action vlan push id 4 protocol 802.1ad index 11",
+ "$TC actions add action vlan push id 4 protocol 802.1ad index 12",
+ "$TC actions add action vlan push id 4 protocol 802.1ad index 13"
+ ],
+ "cmdUnderTest": "$TC actions flush action vlan",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action vlan",
+ "matchPattern": "action order [0-9]+: vlan.*push id 4 protocol 802.1ad",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "1d78",
+ "name": "Add vlan push action with cookie",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan push id 4 cookie a0a0a0a0a0a0a0",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action vlan",
+ "matchPattern": "action order [0-9]+: vlan.*push id 4.*cookie a0a0a0a0a0a0a0",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/fw.json b/tools/testing/selftests/tc-testing/tc-tests/filters/fw.json
new file mode 100644
index 000000000..3b97cfd7e
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/fw.json
@@ -0,0 +1,1049 @@
+[
+ {
+ "id": "901f",
+ "name": "Add fw filter with prio at 32-bit maxixum",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 65535 fw action ok",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 65535 protocol all fw",
+ "matchPattern": "pref 65535 fw.*handle 0x1.*gact action pass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "51e2",
+ "name": "Add fw filter with prio exceeding 32-bit maxixum",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 65536 fw action ok",
+ "expExitCode": "255",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 65536 protocol all fw",
+ "matchPattern": "pref 65536 fw.*handle 0x1.*gact action pass",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "d987",
+ "name": "Add fw filter with action ok",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw action ok",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+ "matchPattern": "handle 0x1.*gact action pass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "affe",
+ "name": "Add fw filter with action continue",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw action continue",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+ "matchPattern": "handle 0x1.*gact action continue",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "28bc",
+ "name": "Add fw filter with action pipe",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw action pipe",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+ "matchPattern": "handle 0x1.*gact action pipe",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "8da2",
+ "name": "Add fw filter with action drop",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw action drop",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol all prio 1 fw",
+ "matchPattern": "handle 0x1.*gact action drop",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "9436",
+ "name": "Add fw filter with action reclassify",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw action reclassify",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+ "matchPattern": "handle 0x1.*gact action reclassify",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "95bb",
+ "name": "Add fw filter with action jump 10",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw action jump 10",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+ "matchPattern": "handle 0x1.*gact action jump 10",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "3d74",
+ "name": "Add fw filter with action goto chain 5",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw action goto chain 5",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+ "matchPattern": "handle 0x1.*gact action goto chain 5",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "eb8f",
+ "name": "Add fw filter with invalid action",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw action pump",
+ "expExitCode": "255",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+ "matchPattern": "handle 0x1.*gact action pump",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "6a79",
+ "name": "Add fw filter with missing mandatory action",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw",
+ "expExitCode": "2",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+ "matchPattern": "filter protocol all pref [0-9]+ fw.*handle 0x1",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "8298",
+ "name": "Add fw filter with cookie",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 2 fw action pipe cookie aa11bb22cc33dd44ee55ff66aa11b1b2",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 2 protocol all fw",
+ "matchPattern": "pref 2 fw.*handle 0x1.*gact action pipe.*cookie aa11bb22cc33dd44ee55ff66aa11b1b2",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "a88c",
+ "name": "Add fw filter with invalid cookie",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 2 fw action continue cookie aa11bb22cc33dd44ee55ff66aa11b1b2777888",
+ "expExitCode": "255",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 2 protocol all fw",
+ "matchPattern": "pref 2 fw.*handle 0x1.*gact action continue.*cookie aa11bb22cc33dd44ee55ff66aa11b1b2777888",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "10f6",
+ "name": "Add fw filter with handle in hex",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 0xa1b2ff prio 1 fw action ok",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 0xa1b2ff prio 1 protocol all fw",
+ "matchPattern": "fw.*handle 0xa1b2ff.*gact action pass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "9d51",
+ "name": "Add fw filter with handle at 32-bit maximum",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 4294967295 prio 1 fw action ok",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 4294967295 prio 1 protocol all fw",
+ "matchPattern": "fw.*handle 0xffffffff.*gact action pass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "d939",
+ "name": "Add fw filter with handle exceeding 32-bit maximum",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 4294967296 prio 1 fw action ok",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 4294967296 prio 1 protocol all fw",
+ "matchPattern": "fw.*handle 0x.*gact action pass",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "658c",
+ "name": "Add fw filter with mask in hex",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 10/0xa1b2f prio 1 fw action ok",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 10 prio 1 protocol all fw",
+ "matchPattern": "fw.*handle 0xa/0xa1b2f",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "86be",
+ "name": "Add fw filter with mask at 32-bit maximum",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 10/4294967295 prio 1 fw action ok",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 10 prio 1 protocol all fw",
+ "matchPattern": "fw.*handle 0xa[^/]",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "e635",
+ "name": "Add fw filter with mask exceeding 32-bit maximum",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 10/4294967296 prio 1 fw action ok",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 10 prio 1 protocol all fw",
+ "matchPattern": "fw.*handle 0xa",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "6cab",
+ "name": "Add fw filter with handle/mask in hex",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 0xa1b2cdff/0x1a2bffdc prio 1 fw action ok",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 0xa1b2cdff prio 1 protocol all fw",
+ "matchPattern": "fw.*handle 0xa1b2cdff/0x1a2bffdc",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "8700",
+ "name": "Add fw filter with handle/mask at 32-bit maximum",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 4294967295/4294967295 prio 1 fw action ok",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 0xffffffff prio 1 protocol all fw",
+ "matchPattern": "fw.*handle 0xffffffff[^/]",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "7d62",
+ "name": "Add fw filter with handle/mask exceeding 32-bit maximum",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 4294967296/4294967296 prio 1 fw action ok",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 10 prio 1 protocol all fw",
+ "matchPattern": "fw.*handle",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "7b69",
+ "name": "Add fw filter with missing mandatory handle",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: prio 1 fw action ok",
+ "expExitCode": "2",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "filter protocol all.*fw.*handle.*gact action pass",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "d68b",
+ "name": "Add fw filter with invalid parent",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent aa11b1b2: handle 1 prio 1 fw action ok",
+ "expExitCode": "255",
+ "verifyCmd": "$TC filter dev $DEV1 parent aa11b1b2: handle 1 prio 1 protocol all fw",
+ "matchPattern": "filter protocol all pref 1 fw.*handle 0x1.*gact action pass",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "66e0",
+ "name": "Add fw filter with missing mandatory parent id",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 handle 1 prio 1 fw action ok",
+ "expExitCode": "2",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+ "matchPattern": "pref [0-9]+ fw.*handle 0x1.*gact action pass",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "0ff3",
+ "name": "Add fw filter with classid",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw classid 3 action ok",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+ "matchPattern": "fw.*handle 0x1 classid :3.*gact action pass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "9849",
+ "name": "Add fw filter with classid at root",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw classid ffff:ffff action ok",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+ "matchPattern": "pref 1 fw.*handle 0x1 classid root.*gact action pass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "b7ff",
+ "name": "Add fw filter with classid - keeps last 8 (hex) digits",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw classid 98765fedcb action ok",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+ "matchPattern": "fw.*handle 0x1 classid 765f:edcb.*gact action pass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "2b18",
+ "name": "Add fw filter with invalid classid",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw classid 6789defg action ok",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+ "matchPattern": "fw.*handle 0x1 classid 6789:defg.*gact action pass",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "fade",
+ "name": "Add fw filter with flowid",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 10 prio 1 fw flowid 1:10 action ok",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 10 prio 1 protocol all fw",
+ "matchPattern": "filter parent ffff: protocol all pref 1 fw.*handle 0xa classid 1:10.*gact action pass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "33af",
+ "name": "Add fw filter with flowid then classid (same arg, takes second)",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 11 prio 1 fw flowid 10 classid 4 action ok",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 11 prio 1 protocol all fw",
+ "matchPattern": "filter parent ffff: protocol all pref 1 fw.*handle 0xb classid :4.*gact action pass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "8a8c",
+ "name": "Add fw filter with classid then flowid (same arg, takes second)",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 11 prio 1 fw classid 4 flowid 10 action ok",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 11 prio 1 protocol all fw",
+ "matchPattern": "filter parent ffff: protocol all pref 1 fw.*handle 0xb classid :10.*gact action pass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "b50d",
+ "name": "Add fw filter with handle val/mask and flowid 10:1000",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: prio 3 handle 10/0xff fw flowid 10:1000 action ok",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 10 prio 3 protocol all fw",
+ "matchPattern": "filter parent ffff: protocol all pref 3 fw.*handle 0xa/0xff classid 10:1000.*gact action pass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "7207",
+ "name": "Add fw filter with protocol ip",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol ip prio 1 handle 3 fw action ok",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 3 prio 1 protocol ip fw",
+ "matchPattern": "filter parent ffff: protocol ip pref 1 fw.*handle 0x3.*gact action pass.*index [0-9]+ ref [0-9]+ bind [0-9]+",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "306d",
+ "name": "Add fw filter with protocol ipv6",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol ipv6 prio 2 handle 4 fw action ok",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 4 prio 2 protocol ipv6 fw",
+ "matchPattern": "filter parent ffff: protocol ipv6 pref 2 fw.*handle 0x4.*gact action pass.*index [0-9]+ ref [0-9]+ bind [0-9]+",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "9a78",
+ "name": "Add fw filter with protocol arp",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol arp prio 5 handle 7 fw action drop",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 7 prio 5 protocol arp fw",
+ "matchPattern": "filter parent ffff: protocol arp pref 5 fw.*handle 0x7.*gact action drop.*index [0-9]+ ref [0-9]+ bind [0-9]+",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "1821",
+ "name": "Add fw filter with protocol 802_3",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol 802_3 handle 1 prio 1 fw action ok",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol 802_3 fw",
+ "matchPattern": "filter parent ffff: protocol 802_3 pref 1 fw.*handle 0x1.*gact action pass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "2260",
+ "name": "Add fw filter with invalid protocol",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol igmp handle 1 prio 1 fw action ok",
+ "expExitCode": "255",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol igmp fw",
+ "matchPattern": "filter parent ffff: protocol igmp pref 1 fw.*handle 0x1.*gact action pass",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "09d7",
+ "name": "Add fw filters protocol 802_3 and ip with conflicting priorities",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress",
+ "$TC filter add dev $DEV1 parent ffff: protocol 802_3 prio 3 handle 7 fw action ok"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol ip prio 3 handle 8 fw action ok",
+ "expExitCode": "2",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 8 prio 3 protocol ip fw",
+ "matchPattern": "filter parent ffff: protocol ip pref 3 fw.*handle 0x8",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "6973",
+ "name": "Add fw filters with same index, same action",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress",
+ "$TC filter add dev $DEV1 parent ffff: prio 6 handle 2 fw action continue index 5"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: prio 8 handle 4 fw action continue index 5",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 4 prio 8 protocol all fw",
+ "matchPattern": "filter parent ffff: protocol all pref 8 fw.*handle 0x4.*gact action continue.*index 5 ref 2 bind 2",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "fc06",
+ "name": "Add fw filters with action police",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: prio 3 handle 4 fw action police rate 1kbit burst 10k index 5",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 4 prio 3 protocol all fw",
+ "matchPattern": "filter parent ffff: protocol all pref 3 fw.*handle 0x4.*police 0x5 rate 1Kbit burst 10Kb mtu 2Kb action reclassify overhead 0b.*ref 1 bind 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "aac7",
+ "name": "Add fw filters with action police linklayer atm",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: prio 3 handle 4 fw action police rate 2mbit burst 200k linklayer atm index 8",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 4 prio 3 protocol all fw",
+ "matchPattern": "filter parent ffff: protocol all pref 3 fw.*handle 0x4.*police 0x8 rate 2Mbit burst 200Kb mtu 2Kb action reclassify overhead 0b linklayer atm.*ref 1 bind 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "5339",
+ "name": "Del entire fw filter",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress",
+ "$TC filter add dev $DEV1 parent ffff: handle 5 prio 7 fw action pass",
+ "$TC filter add dev $DEV1 parent ffff: handle 3 prio 9 fw action pass"
+ ],
+ "cmdUnderTest": "$TC filter del dev $DEV1 parent ffff:",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "protocol all pref.*handle.*gact action pass",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "0e99",
+ "name": "Del single fw filter x1",
+ "__comment__": "First of two tests to check that one filter is there and the other isn't",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress",
+ "$TC filter add dev $DEV1 parent ffff: handle 5 prio 7 fw action pass",
+ "$TC filter add dev $DEV1 parent ffff: handle 3 prio 9 fw action pass"
+ ],
+ "cmdUnderTest": "$TC filter del dev $DEV1 parent ffff: handle 3 prio 9 fw action pass",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "protocol all pref 7.*handle 0x5.*gact action pass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "f54c",
+ "name": "Del single fw filter x2",
+ "__comment__": "Second of two tests to check that one filter is there and the other isn't",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress",
+ "$TC filter add dev $DEV1 parent ffff: handle 5 prio 7 fw action pass",
+ "$TC filter add dev $DEV1 parent ffff: handle 3 prio 9 fw action pass"
+ ],
+ "cmdUnderTest": "$TC filter del dev $DEV1 parent ffff: handle 3 prio 9 fw action pass",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "protocol all pref 9.*handle 0x3.*gact action pass",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "ba94",
+ "name": "Del fw filter by prio",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress",
+ "$TC filter add dev $DEV1 parent ffff: handle 1 prio 4 fw action ok",
+ "$TC filter add dev $DEV1 parent ffff: handle 2 prio 4 fw action ok"
+ ],
+ "cmdUnderTest": "$TC filter del dev $DEV1 parent ffff: prio 4",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "pref 4 fw.*gact action pass",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "4acb",
+ "name": "Del fw filter by chain",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress",
+ "$TC filter add dev $DEV1 parent ffff: handle 4 prio 2 chain 13 fw action pipe",
+ "$TC filter add dev $DEV1 parent ffff: handle 3 prio 5 chain 13 fw action pipe"
+ ],
+ "cmdUnderTest": "$TC filter del dev $DEV1 parent ffff: chain 13",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "fw chain 13 handle.*gact action pipe",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "3424",
+ "name": "Del fw filter by action (invalid)",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress",
+ "$TC filter add dev $DEV1 parent ffff: handle 2 prio 4 fw action drop"
+ ],
+ "cmdUnderTest": "$TC filter del dev $DEV1 parent ffff: fw action drop",
+ "expExitCode": "2",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 2 prio 4 protocol all fw",
+ "matchPattern": "handle 0x2.*gact action drop",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "da89",
+ "name": "Del fw filter by handle (invalid)",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress",
+ "$TC filter add dev $DEV1 parent ffff: handle 3 prio 4 fw action continue"
+ ],
+ "cmdUnderTest": "$TC filter del dev $DEV1 parent ffff: handle 3 fw",
+ "expExitCode": "2",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 3 prio 4 protocol all fw",
+ "matchPattern": "handle 0x3.*gact action continue",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "4d95",
+ "name": "Del fw filter by protocol (invalid)",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress",
+ "$TC filter add dev $DEV1 parent ffff: handle 4 prio 2 protocol arp fw action pipe"
+ ],
+ "cmdUnderTest": "$TC filter del dev $DEV1 parent ffff: protocol arp fw",
+ "expExitCode": "2",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 4 prio 2 protocol arp fw",
+ "matchPattern": "filter parent ffff: protocol arp.*handle 0x4.*gact action pipe",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "4736",
+ "name": "Del fw filter by flowid (invalid)",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress",
+ "$TC filter add dev $DEV1 parent ffff: handle 4 prio 2 fw action pipe flowid 45"
+ ],
+ "cmdUnderTest": "$TC filter del dev $DEV1 parent ffff: fw flowid 45",
+ "expExitCode": "2",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "handle 0x4.*gact action pipe",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "3dcb",
+ "name": "Replace fw filter action",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress",
+ "$TC filter add dev $DEV1 parent ffff: handle 1 prio 2 fw action ok"
+ ],
+ "cmdUnderTest": "$TC filter replace dev $DEV1 parent ffff: handle 1 prio 2 fw action pipe",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "pref 2 fw.*handle 0x1.*gact action pipe",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "eb4d",
+ "name": "Replace fw filter classid",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress",
+ "$TC filter add dev $DEV1 parent ffff: handle 1 prio 2 fw action ok"
+ ],
+ "cmdUnderTest": "$TC filter replace dev $DEV1 parent ffff: handle 1 prio 2 fw action pipe classid 2",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "pref 2 fw.*handle 0x1 classid :2.*gact action pipe",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "67ec",
+ "name": "Replace fw filter index",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress",
+ "$TC filter add dev $DEV1 parent ffff: handle 1 prio 2 fw action ok index 3"
+ ],
+ "cmdUnderTest": "$TC filter replace dev $DEV1 parent ffff: handle 1 prio 2 fw action ok index 16",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "pref 2 fw.*handle 0x1.*gact action pass.*index 16",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
new file mode 100644
index 000000000..99a5ffca1
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
@@ -0,0 +1,42 @@
+[
+ {
+ "id": "e9a3",
+ "name": "Add u32 with source match",
+ "category": [
+ "filter",
+ "u32"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol ip prio 1 u32 match ip src 127.0.0.1/32 flowid 1:1 action ok",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "match 7f000001/ffffffff at 12",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "d052",
+ "name": "Add 1M filters with the same action",
+ "category": [
+ "filter",
+ "flower"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV2 ingress",
+ "./tdc_batch.py $DEV2 $BATCH_FILE --share_action -n 1000000"
+ ],
+ "cmdUnderTest": "$TC -b $BATCH_FILE",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "action order 0: gact action drop.*index 1 ref 1000000 bind 1000000",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV2 ingress",
+ "/bin/rm $BATCH_FILE"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py
new file mode 100755
index 000000000..7607ba3e3
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tdc.py
@@ -0,0 +1,663 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+tdc.py - Linux tc (Traffic Control) unit test driver
+
+Copyright (C) 2017 Lucas Bates <lucasb@mojatatu.com>
+"""
+
+import re
+import os
+import sys
+import argparse
+import importlib
+import json
+import subprocess
+import time
+import traceback
+from collections import OrderedDict
+from string import Template
+
+from tdc_config import *
+from tdc_helper import *
+
+import TdcPlugin
+
+
+class PluginMgrTestFail(Exception):
+ def __init__(self, stage, output, message):
+ self.stage = stage
+ self.output = output
+ self.message = message
+
+class PluginMgr:
+ def __init__(self, argparser):
+ super().__init__()
+ self.plugins = {}
+ self.plugin_instances = []
+ self.args = []
+ self.argparser = argparser
+
+ # TODO, put plugins in order
+ plugindir = os.getenv('TDC_PLUGIN_DIR', './plugins')
+ for dirpath, dirnames, filenames in os.walk(plugindir):
+ for fn in filenames:
+ if (fn.endswith('.py') and
+ not fn == '__init__.py' and
+ not fn.startswith('#') and
+ not fn.startswith('.#')):
+ mn = fn[0:-3]
+ foo = importlib.import_module('plugins.' + mn)
+ self.plugins[mn] = foo
+ self.plugin_instances.append(foo.SubPlugin())
+
+ def call_pre_suite(self, testcount, testidlist):
+ for pgn_inst in self.plugin_instances:
+ pgn_inst.pre_suite(testcount, testidlist)
+
+ def call_post_suite(self, index):
+ for pgn_inst in reversed(self.plugin_instances):
+ pgn_inst.post_suite(index)
+
+ def call_pre_case(self, test_ordinal, testid):
+ for pgn_inst in self.plugin_instances:
+ try:
+ pgn_inst.pre_case(test_ordinal, testid)
+ except Exception as ee:
+ print('exception {} in call to pre_case for {} plugin'.
+ format(ee, pgn_inst.__class__))
+ print('test_ordinal is {}'.format(test_ordinal))
+ print('testid is {}'.format(testid))
+ raise
+
+ def call_post_case(self):
+ for pgn_inst in reversed(self.plugin_instances):
+ pgn_inst.post_case()
+
+ def call_pre_execute(self):
+ for pgn_inst in self.plugin_instances:
+ pgn_inst.pre_execute()
+
+ def call_post_execute(self):
+ for pgn_inst in reversed(self.plugin_instances):
+ pgn_inst.post_execute()
+
+ def call_add_args(self, parser):
+ for pgn_inst in self.plugin_instances:
+ parser = pgn_inst.add_args(parser)
+ return parser
+
+ def call_check_args(self, args, remaining):
+ for pgn_inst in self.plugin_instances:
+ pgn_inst.check_args(args, remaining)
+
+ def call_adjust_command(self, stage, command):
+ for pgn_inst in self.plugin_instances:
+ command = pgn_inst.adjust_command(stage, command)
+ return command
+
+ @staticmethod
+ def _make_argparser(args):
+ self.argparser = argparse.ArgumentParser(
+ description='Linux TC unit tests')
+
+
+def replace_keywords(cmd):
+ """
+ For a given executable command, substitute any known
+ variables contained within NAMES with the correct values
+ """
+ tcmd = Template(cmd)
+ subcmd = tcmd.safe_substitute(NAMES)
+ return subcmd
+
+
+def exec_cmd(args, pm, stage, command):
+ """
+ Perform any required modifications on an executable command, then run
+ it in a subprocess and return the results.
+ """
+ if len(command.strip()) == 0:
+ return None, None
+ if '$' in command:
+ command = replace_keywords(command)
+
+ command = pm.call_adjust_command(stage, command)
+ if args.verbose > 0:
+ print('command "{}"'.format(command))
+ proc = subprocess.Popen(command,
+ shell=True,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ env=ENVIR)
+ (rawout, serr) = proc.communicate()
+
+ if proc.returncode != 0 and len(serr) > 0:
+ foutput = serr.decode("utf-8", errors="ignore")
+ else:
+ foutput = rawout.decode("utf-8", errors="ignore")
+
+ proc.stdout.close()
+ proc.stderr.close()
+ return proc, foutput
+
+
+def prepare_env(args, pm, stage, prefix, cmdlist, output = None):
+ """
+ Execute the setup/teardown commands for a test case.
+ Optionally terminate test execution if the command fails.
+ """
+ if args.verbose > 0:
+ print('{}'.format(prefix))
+ for cmdinfo in cmdlist:
+ if isinstance(cmdinfo, list):
+ exit_codes = cmdinfo[1:]
+ cmd = cmdinfo[0]
+ else:
+ exit_codes = [0]
+ cmd = cmdinfo
+
+ if not cmd:
+ continue
+
+ (proc, foutput) = exec_cmd(args, pm, stage, cmd)
+
+ if proc and (proc.returncode not in exit_codes):
+ print('', file=sys.stderr)
+ print("{} *** Could not execute: \"{}\"".format(prefix, cmd),
+ file=sys.stderr)
+ print("\n{} *** Error message: \"{}\"".format(prefix, foutput),
+ file=sys.stderr)
+ print("returncode {}; expected {}".format(proc.returncode,
+ exit_codes))
+ print("\n{} *** Aborting test run.".format(prefix), file=sys.stderr)
+ print("\n\n{} *** stdout ***".format(proc.stdout), file=sys.stderr)
+ print("\n\n{} *** stderr ***".format(proc.stderr), file=sys.stderr)
+ raise PluginMgrTestFail(
+ stage, output,
+ '"{}" did not complete successfully'.format(prefix))
+
+def run_one_test(pm, args, index, tidx):
+ global NAMES
+ result = True
+ tresult = ""
+ tap = ""
+ if args.verbose > 0:
+ print("\t====================\n=====> ", end="")
+ print("Test " + tidx["id"] + ": " + tidx["name"])
+
+ # populate NAMES with TESTID for this test
+ NAMES['TESTID'] = tidx['id']
+
+ pm.call_pre_case(index, tidx['id'])
+ prepare_env(args, pm, 'setup', "-----> prepare stage", tidx["setup"])
+
+ if (args.verbose > 0):
+ print('-----> execute stage')
+ pm.call_pre_execute()
+ (p, procout) = exec_cmd(args, pm, 'execute', tidx["cmdUnderTest"])
+ if p:
+ exit_code = p.returncode
+ else:
+ exit_code = None
+
+ pm.call_post_execute()
+
+ if (exit_code is None or exit_code != int(tidx["expExitCode"])):
+ result = False
+ print("exit: {!r}".format(exit_code))
+ print("exit: {}".format(int(tidx["expExitCode"])))
+ #print("exit: {!r} {}".format(exit_code, int(tidx["expExitCode"])))
+ print(procout)
+ else:
+ if args.verbose > 0:
+ print('-----> verify stage')
+ match_pattern = re.compile(
+ str(tidx["matchPattern"]), re.DOTALL | re.MULTILINE)
+ (p, procout) = exec_cmd(args, pm, 'verify', tidx["verifyCmd"])
+ if procout:
+ match_index = re.findall(match_pattern, procout)
+ if len(match_index) != int(tidx["matchCount"]):
+ result = False
+ elif int(tidx["matchCount"]) != 0:
+ result = False
+
+ if not result:
+ tresult += 'not '
+ tresult += 'ok {} - {} # {}\n'.format(str(index), tidx['id'], tidx['name'])
+ tap += tresult
+
+ if result == False:
+ if procout:
+ tap += procout
+ else:
+ tap += 'No output!\n'
+
+ prepare_env(args, pm, 'teardown', '-----> teardown stage', tidx['teardown'], procout)
+ pm.call_post_case()
+
+ index += 1
+
+ # remove TESTID from NAMES
+ del(NAMES['TESTID'])
+ return tap
+
+def test_runner(pm, args, filtered_tests):
+ """
+ Driver function for the unit tests.
+
+ Prints information about the tests being run, executes the setup and
+ teardown commands and the command under test itself. Also determines
+ success/failure based on the information in the test case and generates
+ TAP output accordingly.
+ """
+ testlist = filtered_tests
+ tcount = len(testlist)
+ index = 1
+ tap = ''
+ badtest = None
+ stage = None
+ emergency_exit = False
+ emergency_exit_message = ''
+
+ if args.notap:
+ if args.verbose:
+ tap = 'notap requested: omitting test plan\n'
+ else:
+ tap = str(index) + ".." + str(tcount) + "\n"
+ try:
+ pm.call_pre_suite(tcount, [tidx['id'] for tidx in testlist])
+ except Exception as ee:
+ ex_type, ex, ex_tb = sys.exc_info()
+ print('Exception {} {} (caught in pre_suite).'.
+ format(ex_type, ex))
+ # when the extra print statements are uncommented,
+ # the traceback does not appear between them
+ # (it appears way earlier in the tdc.py output)
+ # so don't bother ...
+ # print('--------------------(')
+ # print('traceback')
+ traceback.print_tb(ex_tb)
+ # print('--------------------)')
+ emergency_exit_message = 'EMERGENCY EXIT, call_pre_suite failed with exception {} {}\n'.format(ex_type, ex)
+ emergency_exit = True
+ stage = 'pre-SUITE'
+
+ if emergency_exit:
+ pm.call_post_suite(index)
+ return emergency_exit_message
+ if args.verbose > 1:
+ print('give test rig 2 seconds to stabilize')
+ time.sleep(2)
+ for tidx in testlist:
+ if "flower" in tidx["category"] and args.device == None:
+ if args.verbose > 1:
+ print('Not executing test {} {} because DEV2 not defined'.
+ format(tidx['id'], tidx['name']))
+ continue
+ try:
+ badtest = tidx # in case it goes bad
+ tap += run_one_test(pm, args, index, tidx)
+ except PluginMgrTestFail as pmtf:
+ ex_type, ex, ex_tb = sys.exc_info()
+ stage = pmtf.stage
+ message = pmtf.message
+ output = pmtf.output
+ print(message)
+ print('Exception {} {} (caught in test_runner, running test {} {} {} stage {})'.
+ format(ex_type, ex, index, tidx['id'], tidx['name'], stage))
+ print('---------------')
+ print('traceback')
+ traceback.print_tb(ex_tb)
+ print('---------------')
+ if stage == 'teardown':
+ print('accumulated output for this test:')
+ if pmtf.output:
+ print(pmtf.output)
+ print('---------------')
+ break
+ index += 1
+
+ # if we failed in setup or teardown,
+ # fill in the remaining tests with ok-skipped
+ count = index
+ if not args.notap:
+ tap += 'about to flush the tap output if tests need to be skipped\n'
+ if tcount + 1 != index:
+ for tidx in testlist[index - 1:]:
+ msg = 'skipped - previous {} failed'.format(stage)
+ tap += 'ok {} - {} # {} {} {}\n'.format(
+ count, tidx['id'], msg, index, badtest.get('id', '--Unknown--'))
+ count += 1
+
+ tap += 'done flushing skipped test tap output\n'
+
+ if args.pause:
+ print('Want to pause\nPress enter to continue ...')
+ if input(sys.stdin):
+ print('got something on stdin')
+
+ pm.call_post_suite(index)
+
+ return tap
+
+def has_blank_ids(idlist):
+ """
+ Search the list for empty ID fields and return true/false accordingly.
+ """
+ return not(all(k for k in idlist))
+
+
+def load_from_file(filename):
+ """
+ Open the JSON file containing the test cases and return them
+ as list of ordered dictionary objects.
+ """
+ try:
+ with open(filename) as test_data:
+ testlist = json.load(test_data, object_pairs_hook=OrderedDict)
+ except json.JSONDecodeError as jde:
+ print('IGNORING test case file {}\n\tBECAUSE: {}'.format(filename, jde))
+ testlist = list()
+ else:
+ idlist = get_id_list(testlist)
+ if (has_blank_ids(idlist)):
+ for k in testlist:
+ k['filename'] = filename
+ return testlist
+
+
+def args_parse():
+ """
+ Create the argument parser.
+ """
+ parser = argparse.ArgumentParser(description='Linux TC unit tests')
+ return parser
+
+
+def set_args(parser):
+ """
+ Set the command line arguments for tdc.
+ """
+ parser.add_argument(
+ '-p', '--path', type=str,
+ help='The full path to the tc executable to use')
+ sg = parser.add_argument_group(
+ 'selection', 'select which test cases: ' +
+ 'files plus directories; filtered by categories plus testids')
+ ag = parser.add_argument_group(
+ 'action', 'select action to perform on selected test cases')
+
+ sg.add_argument(
+ '-D', '--directory', nargs='+', metavar='DIR',
+ help='Collect tests from the specified directory(ies) ' +
+ '(default [tc-tests])')
+ sg.add_argument(
+ '-f', '--file', nargs='+', metavar='FILE',
+ help='Run tests from the specified file(s)')
+ sg.add_argument(
+ '-c', '--category', nargs='*', metavar='CATG', default=['+c'],
+ help='Run tests only from the specified category/ies, ' +
+ 'or if no category/ies is/are specified, list known categories.')
+ sg.add_argument(
+ '-e', '--execute', nargs='+', metavar='ID',
+ help='Execute the specified test cases with specified IDs')
+ ag.add_argument(
+ '-l', '--list', action='store_true',
+ help='List all test cases, or those only within the specified category')
+ ag.add_argument(
+ '-s', '--show', action='store_true', dest='showID',
+ help='Display the selected test cases')
+ ag.add_argument(
+ '-i', '--id', action='store_true', dest='gen_id',
+ help='Generate ID numbers for new test cases')
+ parser.add_argument(
+ '-v', '--verbose', action='count', default=0,
+ help='Show the commands that are being run')
+ parser.add_argument(
+ '-N', '--notap', action='store_true',
+ help='Suppress tap results for command under test')
+ parser.add_argument('-d', '--device',
+ help='Execute the test case in flower category')
+ parser.add_argument(
+ '-P', '--pause', action='store_true',
+ help='Pause execution just before post-suite stage')
+ return parser
+
+
+def check_default_settings(args, remaining, pm):
+ """
+ Process any arguments overriding the default settings,
+ and ensure the settings are correct.
+ """
+ # Allow for overriding specific settings
+ global NAMES
+
+ if args.path != None:
+ NAMES['TC'] = args.path
+ if args.device != None:
+ NAMES['DEV2'] = args.device
+ if not os.path.isfile(NAMES['TC']):
+ print("The specified tc path " + NAMES['TC'] + " does not exist.")
+ exit(1)
+
+ pm.call_check_args(args, remaining)
+
+
+def get_id_list(alltests):
+ """
+ Generate a list of all IDs in the test cases.
+ """
+ return [x["id"] for x in alltests]
+
+
+def check_case_id(alltests):
+ """
+ Check for duplicate test case IDs.
+ """
+ idl = get_id_list(alltests)
+ return [x for x in idl if idl.count(x) > 1]
+
+
+def does_id_exist(alltests, newid):
+ """
+ Check if a given ID already exists in the list of test cases.
+ """
+ idl = get_id_list(alltests)
+ return (any(newid == x for x in idl))
+
+
+def generate_case_ids(alltests):
+ """
+ If a test case has a blank ID field, generate a random hex ID for it
+ and then write the test cases back to disk.
+ """
+ import random
+ for c in alltests:
+ if (c["id"] == ""):
+ while True:
+ newid = str('{:04x}'.format(random.randrange(16**4)))
+ if (does_id_exist(alltests, newid)):
+ continue
+ else:
+ c['id'] = newid
+ break
+
+ ufilename = []
+ for c in alltests:
+ if ('filename' in c):
+ ufilename.append(c['filename'])
+ ufilename = get_unique_item(ufilename)
+ for f in ufilename:
+ testlist = []
+ for t in alltests:
+ if 'filename' in t:
+ if t['filename'] == f:
+ del t['filename']
+ testlist.append(t)
+ outfile = open(f, "w")
+ json.dump(testlist, outfile, indent=4)
+ outfile.write("\n")
+ outfile.close()
+
+def filter_tests_by_id(args, testlist):
+ '''
+ Remove tests from testlist that are not in the named id list.
+ If id list is empty, return empty list.
+ '''
+ newlist = list()
+ if testlist and args.execute:
+ target_ids = args.execute
+
+ if isinstance(target_ids, list) and (len(target_ids) > 0):
+ newlist = list(filter(lambda x: x['id'] in target_ids, testlist))
+ return newlist
+
+def filter_tests_by_category(args, testlist):
+ '''
+ Remove tests from testlist that are not in a named category.
+ '''
+ answer = list()
+ if args.category and testlist:
+ test_ids = list()
+ for catg in set(args.category):
+ if catg == '+c':
+ continue
+ print('considering category {}'.format(catg))
+ for tc in testlist:
+ if catg in tc['category'] and tc['id'] not in test_ids:
+ answer.append(tc)
+ test_ids.append(tc['id'])
+
+ return answer
+
+def get_test_cases(args):
+ """
+ If a test case file is specified, retrieve tests from that file.
+ Otherwise, glob for all json files in subdirectories and load from
+ each one.
+ Also, if requested, filter by category, and add tests matching
+ certain ids.
+ """
+ import fnmatch
+
+ flist = []
+ testdirs = ['tc-tests']
+
+ if args.file:
+ # at least one file was specified - remove the default directory
+ testdirs = []
+
+ for ff in args.file:
+ if not os.path.isfile(ff):
+ print("IGNORING file " + ff + "\n\tBECAUSE does not exist.")
+ else:
+ flist.append(os.path.abspath(ff))
+
+ if args.directory:
+ testdirs = args.directory
+
+ for testdir in testdirs:
+ for root, dirnames, filenames in os.walk(testdir):
+ for filename in fnmatch.filter(filenames, '*.json'):
+ candidate = os.path.abspath(os.path.join(root, filename))
+ if candidate not in testdirs:
+ flist.append(candidate)
+
+ alltestcases = list()
+ for casefile in flist:
+ alltestcases = alltestcases + (load_from_file(casefile))
+
+ allcatlist = get_test_categories(alltestcases)
+ allidlist = get_id_list(alltestcases)
+
+ testcases_by_cats = get_categorized_testlist(alltestcases, allcatlist)
+ idtestcases = filter_tests_by_id(args, alltestcases)
+ cattestcases = filter_tests_by_category(args, alltestcases)
+
+ cat_ids = [x['id'] for x in cattestcases]
+ if args.execute:
+ if args.category:
+ alltestcases = cattestcases + [x for x in idtestcases if x['id'] not in cat_ids]
+ else:
+ alltestcases = idtestcases
+ else:
+ if cat_ids:
+ alltestcases = cattestcases
+ else:
+ # just accept the existing value of alltestcases,
+ # which has been filtered by file/directory
+ pass
+
+ return allcatlist, allidlist, testcases_by_cats, alltestcases
+
+
+def set_operation_mode(pm, args):
+ """
+ Load the test case data and process remaining arguments to determine
+ what the script should do for this run, and call the appropriate
+ function.
+ """
+ ucat, idlist, testcases, alltests = get_test_cases(args)
+
+ if args.gen_id:
+ if (has_blank_ids(idlist)):
+ alltests = generate_case_ids(alltests)
+ else:
+ print("No empty ID fields found in test files.")
+ exit(0)
+
+ duplicate_ids = check_case_id(alltests)
+ if (len(duplicate_ids) > 0):
+ print("The following test case IDs are not unique:")
+ print(str(set(duplicate_ids)))
+ print("Please correct them before continuing.")
+ exit(1)
+
+ if args.showID:
+ for atest in alltests:
+ print_test_case(atest)
+ exit(0)
+
+ if isinstance(args.category, list) and (len(args.category) == 0):
+ print("Available categories:")
+ print_sll(ucat)
+ exit(0)
+
+ if args.list:
+ if args.list:
+ list_test_cases(alltests)
+ exit(0)
+
+ if len(alltests):
+ catresults = test_runner(pm, args, alltests)
+ else:
+ catresults = 'No tests found\n'
+ if args.notap:
+ print('Tap output suppression requested\n')
+ else:
+ print('All test results: \n\n{}'.format(catresults))
+
+def main():
+ """
+ Start of execution; set up argument parser and get the arguments,
+ and start operations.
+ """
+ parser = args_parse()
+ parser = set_args(parser)
+ pm = PluginMgr(parser)
+ parser = pm.call_add_args(parser)
+ (args, remaining) = parser.parse_known_args()
+ args.NAMES = NAMES
+ check_default_settings(args, remaining, pm)
+ if args.verbose > 2:
+ print('args is {}'.format(args))
+
+ set_operation_mode(pm, args)
+
+ exit(0)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/tc-testing/tdc_batch.py b/tools/testing/selftests/tc-testing/tdc_batch.py
new file mode 100755
index 000000000..3d8350d6b
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tdc_batch.py
@@ -0,0 +1,62 @@
+#!/usr/bin/env python3
+
+"""
+tdc_batch.py - a script to generate TC batch file
+
+Copyright (C) 2017 Chris Mi <chrism@mellanox.com>
+"""
+
+import argparse
+
+parser = argparse.ArgumentParser(description='TC batch file generator')
+parser.add_argument("device", help="device name")
+parser.add_argument("file", help="batch file name")
+parser.add_argument("-n", "--number", type=int,
+ help="how many lines in batch file")
+parser.add_argument("-o", "--skip_sw",
+ help="skip_sw (offload), by default skip_hw",
+ action="store_true")
+parser.add_argument("-s", "--share_action",
+ help="all filters share the same action",
+ action="store_true")
+parser.add_argument("-p", "--prio",
+ help="all filters have different prio",
+ action="store_true")
+args = parser.parse_args()
+
+device = args.device
+file = open(args.file, 'w')
+
+number = 1
+if args.number:
+ number = args.number
+
+skip = "skip_hw"
+if args.skip_sw:
+ skip = "skip_sw"
+
+share_action = ""
+if args.share_action:
+ share_action = "index 1"
+
+prio = "prio 1"
+if args.prio:
+ prio = ""
+ if number > 0x4000:
+ number = 0x4000
+
+index = 0
+for i in range(0x100):
+ for j in range(0x100):
+ for k in range(0x100):
+ mac = ("{:02x}:{:02x}:{:02x}".format(i, j, k))
+ src_mac = "e4:11:00:" + mac
+ dst_mac = "e4:12:00:" + mac
+ cmd = ("filter add dev {} {} protocol ip parent ffff: flower {} "
+ "src_mac {} dst_mac {} action drop {}".format
+ (device, prio, skip, src_mac, dst_mac, share_action))
+ file.write("{}\n".format(cmd))
+ index += 1
+ if index >= number:
+ file.close()
+ exit(0)
diff --git a/tools/testing/selftests/tc-testing/tdc_config.py b/tools/testing/selftests/tc-testing/tdc_config.py
new file mode 100644
index 000000000..d651bc150
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tdc_config.py
@@ -0,0 +1,36 @@
+"""
+# SPDX-License-Identifier: GPL-2.0
+tdc_config.py - tdc user-specified values
+
+Copyright (C) 2017 Lucas Bates <lucasb@mojatatu.com>
+"""
+
+# Dictionary containing all values that can be substituted in executable
+# commands.
+NAMES = {
+ # Substitute your own tc path here
+ 'TC': '/sbin/tc',
+ # Name of veth devices to be created for the namespace
+ 'DEV0': 'v0p0',
+ 'DEV1': 'v0p1',
+ 'DEV2': '',
+ 'BATCH_FILE': './batch.txt',
+ # Name of the namespace to use
+ 'NS': 'tcut',
+ # Directory containing eBPF test programs
+ 'EBPFDIR': './bpf'
+ }
+
+
+ENVIR = { }
+
+# put customizations in tdc_config_local.py
+try:
+ from tdc_config_local import *
+except ImportError as ie:
+ pass
+
+try:
+ NAMES.update(EXTRA_NAMES)
+except NameError as ne:
+ pass
diff --git a/tools/testing/selftests/tc-testing/tdc_config_local_template.py b/tools/testing/selftests/tc-testing/tdc_config_local_template.py
new file mode 100644
index 000000000..d48fc732a
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tdc_config_local_template.py
@@ -0,0 +1,23 @@
+"""
+tdc_config_local.py - tdc plugin-writer-specified values
+
+Copyright (C) 2017 bjb@mojatatu.com
+"""
+
+import os
+
+ENVIR = os.environ.copy()
+
+ENV_LD_LIBRARY_PATH = os.getenv('LD_LIBRARY_PATH', '')
+ENV_OTHER_LIB = os.getenv('OTHER_LIB', '')
+
+
+# example adding value to NAMES, without editing tdc_config.py
+EXTRA_NAMES = dict()
+EXTRA_NAMES['SOME_BIN'] = os.path.join(os.getenv('OTHER_BIN', ''), 'some_bin')
+
+
+# example adding values to ENVIR, without editing tdc_config.py
+ENVIR['VALGRIND_LIB'] = '/usr/lib/valgrind'
+ENVIR['VALGRIND_BIN'] = '/usr/bin/valgrind'
+ENVIR['VGDB_BIN'] = '/usr/bin/vgdb'
diff --git a/tools/testing/selftests/tc-testing/tdc_helper.py b/tools/testing/selftests/tc-testing/tdc_helper.py
new file mode 100644
index 000000000..9f35c96c8
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tdc_helper.py
@@ -0,0 +1,67 @@
+"""
+# SPDX-License-Identifier: GPL-2.0
+tdc_helper.py - tdc helper functions
+
+Copyright (C) 2017 Lucas Bates <lucasb@mojatatu.com>
+"""
+
+def get_categorized_testlist(alltests, ucat):
+ """ Sort the master test list into categories. """
+ testcases = dict()
+
+ for category in ucat:
+ testcases[category] = list(filter(lambda x: category in x['category'], alltests))
+
+ return(testcases)
+
+
+def get_unique_item(lst):
+ """ For a list, return a list of the unique items in the list. """
+ return list(set(lst))
+
+
+def get_test_categories(alltests):
+ """ Discover all unique test categories present in the test case file. """
+ ucat = []
+ for t in alltests:
+ ucat.extend(get_unique_item(t['category']))
+ ucat = get_unique_item(ucat)
+ return ucat
+
+def list_test_cases(testlist):
+ """ Print IDs and names of all test cases. """
+ for curcase in testlist:
+ print(curcase['id'] + ': (' + ', '.join(curcase['category']) + ") " + curcase['name'])
+
+
+def list_categories(testlist):
+ """ Show all categories that are present in a test case file. """
+ categories = set(map(lambda x: x['category'], testlist))
+ print("Available categories:")
+ print(", ".join(str(s) for s in categories))
+ print("")
+
+
+def print_list(cmdlist):
+ """ Print a list of strings prepended with a tab. """
+ for l in cmdlist:
+ if (type(l) == list):
+ print("\t" + str(l[0]))
+ else:
+ print("\t" + str(l))
+
+
+def print_sll(items):
+ print("\n".join(str(s) for s in items))
+
+
+def print_test_case(tcase):
+ """ Pretty-printing of a given test case. """
+ print('\n==============\nTest {}\t{}\n'.format(tcase['id'], tcase['name']))
+ for k in tcase.keys():
+ if (isinstance(tcase[k], list)):
+ print(k + ":")
+ print_list(tcase[k])
+ else:
+ if not ((k == 'id') or (k == 'name')):
+ print(k + ": " + str(tcase[k]))
diff --git a/tools/testing/selftests/timers/.gitignore b/tools/testing/selftests/timers/.gitignore
new file mode 100644
index 000000000..32a9eadb2
--- /dev/null
+++ b/tools/testing/selftests/timers/.gitignore
@@ -0,0 +1,21 @@
+alarmtimer-suspend
+change_skew
+clocksource-switch
+inconsistency-check
+leap-a-day
+leapcrash
+mqueue-lat
+nanosleep
+nsleep-lat
+posix_timers
+raw_skew
+rtcpie
+set-2038
+set-tai
+set-timer-lat
+skew_consistency
+threadtest
+valid-adjtimex
+adjtick
+set-tz
+freq-step
diff --git a/tools/testing/selftests/timers/Makefile b/tools/testing/selftests/timers/Makefile
new file mode 100644
index 000000000..7656c7ce7
--- /dev/null
+++ b/tools/testing/selftests/timers/Makefile
@@ -0,0 +1,23 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS += -O3 -Wl,-no-as-needed -Wall
+LDLIBS += -lrt -lpthread -lm
+
+# these are all "safe" tests that don't modify
+# system time or require escalated privileges
+TEST_GEN_PROGS = posix_timers nanosleep nsleep-lat set-timer-lat mqueue-lat \
+ inconsistency-check raw_skew threadtest rtcpie
+
+DESTRUCTIVE_TESTS = alarmtimer-suspend valid-adjtimex adjtick change_skew \
+ skew_consistency clocksource-switch freq-step leap-a-day \
+ leapcrash set-tai set-2038 set-tz
+
+TEST_GEN_PROGS_EXTENDED = $(DESTRUCTIVE_TESTS)
+
+
+include ../lib.mk
+
+# these tests require escalated privileges
+# and may modify the system time or trigger
+# other behavior like suspend
+run_destructive_tests: run_tests
+ $(call RUN_TESTS, $(DESTRUCTIVE_TESTS))
diff --git a/tools/testing/selftests/timers/adjtick.c b/tools/testing/selftests/timers/adjtick.c
new file mode 100644
index 000000000..54d8d87f3
--- /dev/null
+++ b/tools/testing/selftests/timers/adjtick.c
@@ -0,0 +1,211 @@
+/* adjtimex() tick adjustment test
+ * by: John Stultz <john.stultz@linaro.org>
+ * (C) Copyright Linaro Limited 2015
+ * Licensed under the GPLv2
+ *
+ * To build:
+ * $ gcc adjtick.c -o adjtick -lrt
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <time.h>
+
+#include "../kselftest.h"
+
+#define CLOCK_MONOTONIC_RAW 4
+
+#define NSEC_PER_SEC 1000000000LL
+#define USEC_PER_SEC 1000000
+
+#define MILLION 1000000
+
+long systick;
+
+long long llabs(long long val)
+{
+ if (val < 0)
+ val = -val;
+ return val;
+}
+
+unsigned long long ts_to_nsec(struct timespec ts)
+{
+ return ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec;
+}
+
+struct timespec nsec_to_ts(long long ns)
+{
+ struct timespec ts;
+
+ ts.tv_sec = ns/NSEC_PER_SEC;
+ ts.tv_nsec = ns%NSEC_PER_SEC;
+
+ return ts;
+}
+
+long long diff_timespec(struct timespec start, struct timespec end)
+{
+ long long start_ns, end_ns;
+
+ start_ns = ts_to_nsec(start);
+ end_ns = ts_to_nsec(end);
+
+ return end_ns - start_ns;
+}
+
+void get_monotonic_and_raw(struct timespec *mon, struct timespec *raw)
+{
+ struct timespec start, mid, end;
+ long long diff = 0, tmp;
+ int i;
+
+ clock_gettime(CLOCK_MONOTONIC, mon);
+ clock_gettime(CLOCK_MONOTONIC_RAW, raw);
+
+ /* Try to get a more tightly bound pairing */
+ for (i = 0; i < 3; i++) {
+ long long newdiff;
+
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ clock_gettime(CLOCK_MONOTONIC_RAW, &mid);
+ clock_gettime(CLOCK_MONOTONIC, &end);
+
+ newdiff = diff_timespec(start, end);
+ if (diff == 0 || newdiff < diff) {
+ diff = newdiff;
+ *raw = mid;
+ tmp = (ts_to_nsec(start) + ts_to_nsec(end))/2;
+ *mon = nsec_to_ts(tmp);
+ }
+ }
+}
+
+long long get_ppm_drift(void)
+{
+ struct timespec mon_start, raw_start, mon_end, raw_end;
+ long long delta1, delta2, eppm;
+
+ get_monotonic_and_raw(&mon_start, &raw_start);
+
+ sleep(15);
+
+ get_monotonic_and_raw(&mon_end, &raw_end);
+
+ delta1 = diff_timespec(mon_start, mon_end);
+ delta2 = diff_timespec(raw_start, raw_end);
+
+ eppm = (delta1*MILLION)/delta2 - MILLION;
+
+ return eppm;
+}
+
+int check_tick_adj(long tickval)
+{
+ long long eppm, ppm;
+ struct timex tx1;
+
+ tx1.modes = ADJ_TICK;
+ tx1.modes |= ADJ_OFFSET;
+ tx1.modes |= ADJ_FREQUENCY;
+ tx1.modes |= ADJ_STATUS;
+
+ tx1.status = STA_PLL;
+ tx1.offset = 0;
+ tx1.freq = 0;
+ tx1.tick = tickval;
+
+ adjtimex(&tx1);
+
+ sleep(1);
+
+ ppm = ((long long)tickval * MILLION)/systick - MILLION;
+ printf("Estimating tick (act: %ld usec, %lld ppm): ", tickval, ppm);
+
+ eppm = get_ppm_drift();
+ printf("%lld usec, %lld ppm", systick + (systick * eppm / MILLION), eppm);
+ fflush(stdout);
+
+ tx1.modes = 0;
+ adjtimex(&tx1);
+
+ if (tx1.offset || tx1.freq || tx1.tick != tickval) {
+ printf(" [ERROR]\n");
+ printf("\tUnexpected adjtimex return values, make sure ntpd is not running.\n");
+ return -1;
+ }
+
+ /*
+ * Here we use 100ppm difference as an error bound.
+ * We likely should see better, but some coarse clocksources
+ * cannot match the HZ tick size accurately, so we have a
+ * internal correction factor that doesn't scale exactly
+ * with the adjustment, resulting in > 10ppm error during
+ * a 10% adjustment. 100ppm also gives us more breathing
+ * room for interruptions during the measurement.
+ */
+ if (llabs(eppm - ppm) > 100) {
+ printf(" [FAILED]\n");
+ return -1;
+ }
+ printf(" [OK]\n");
+
+ return 0;
+}
+
+int main(int argv, char **argc)
+{
+ struct timespec raw;
+ long tick, max, interval, err;
+ struct timex tx1;
+
+ err = 0;
+ setbuf(stdout, NULL);
+
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &raw)) {
+ printf("ERR: NO CLOCK_MONOTONIC_RAW\n");
+ return -1;
+ }
+
+ printf("Each iteration takes about 15 seconds\n");
+
+ systick = sysconf(_SC_CLK_TCK);
+ systick = USEC_PER_SEC/sysconf(_SC_CLK_TCK);
+ max = systick/10; /* +/- 10% */
+ interval = max/4; /* in 4 steps each side */
+
+ for (tick = (systick - max); tick < (systick + max); tick += interval) {
+ if (check_tick_adj(tick)) {
+ err = 1;
+ break;
+ }
+ }
+
+ /* Reset things to zero */
+ tx1.modes = ADJ_TICK;
+ tx1.modes |= ADJ_OFFSET;
+ tx1.modes |= ADJ_FREQUENCY;
+
+ tx1.offset = 0;
+ tx1.freq = 0;
+ tx1.tick = systick;
+
+ adjtimex(&tx1);
+
+ if (err)
+ return ksft_exit_fail();
+
+ return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/alarmtimer-suspend.c b/tools/testing/selftests/timers/alarmtimer-suspend.c
new file mode 100644
index 000000000..4da09dbf8
--- /dev/null
+++ b/tools/testing/selftests/timers/alarmtimer-suspend.c
@@ -0,0 +1,178 @@
+/* alarmtimer suspend test
+ * John Stultz (john.stultz@linaro.org)
+ * (C) Copyright Linaro 2013
+ * Licensed under the GPLv2
+ *
+ * This test makes sure the alarmtimer & RTC wakeup code is
+ * functioning.
+ *
+ * To build:
+ * $ gcc alarmtimer-suspend.c -o alarmtimer-suspend -lrt
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+
+#include <stdio.h>
+#include <unistd.h>
+#include <time.h>
+#include <string.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include "../kselftest.h"
+
+#define CLOCK_REALTIME 0
+#define CLOCK_MONOTONIC 1
+#define CLOCK_PROCESS_CPUTIME_ID 2
+#define CLOCK_THREAD_CPUTIME_ID 3
+#define CLOCK_MONOTONIC_RAW 4
+#define CLOCK_REALTIME_COARSE 5
+#define CLOCK_MONOTONIC_COARSE 6
+#define CLOCK_BOOTTIME 7
+#define CLOCK_REALTIME_ALARM 8
+#define CLOCK_BOOTTIME_ALARM 9
+#define CLOCK_HWSPECIFIC 10
+#define CLOCK_TAI 11
+#define NR_CLOCKIDS 12
+
+
+#define NSEC_PER_SEC 1000000000ULL
+#define UNREASONABLE_LAT (NSEC_PER_SEC * 5) /* hopefully we resume in 5 secs */
+
+#define SUSPEND_SECS 15
+int alarmcount;
+int alarm_clock_id;
+struct timespec start_time;
+
+
+char *clockstring(int clockid)
+{
+ switch (clockid) {
+ case CLOCK_REALTIME:
+ return "CLOCK_REALTIME";
+ case CLOCK_MONOTONIC:
+ return "CLOCK_MONOTONIC";
+ case CLOCK_PROCESS_CPUTIME_ID:
+ return "CLOCK_PROCESS_CPUTIME_ID";
+ case CLOCK_THREAD_CPUTIME_ID:
+ return "CLOCK_THREAD_CPUTIME_ID";
+ case CLOCK_MONOTONIC_RAW:
+ return "CLOCK_MONOTONIC_RAW";
+ case CLOCK_REALTIME_COARSE:
+ return "CLOCK_REALTIME_COARSE";
+ case CLOCK_MONOTONIC_COARSE:
+ return "CLOCK_MONOTONIC_COARSE";
+ case CLOCK_BOOTTIME:
+ return "CLOCK_BOOTTIME";
+ case CLOCK_REALTIME_ALARM:
+ return "CLOCK_REALTIME_ALARM";
+ case CLOCK_BOOTTIME_ALARM:
+ return "CLOCK_BOOTTIME_ALARM";
+ case CLOCK_TAI:
+ return "CLOCK_TAI";
+ };
+ return "UNKNOWN_CLOCKID";
+}
+
+
+long long timespec_sub(struct timespec a, struct timespec b)
+{
+ long long ret = NSEC_PER_SEC * b.tv_sec + b.tv_nsec;
+
+ ret -= NSEC_PER_SEC * a.tv_sec + a.tv_nsec;
+ return ret;
+}
+
+int final_ret = 0;
+
+void sigalarm(int signo)
+{
+ long long delta_ns;
+ struct timespec ts;
+
+ clock_gettime(alarm_clock_id, &ts);
+ alarmcount++;
+
+ delta_ns = timespec_sub(start_time, ts);
+ delta_ns -= NSEC_PER_SEC * SUSPEND_SECS * alarmcount;
+
+ printf("ALARM(%i): %ld:%ld latency: %lld ns ", alarmcount, ts.tv_sec,
+ ts.tv_nsec, delta_ns);
+
+ if (delta_ns > UNREASONABLE_LAT) {
+ printf("[FAIL]\n");
+ final_ret = -1;
+ } else
+ printf("[OK]\n");
+
+}
+
+int main(void)
+{
+ timer_t tm1;
+ struct itimerspec its1, its2;
+ struct sigevent se;
+ struct sigaction act;
+ int signum = SIGRTMAX;
+
+ /* Set up signal handler: */
+ sigfillset(&act.sa_mask);
+ act.sa_flags = 0;
+ act.sa_handler = sigalarm;
+ sigaction(signum, &act, NULL);
+
+ /* Set up timer: */
+ memset(&se, 0, sizeof(se));
+ se.sigev_notify = SIGEV_SIGNAL;
+ se.sigev_signo = signum;
+ se.sigev_value.sival_int = 0;
+
+ for (alarm_clock_id = CLOCK_REALTIME_ALARM;
+ alarm_clock_id <= CLOCK_BOOTTIME_ALARM;
+ alarm_clock_id++) {
+
+ alarmcount = 0;
+ if (timer_create(alarm_clock_id, &se, &tm1) == -1) {
+ printf("timer_create failed, %s unsupported?\n",
+ clockstring(alarm_clock_id));
+ break;
+ }
+
+ clock_gettime(alarm_clock_id, &start_time);
+ printf("Start time (%s): %ld:%ld\n", clockstring(alarm_clock_id),
+ start_time.tv_sec, start_time.tv_nsec);
+ printf("Setting alarm for every %i seconds\n", SUSPEND_SECS);
+ its1.it_value = start_time;
+ its1.it_value.tv_sec += SUSPEND_SECS;
+ its1.it_interval.tv_sec = SUSPEND_SECS;
+ its1.it_interval.tv_nsec = 0;
+
+ timer_settime(tm1, TIMER_ABSTIME, &its1, &its2);
+
+ while (alarmcount < 5)
+ sleep(1); /* First 5 alarms, do nothing */
+
+ printf("Starting suspend loops\n");
+ while (alarmcount < 10) {
+ int ret;
+
+ sleep(3);
+ ret = system("echo mem > /sys/power/state");
+ if (ret)
+ break;
+ }
+ timer_delete(tm1);
+ }
+ if (final_ret)
+ return ksft_exit_fail();
+ return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/change_skew.c b/tools/testing/selftests/timers/change_skew.c
new file mode 100644
index 000000000..c4eab7124
--- /dev/null
+++ b/tools/testing/selftests/timers/change_skew.c
@@ -0,0 +1,96 @@
+/* ADJ_FREQ Skew change test
+ * by: john stultz (johnstul@us.ibm.com)
+ * (C) Copyright IBM 2012
+ * Licensed under the GPLv2
+ *
+ * NOTE: This is a meta-test which cranks the ADJ_FREQ knob and
+ * then uses other tests to detect problems. Thus this test requires
+ * that the raw_skew, inconsistency-check and nanosleep tests be
+ * present in the same directory it is run from.
+ *
+ * To build:
+ * $ gcc change_skew.c -o change_skew -lrt
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <time.h>
+#include "../kselftest.h"
+
+#define NSEC_PER_SEC 1000000000LL
+
+
+int change_skew_test(int ppm)
+{
+ struct timex tx;
+ int ret;
+
+ tx.modes = ADJ_FREQUENCY;
+ tx.freq = ppm << 16;
+
+ ret = adjtimex(&tx);
+ if (ret < 0) {
+ printf("Error adjusting freq\n");
+ return ret;
+ }
+
+ ret = system("./raw_skew");
+ ret |= system("./inconsistency-check");
+ ret |= system("./nanosleep");
+
+ return ret;
+}
+
+
+int main(int argv, char **argc)
+{
+ struct timex tx;
+ int i, ret;
+
+ int ppm[5] = {0, 250, 500, -250, -500};
+
+ /* Kill ntpd */
+ ret = system("killall -9 ntpd");
+
+ /* Make sure there's no offset adjustment going on */
+ tx.modes = ADJ_OFFSET;
+ tx.offset = 0;
+ ret = adjtimex(&tx);
+
+ if (ret < 0) {
+ printf("Maybe you're not running as root?\n");
+ return -1;
+ }
+
+ for (i = 0; i < 5; i++) {
+ printf("Using %i ppm adjustment\n", ppm[i]);
+ ret = change_skew_test(ppm[i]);
+ if (ret)
+ break;
+ }
+
+ /* Set things back */
+ tx.modes = ADJ_FREQUENCY;
+ tx.offset = 0;
+ adjtimex(&tx);
+
+ if (ret) {
+ printf("[FAIL]");
+ return ksft_exit_fail();
+ }
+ printf("[OK]");
+ return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/clocksource-switch.c b/tools/testing/selftests/timers/clocksource-switch.c
new file mode 100644
index 000000000..bfc974b45
--- /dev/null
+++ b/tools/testing/selftests/timers/clocksource-switch.c
@@ -0,0 +1,168 @@
+/* Clocksource change test
+ * by: john stultz (johnstul@us.ibm.com)
+ * (C) Copyright IBM 2012
+ * Licensed under the GPLv2
+ *
+ * NOTE: This is a meta-test which quickly changes the clocksourc and
+ * then uses other tests to detect problems. Thus this test requires
+ * that the inconsistency-check and nanosleep tests be present in the
+ * same directory it is run from.
+ *
+ * To build:
+ * $ gcc clocksource-switch.c -o clocksource-switch -lrt
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/wait.h>
+#include "../kselftest.h"
+
+
+int get_clocksources(char list[][30])
+{
+ int fd, i;
+ size_t size;
+ char buf[512];
+ char *head, *tmp;
+
+ fd = open("/sys/devices/system/clocksource/clocksource0/available_clocksource", O_RDONLY);
+
+ size = read(fd, buf, 512);
+
+ close(fd);
+
+ for (i = 0; i < 10; i++)
+ list[i][0] = '\0';
+
+ head = buf;
+ i = 0;
+ while (head - buf < size) {
+ /* Find the next space */
+ for (tmp = head; *tmp != ' '; tmp++) {
+ if (*tmp == '\n')
+ break;
+ if (*tmp == '\0')
+ break;
+ }
+ *tmp = '\0';
+ strcpy(list[i], head);
+ head = tmp + 1;
+ i++;
+ }
+
+ return i-1;
+}
+
+int get_cur_clocksource(char *buf, size_t size)
+{
+ int fd;
+
+ fd = open("/sys/devices/system/clocksource/clocksource0/current_clocksource", O_RDONLY);
+
+ size = read(fd, buf, size);
+
+ return 0;
+}
+
+int change_clocksource(char *clocksource)
+{
+ int fd;
+ ssize_t size;
+
+ fd = open("/sys/devices/system/clocksource/clocksource0/current_clocksource", O_WRONLY);
+
+ if (fd < 0)
+ return -1;
+
+ size = write(fd, clocksource, strlen(clocksource));
+
+ if (size < 0)
+ return -1;
+
+ close(fd);
+ return 0;
+}
+
+
+int run_tests(int secs)
+{
+ int ret;
+ char buf[255];
+
+ sprintf(buf, "./inconsistency-check -t %i", secs);
+ ret = system(buf);
+ if (ret)
+ return ret;
+ ret = system("./nanosleep");
+ return ret;
+}
+
+
+char clocksource_list[10][30];
+
+int main(int argv, char **argc)
+{
+ char orig_clk[512];
+ int count, i, status;
+ pid_t pid;
+
+ get_cur_clocksource(orig_clk, 512);
+
+ count = get_clocksources(clocksource_list);
+
+ if (change_clocksource(clocksource_list[0])) {
+ printf("Error: You probably need to run this as root\n");
+ return -1;
+ }
+
+ /* Check everything is sane before we start switching asyncrhonously */
+ for (i = 0; i < count; i++) {
+ printf("Validating clocksource %s\n", clocksource_list[i]);
+ if (change_clocksource(clocksource_list[i])) {
+ status = -1;
+ goto out;
+ }
+ if (run_tests(5)) {
+ status = -1;
+ goto out;
+ }
+ }
+
+
+ printf("Running Asynchronous Switching Tests...\n");
+ pid = fork();
+ if (!pid)
+ return run_tests(60);
+
+ while (pid != waitpid(pid, &status, WNOHANG))
+ for (i = 0; i < count; i++)
+ if (change_clocksource(clocksource_list[i])) {
+ status = -1;
+ goto out;
+ }
+out:
+ change_clocksource(orig_clk);
+
+ if (status)
+ return ksft_exit_fail();
+ return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/freq-step.c b/tools/testing/selftests/timers/freq-step.c
new file mode 100644
index 000000000..14a2b77fd
--- /dev/null
+++ b/tools/testing/selftests/timers/freq-step.c
@@ -0,0 +1,271 @@
+/*
+ * This test checks the response of the system clock to frequency
+ * steps made with adjtimex(). The frequency error and stability of
+ * the CLOCK_MONOTONIC clock relative to the CLOCK_MONOTONIC_RAW clock
+ * is measured in two intervals following the step. The test fails if
+ * values from the second interval exceed specified limits.
+ *
+ * Copyright (C) Miroslav Lichvar <mlichvar@redhat.com> 2017
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#include <math.h>
+#include <stdio.h>
+#include <sys/timex.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "../kselftest.h"
+
+#define SAMPLES 100
+#define SAMPLE_READINGS 10
+#define MEAN_SAMPLE_INTERVAL 0.1
+#define STEP_INTERVAL 1.0
+#define MAX_PRECISION 100e-9
+#define MAX_FREQ_ERROR 10e-6
+#define MAX_STDDEV 1000e-9
+
+#ifndef ADJ_SETOFFSET
+ #define ADJ_SETOFFSET 0x0100
+#endif
+
+struct sample {
+ double offset;
+ double time;
+};
+
+static time_t mono_raw_base;
+static time_t mono_base;
+static long user_hz;
+static double precision;
+static double mono_freq_offset;
+
+static double diff_timespec(struct timespec *ts1, struct timespec *ts2)
+{
+ return ts1->tv_sec - ts2->tv_sec + (ts1->tv_nsec - ts2->tv_nsec) / 1e9;
+}
+
+static double get_sample(struct sample *sample)
+{
+ double delay, mindelay = 0.0;
+ struct timespec ts1, ts2, ts3;
+ int i;
+
+ for (i = 0; i < SAMPLE_READINGS; i++) {
+ clock_gettime(CLOCK_MONOTONIC_RAW, &ts1);
+ clock_gettime(CLOCK_MONOTONIC, &ts2);
+ clock_gettime(CLOCK_MONOTONIC_RAW, &ts3);
+
+ ts1.tv_sec -= mono_raw_base;
+ ts2.tv_sec -= mono_base;
+ ts3.tv_sec -= mono_raw_base;
+
+ delay = diff_timespec(&ts3, &ts1);
+ if (delay <= 1e-9) {
+ i--;
+ continue;
+ }
+
+ if (!i || delay < mindelay) {
+ sample->offset = diff_timespec(&ts2, &ts1);
+ sample->offset -= delay / 2.0;
+ sample->time = ts1.tv_sec + ts1.tv_nsec / 1e9;
+ mindelay = delay;
+ }
+ }
+
+ return mindelay;
+}
+
+static void reset_ntp_error(void)
+{
+ struct timex txc;
+
+ txc.modes = ADJ_SETOFFSET;
+ txc.time.tv_sec = 0;
+ txc.time.tv_usec = 0;
+
+ if (adjtimex(&txc) < 0) {
+ perror("[FAIL] adjtimex");
+ ksft_exit_fail();
+ }
+}
+
+static void set_frequency(double freq)
+{
+ struct timex txc;
+ int tick_offset;
+
+ tick_offset = 1e6 * freq / user_hz;
+
+ txc.modes = ADJ_TICK | ADJ_FREQUENCY;
+ txc.tick = 1000000 / user_hz + tick_offset;
+ txc.freq = (1e6 * freq - user_hz * tick_offset) * (1 << 16);
+
+ if (adjtimex(&txc) < 0) {
+ perror("[FAIL] adjtimex");
+ ksft_exit_fail();
+ }
+}
+
+static void regress(struct sample *samples, int n, double *intercept,
+ double *slope, double *r_stddev, double *r_max)
+{
+ double x, y, r, x_sum, y_sum, xy_sum, x2_sum, r2_sum;
+ int i;
+
+ x_sum = 0.0, y_sum = 0.0, xy_sum = 0.0, x2_sum = 0.0;
+
+ for (i = 0; i < n; i++) {
+ x = samples[i].time;
+ y = samples[i].offset;
+
+ x_sum += x;
+ y_sum += y;
+ xy_sum += x * y;
+ x2_sum += x * x;
+ }
+
+ *slope = (xy_sum - x_sum * y_sum / n) / (x2_sum - x_sum * x_sum / n);
+ *intercept = (y_sum - *slope * x_sum) / n;
+
+ *r_max = 0.0, r2_sum = 0.0;
+
+ for (i = 0; i < n; i++) {
+ x = samples[i].time;
+ y = samples[i].offset;
+ r = fabs(x * *slope + *intercept - y);
+ if (*r_max < r)
+ *r_max = r;
+ r2_sum += r * r;
+ }
+
+ *r_stddev = sqrt(r2_sum / n);
+}
+
+static int run_test(int calibration, double freq_base, double freq_step)
+{
+ struct sample samples[SAMPLES];
+ double intercept, slope, stddev1, max1, stddev2, max2;
+ double freq_error1, freq_error2;
+ int i;
+
+ set_frequency(freq_base);
+
+ for (i = 0; i < 10; i++)
+ usleep(1e6 * MEAN_SAMPLE_INTERVAL / 10);
+
+ reset_ntp_error();
+
+ set_frequency(freq_base + freq_step);
+
+ for (i = 0; i < 10; i++)
+ usleep(rand() % 2000000 * STEP_INTERVAL / 10);
+
+ set_frequency(freq_base);
+
+ for (i = 0; i < SAMPLES; i++) {
+ usleep(rand() % 2000000 * MEAN_SAMPLE_INTERVAL);
+ get_sample(&samples[i]);
+ }
+
+ if (calibration) {
+ regress(samples, SAMPLES, &intercept, &slope, &stddev1, &max1);
+ mono_freq_offset = slope;
+ printf("CLOCK_MONOTONIC_RAW frequency offset: %11.3f ppm\n",
+ 1e6 * mono_freq_offset);
+ return 0;
+ }
+
+ regress(samples, SAMPLES / 2, &intercept, &slope, &stddev1, &max1);
+ freq_error1 = slope * (1.0 - mono_freq_offset) - mono_freq_offset -
+ freq_base;
+
+ regress(samples + SAMPLES / 2, SAMPLES / 2, &intercept, &slope,
+ &stddev2, &max2);
+ freq_error2 = slope * (1.0 - mono_freq_offset) - mono_freq_offset -
+ freq_base;
+
+ printf("%6.0f %+10.3f %6.0f %7.0f %+10.3f %6.0f %7.0f\t",
+ 1e6 * freq_step,
+ 1e6 * freq_error1, 1e9 * stddev1, 1e9 * max1,
+ 1e6 * freq_error2, 1e9 * stddev2, 1e9 * max2);
+
+ if (fabs(freq_error2) > MAX_FREQ_ERROR || stddev2 > MAX_STDDEV) {
+ printf("[FAIL]\n");
+ return 1;
+ }
+
+ printf("[OK]\n");
+ return 0;
+}
+
+static void init_test(void)
+{
+ struct timespec ts;
+ struct sample sample;
+
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &ts)) {
+ perror("[FAIL] clock_gettime(CLOCK_MONOTONIC_RAW)");
+ ksft_exit_fail();
+ }
+
+ mono_raw_base = ts.tv_sec;
+
+ if (clock_gettime(CLOCK_MONOTONIC, &ts)) {
+ perror("[FAIL] clock_gettime(CLOCK_MONOTONIC)");
+ ksft_exit_fail();
+ }
+
+ mono_base = ts.tv_sec;
+
+ user_hz = sysconf(_SC_CLK_TCK);
+
+ precision = get_sample(&sample) / 2.0;
+ printf("CLOCK_MONOTONIC_RAW+CLOCK_MONOTONIC precision: %.0f ns\t\t",
+ 1e9 * precision);
+
+ if (precision > MAX_PRECISION)
+ ksft_exit_skip("precision: %.0f ns > MAX_PRECISION: %.0f ns\n",
+ 1e9 * precision, 1e9 * MAX_PRECISION);
+
+ printf("[OK]\n");
+ srand(ts.tv_sec ^ ts.tv_nsec);
+
+ run_test(1, 0.0, 0.0);
+}
+
+int main(int argc, char **argv)
+{
+ double freq_base, freq_step;
+ int i, j, fails = 0;
+
+ init_test();
+
+ printf("Checking response to frequency step:\n");
+ printf(" Step 1st interval 2nd interval\n");
+ printf(" Freq Dev Max Freq Dev Max\n");
+
+ for (i = 2; i >= 0; i--) {
+ for (j = 0; j < 5; j++) {
+ freq_base = (rand() % (1 << 24) - (1 << 23)) / 65536e6;
+ freq_step = 10e-6 * (1 << (6 * i));
+ fails += run_test(0, freq_base, freq_step);
+ }
+ }
+
+ set_frequency(0.0);
+
+ if (fails)
+ return ksft_exit_fail();
+
+ return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/inconsistency-check.c b/tools/testing/selftests/timers/inconsistency-check.c
new file mode 100644
index 000000000..022d3ffe3
--- /dev/null
+++ b/tools/testing/selftests/timers/inconsistency-check.c
@@ -0,0 +1,193 @@
+/* Time inconsistency check test
+ * by: john stultz (johnstul@us.ibm.com)
+ * (C) Copyright IBM 2003, 2004, 2005, 2012
+ * (C) Copyright Linaro Limited 2015
+ * Licensed under the GPLv2
+ *
+ * To build:
+ * $ gcc inconsistency-check.c -o inconsistency-check -lrt
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <string.h>
+#include <signal.h>
+#include "../kselftest.h"
+
+#define CALLS_PER_LOOP 64
+#define NSEC_PER_SEC 1000000000ULL
+
+#define CLOCK_REALTIME 0
+#define CLOCK_MONOTONIC 1
+#define CLOCK_PROCESS_CPUTIME_ID 2
+#define CLOCK_THREAD_CPUTIME_ID 3
+#define CLOCK_MONOTONIC_RAW 4
+#define CLOCK_REALTIME_COARSE 5
+#define CLOCK_MONOTONIC_COARSE 6
+#define CLOCK_BOOTTIME 7
+#define CLOCK_REALTIME_ALARM 8
+#define CLOCK_BOOTTIME_ALARM 9
+#define CLOCK_HWSPECIFIC 10
+#define CLOCK_TAI 11
+#define NR_CLOCKIDS 12
+
+char *clockstring(int clockid)
+{
+ switch (clockid) {
+ case CLOCK_REALTIME:
+ return "CLOCK_REALTIME";
+ case CLOCK_MONOTONIC:
+ return "CLOCK_MONOTONIC";
+ case CLOCK_PROCESS_CPUTIME_ID:
+ return "CLOCK_PROCESS_CPUTIME_ID";
+ case CLOCK_THREAD_CPUTIME_ID:
+ return "CLOCK_THREAD_CPUTIME_ID";
+ case CLOCK_MONOTONIC_RAW:
+ return "CLOCK_MONOTONIC_RAW";
+ case CLOCK_REALTIME_COARSE:
+ return "CLOCK_REALTIME_COARSE";
+ case CLOCK_MONOTONIC_COARSE:
+ return "CLOCK_MONOTONIC_COARSE";
+ case CLOCK_BOOTTIME:
+ return "CLOCK_BOOTTIME";
+ case CLOCK_REALTIME_ALARM:
+ return "CLOCK_REALTIME_ALARM";
+ case CLOCK_BOOTTIME_ALARM:
+ return "CLOCK_BOOTTIME_ALARM";
+ case CLOCK_TAI:
+ return "CLOCK_TAI";
+ };
+ return "UNKNOWN_CLOCKID";
+}
+
+/* returns 1 if a <= b, 0 otherwise */
+static inline int in_order(struct timespec a, struct timespec b)
+{
+ /* use unsigned to avoid false positives on 2038 rollover */
+ if ((unsigned long)a.tv_sec < (unsigned long)b.tv_sec)
+ return 1;
+ if ((unsigned long)a.tv_sec > (unsigned long)b.tv_sec)
+ return 0;
+ if (a.tv_nsec > b.tv_nsec)
+ return 0;
+ return 1;
+}
+
+
+
+int consistency_test(int clock_type, unsigned long seconds)
+{
+ struct timespec list[CALLS_PER_LOOP];
+ int i, inconsistent;
+ long now, then;
+ time_t t;
+ char *start_str;
+
+ clock_gettime(clock_type, &list[0]);
+ now = then = list[0].tv_sec;
+
+ /* timestamp start of test */
+ t = time(0);
+ start_str = ctime(&t);
+
+ while (seconds == -1 || now - then < seconds) {
+ inconsistent = -1;
+
+ /* Fill list */
+ for (i = 0; i < CALLS_PER_LOOP; i++)
+ clock_gettime(clock_type, &list[i]);
+
+ /* Check for inconsistencies */
+ for (i = 0; i < CALLS_PER_LOOP - 1; i++)
+ if (!in_order(list[i], list[i+1]))
+ inconsistent = i;
+
+ /* display inconsistency */
+ if (inconsistent >= 0) {
+ unsigned long long delta;
+
+ printf("\%s\n", start_str);
+ for (i = 0; i < CALLS_PER_LOOP; i++) {
+ if (i == inconsistent)
+ printf("--------------------\n");
+ printf("%lu:%lu\n", list[i].tv_sec,
+ list[i].tv_nsec);
+ if (i == inconsistent + 1)
+ printf("--------------------\n");
+ }
+ delta = list[inconsistent].tv_sec * NSEC_PER_SEC;
+ delta += list[inconsistent].tv_nsec;
+ delta -= list[inconsistent+1].tv_sec * NSEC_PER_SEC;
+ delta -= list[inconsistent+1].tv_nsec;
+ printf("Delta: %llu ns\n", delta);
+ fflush(0);
+ /* timestamp inconsistency*/
+ t = time(0);
+ printf("%s\n", ctime(&t));
+ printf("[FAILED]\n");
+ return -1;
+ }
+ now = list[0].tv_sec;
+ }
+ printf("[OK]\n");
+ return 0;
+}
+
+
+int main(int argc, char *argv[])
+{
+ int clockid, opt;
+ int userclock = CLOCK_REALTIME;
+ int maxclocks = NR_CLOCKIDS;
+ int runtime = 10;
+ struct timespec ts;
+
+ /* Process arguments */
+ while ((opt = getopt(argc, argv, "t:c:")) != -1) {
+ switch (opt) {
+ case 't':
+ runtime = atoi(optarg);
+ break;
+ case 'c':
+ userclock = atoi(optarg);
+ maxclocks = userclock + 1;
+ break;
+ default:
+ printf("Usage: %s [-t <secs>] [-c <clockid>]\n", argv[0]);
+ printf(" -t: Number of seconds to run\n");
+ printf(" -c: clockid to use (default, all clockids)\n");
+ exit(-1);
+ }
+ }
+
+ setbuf(stdout, NULL);
+
+ for (clockid = userclock; clockid < maxclocks; clockid++) {
+
+ if (clockid == CLOCK_HWSPECIFIC)
+ continue;
+
+ if (!clock_gettime(clockid, &ts)) {
+ printf("Consistent %-30s ", clockstring(clockid));
+ if (consistency_test(clockid, runtime))
+ return ksft_exit_fail();
+ }
+ }
+ return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/leap-a-day.c b/tools/testing/selftests/timers/leap-a-day.c
new file mode 100644
index 000000000..19e46ed5d
--- /dev/null
+++ b/tools/testing/selftests/timers/leap-a-day.c
@@ -0,0 +1,378 @@
+/* Leap second stress test
+ * by: John Stultz (john.stultz@linaro.org)
+ * (C) Copyright IBM 2012
+ * (C) Copyright 2013, 2015 Linaro Limited
+ * Licensed under the GPLv2
+ *
+ * This test signals the kernel to insert a leap second
+ * every day at midnight GMT. This allows for stessing the
+ * kernel's leap-second behavior, as well as how well applications
+ * handle the leap-second discontinuity.
+ *
+ * Usage: leap-a-day [-s] [-i <num>]
+ *
+ * Options:
+ * -s: Each iteration, set the date to 10 seconds before midnight GMT.
+ * This speeds up the number of leapsecond transitions tested,
+ * but because it calls settimeofday frequently, advancing the
+ * time by 24 hours every ~16 seconds, it may cause application
+ * disruption.
+ *
+ * -i: Number of iterations to run (default: infinite)
+ *
+ * Other notes: Disabling NTP prior to running this is advised, as the two
+ * may conflict in their commands to the kernel.
+ *
+ * To build:
+ * $ gcc leap-a-day.c -o leap-a-day -lrt
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <sys/errno.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+#include "../kselftest.h"
+
+#define NSEC_PER_SEC 1000000000ULL
+#define CLOCK_TAI 11
+
+time_t next_leap;
+int error_found;
+
+/* returns 1 if a <= b, 0 otherwise */
+static inline int in_order(struct timespec a, struct timespec b)
+{
+ if (a.tv_sec < b.tv_sec)
+ return 1;
+ if (a.tv_sec > b.tv_sec)
+ return 0;
+ if (a.tv_nsec > b.tv_nsec)
+ return 0;
+ return 1;
+}
+
+struct timespec timespec_add(struct timespec ts, unsigned long long ns)
+{
+ ts.tv_nsec += ns;
+ while (ts.tv_nsec >= NSEC_PER_SEC) {
+ ts.tv_nsec -= NSEC_PER_SEC;
+ ts.tv_sec++;
+ }
+ return ts;
+}
+
+char *time_state_str(int state)
+{
+ switch (state) {
+ case TIME_OK: return "TIME_OK";
+ case TIME_INS: return "TIME_INS";
+ case TIME_DEL: return "TIME_DEL";
+ case TIME_OOP: return "TIME_OOP";
+ case TIME_WAIT: return "TIME_WAIT";
+ case TIME_BAD: return "TIME_BAD";
+ }
+ return "ERROR";
+}
+
+/* clear NTP time_status & time_state */
+int clear_time_state(void)
+{
+ struct timex tx;
+ int ret;
+
+ /*
+ * We have to call adjtime twice here, as kernels
+ * prior to 6b1859dba01c7 (included in 3.5 and
+ * -stable), had an issue with the state machine
+ * and wouldn't clear the STA_INS/DEL flag directly.
+ */
+ tx.modes = ADJ_STATUS;
+ tx.status = STA_PLL;
+ ret = adjtimex(&tx);
+
+ /* Clear maxerror, as it can cause UNSYNC to be set */
+ tx.modes = ADJ_MAXERROR;
+ tx.maxerror = 0;
+ ret = adjtimex(&tx);
+
+ /* Clear the status */
+ tx.modes = ADJ_STATUS;
+ tx.status = 0;
+ ret = adjtimex(&tx);
+
+ return ret;
+}
+
+/* Make sure we cleanup on ctrl-c */
+void handler(int unused)
+{
+ clear_time_state();
+ exit(0);
+}
+
+void sigalarm(int signo)
+{
+ struct timex tx;
+ int ret;
+
+ tx.modes = 0;
+ ret = adjtimex(&tx);
+
+ if (tx.time.tv_sec < next_leap) {
+ printf("Error: Early timer expiration! (Should be %ld)\n", next_leap);
+ error_found = 1;
+ printf("adjtimex: %10ld sec + %6ld us (%i)\t%s\n",
+ tx.time.tv_sec,
+ tx.time.tv_usec,
+ tx.tai,
+ time_state_str(ret));
+ }
+ if (ret != TIME_WAIT) {
+ printf("Error: Timer seeing incorrect NTP state? (Should be TIME_WAIT)\n");
+ error_found = 1;
+ printf("adjtimex: %10ld sec + %6ld us (%i)\t%s\n",
+ tx.time.tv_sec,
+ tx.time.tv_usec,
+ tx.tai,
+ time_state_str(ret));
+ }
+}
+
+
+/* Test for known hrtimer failure */
+void test_hrtimer_failure(void)
+{
+ struct timespec now, target;
+
+ clock_gettime(CLOCK_REALTIME, &now);
+ target = timespec_add(now, NSEC_PER_SEC/2);
+ clock_nanosleep(CLOCK_REALTIME, TIMER_ABSTIME, &target, NULL);
+ clock_gettime(CLOCK_REALTIME, &now);
+
+ if (!in_order(target, now)) {
+ printf("ERROR: hrtimer early expiration failure observed.\n");
+ error_found = 1;
+ }
+}
+
+int main(int argc, char **argv)
+{
+ timer_t tm1;
+ struct itimerspec its1;
+ struct sigevent se;
+ struct sigaction act;
+ int signum = SIGRTMAX;
+ int settime = 1;
+ int tai_time = 0;
+ int insert = 1;
+ int iterations = 10;
+ int opt;
+
+ /* Process arguments */
+ while ((opt = getopt(argc, argv, "sti:")) != -1) {
+ switch (opt) {
+ case 'w':
+ printf("Only setting leap-flag, not changing time. It could take up to a day for leap to trigger.\n");
+ settime = 0;
+ break;
+ case 'i':
+ iterations = atoi(optarg);
+ break;
+ case 't':
+ tai_time = 1;
+ break;
+ default:
+ printf("Usage: %s [-w] [-i <iterations>]\n", argv[0]);
+ printf(" -w: Set flag and wait for leap second each iteration");
+ printf(" (default sets time to right before leapsecond)\n");
+ printf(" -i: Number of iterations (-1 = infinite, default is 10)\n");
+ printf(" -t: Print TAI time\n");
+ exit(-1);
+ }
+ }
+
+ /* Make sure TAI support is present if -t was used */
+ if (tai_time) {
+ struct timespec ts;
+
+ if (clock_gettime(CLOCK_TAI, &ts)) {
+ printf("System doesn't support CLOCK_TAI\n");
+ ksft_exit_fail();
+ }
+ }
+
+ signal(SIGINT, handler);
+ signal(SIGKILL, handler);
+
+ /* Set up timer signal handler: */
+ sigfillset(&act.sa_mask);
+ act.sa_flags = 0;
+ act.sa_handler = sigalarm;
+ sigaction(signum, &act, NULL);
+
+ if (iterations < 0)
+ printf("This runs continuously. Press ctrl-c to stop\n");
+ else
+ printf("Running for %i iterations. Press ctrl-c to stop\n", iterations);
+
+ printf("\n");
+ while (1) {
+ int ret;
+ struct timespec ts;
+ struct timex tx;
+ time_t now;
+
+ /* Get the current time */
+ clock_gettime(CLOCK_REALTIME, &ts);
+
+ /* Calculate the next possible leap second 23:59:60 GMT */
+ next_leap = ts.tv_sec;
+ next_leap += 86400 - (next_leap % 86400);
+
+ if (settime) {
+ struct timeval tv;
+
+ tv.tv_sec = next_leap - 10;
+ tv.tv_usec = 0;
+ settimeofday(&tv, NULL);
+ printf("Setting time to %s", ctime(&tv.tv_sec));
+ }
+
+ /* Reset NTP time state */
+ clear_time_state();
+
+ /* Set the leap second insert flag */
+ tx.modes = ADJ_STATUS;
+ if (insert)
+ tx.status = STA_INS;
+ else
+ tx.status = STA_DEL;
+ ret = adjtimex(&tx);
+ if (ret < 0) {
+ printf("Error: Problem setting STA_INS/STA_DEL!: %s\n",
+ time_state_str(ret));
+ return ksft_exit_fail();
+ }
+
+ /* Validate STA_INS was set */
+ tx.modes = 0;
+ ret = adjtimex(&tx);
+ if (tx.status != STA_INS && tx.status != STA_DEL) {
+ printf("Error: STA_INS/STA_DEL not set!: %s\n",
+ time_state_str(ret));
+ return ksft_exit_fail();
+ }
+
+ if (tai_time) {
+ printf("Using TAI time,"
+ " no inconsistencies should be seen!\n");
+ }
+
+ printf("Scheduling leap second for %s", ctime(&next_leap));
+
+ /* Set up timer */
+ printf("Setting timer for %ld - %s", next_leap, ctime(&next_leap));
+ memset(&se, 0, sizeof(se));
+ se.sigev_notify = SIGEV_SIGNAL;
+ se.sigev_signo = signum;
+ se.sigev_value.sival_int = 0;
+ if (timer_create(CLOCK_REALTIME, &se, &tm1) == -1) {
+ printf("Error: timer_create failed\n");
+ return ksft_exit_fail();
+ }
+ its1.it_value.tv_sec = next_leap;
+ its1.it_value.tv_nsec = 0;
+ its1.it_interval.tv_sec = 0;
+ its1.it_interval.tv_nsec = 0;
+ timer_settime(tm1, TIMER_ABSTIME, &its1, NULL);
+
+ /* Wake up 3 seconds before leap */
+ ts.tv_sec = next_leap - 3;
+ ts.tv_nsec = 0;
+
+
+ while (clock_nanosleep(CLOCK_REALTIME, TIMER_ABSTIME, &ts, NULL))
+ printf("Something woke us up, returning to sleep\n");
+
+ /* Validate STA_INS is still set */
+ tx.modes = 0;
+ ret = adjtimex(&tx);
+ if (tx.status != STA_INS && tx.status != STA_DEL) {
+ printf("Something cleared STA_INS/STA_DEL, setting it again.\n");
+ tx.modes = ADJ_STATUS;
+ if (insert)
+ tx.status = STA_INS;
+ else
+ tx.status = STA_DEL;
+ ret = adjtimex(&tx);
+ }
+
+ /* Check adjtimex output every half second */
+ now = tx.time.tv_sec;
+ while (now < next_leap + 2) {
+ char buf[26];
+ struct timespec tai;
+ int ret;
+
+ tx.modes = 0;
+ ret = adjtimex(&tx);
+
+ if (tai_time) {
+ clock_gettime(CLOCK_TAI, &tai);
+ printf("%ld sec, %9ld ns\t%s\n",
+ tai.tv_sec,
+ tai.tv_nsec,
+ time_state_str(ret));
+ } else {
+ ctime_r(&tx.time.tv_sec, buf);
+ buf[strlen(buf)-1] = 0; /*remove trailing\n */
+
+ printf("%s + %6ld us (%i)\t%s\n",
+ buf,
+ tx.time.tv_usec,
+ tx.tai,
+ time_state_str(ret));
+ }
+ now = tx.time.tv_sec;
+ /* Sleep for another half second */
+ ts.tv_sec = 0;
+ ts.tv_nsec = NSEC_PER_SEC / 2;
+ clock_nanosleep(CLOCK_MONOTONIC, 0, &ts, NULL);
+ }
+ /* Switch to using other mode */
+ insert = !insert;
+
+ /* Note if kernel has known hrtimer failure */
+ test_hrtimer_failure();
+
+ printf("Leap complete\n");
+ if (error_found) {
+ printf("Errors observed\n");
+ clear_time_state();
+ return ksft_exit_fail();
+ }
+ printf("\n");
+ if ((iterations != -1) && !(--iterations))
+ break;
+ }
+
+ clear_time_state();
+ return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/leapcrash.c b/tools/testing/selftests/timers/leapcrash.c
new file mode 100644
index 000000000..dc80728ed
--- /dev/null
+++ b/tools/testing/selftests/timers/leapcrash.c
@@ -0,0 +1,108 @@
+/* Demo leapsecond deadlock
+ * by: John Stultz (john.stultz@linaro.org)
+ * (C) Copyright IBM 2012
+ * (C) Copyright 2013, 2015 Linaro Limited
+ * Licensed under the GPL
+ *
+ * This test demonstrates leapsecond deadlock that is possibe
+ * on kernels from 2.6.26 to 3.3.
+ *
+ * WARNING: THIS WILL LIKELY HARDHANG SYSTEMS AND MAY LOSE DATA
+ * RUN AT YOUR OWN RISK!
+ * To build:
+ * $ gcc leapcrash.c -o leapcrash -lrt
+ */
+
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <string.h>
+#include <signal.h>
+#include "../kselftest.h"
+
+/* clear NTP time_status & time_state */
+int clear_time_state(void)
+{
+ struct timex tx;
+ int ret;
+
+ /*
+ * We have to call adjtime twice here, as kernels
+ * prior to 6b1859dba01c7 (included in 3.5 and
+ * -stable), had an issue with the state machine
+ * and wouldn't clear the STA_INS/DEL flag directly.
+ */
+ tx.modes = ADJ_STATUS;
+ tx.status = STA_PLL;
+ ret = adjtimex(&tx);
+
+ tx.modes = ADJ_STATUS;
+ tx.status = 0;
+ ret = adjtimex(&tx);
+
+ return ret;
+}
+
+/* Make sure we cleanup on ctrl-c */
+void handler(int unused)
+{
+ clear_time_state();
+ exit(0);
+}
+
+
+int main(void)
+{
+ struct timex tx;
+ struct timespec ts;
+ time_t next_leap;
+ int count = 0;
+
+ setbuf(stdout, NULL);
+
+ signal(SIGINT, handler);
+ signal(SIGKILL, handler);
+ printf("This runs for a few minutes. Press ctrl-c to stop\n");
+
+ clear_time_state();
+
+
+ /* Get the current time */
+ clock_gettime(CLOCK_REALTIME, &ts);
+
+ /* Calculate the next possible leap second 23:59:60 GMT */
+ next_leap = ts.tv_sec;
+ next_leap += 86400 - (next_leap % 86400);
+
+ for (count = 0; count < 20; count++) {
+ struct timeval tv;
+
+
+ /* set the time to 2 seconds before the leap */
+ tv.tv_sec = next_leap - 2;
+ tv.tv_usec = 0;
+ if (settimeofday(&tv, NULL)) {
+ printf("Error: You're likely not running with proper (ie: root) permissions\n");
+ return ksft_exit_fail();
+ }
+ tx.modes = 0;
+ adjtimex(&tx);
+
+ /* hammer on adjtime w/ STA_INS */
+ while (tx.time.tv_sec < next_leap + 1) {
+ /* Set the leap second insert flag */
+ tx.modes = ADJ_STATUS;
+ tx.status = STA_INS;
+ adjtimex(&tx);
+ }
+ clear_time_state();
+ printf(".");
+ fflush(stdout);
+ }
+ printf("[OK]\n");
+ return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/mqueue-lat.c b/tools/testing/selftests/timers/mqueue-lat.c
new file mode 100644
index 000000000..7916cf5cc
--- /dev/null
+++ b/tools/testing/selftests/timers/mqueue-lat.c
@@ -0,0 +1,114 @@
+/* Measure mqueue timeout latency
+ * by: john stultz (john.stultz@linaro.org)
+ * (C) Copyright Linaro 2013
+ *
+ * Inspired with permission from example test by:
+ * Romain Francoise <romain@orebokech.com>
+ * Licensed under the GPLv2
+ *
+ * To build:
+ * $ gcc mqueue-lat.c -o mqueue-lat -lrt
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <string.h>
+#include <signal.h>
+#include <errno.h>
+#include <mqueue.h>
+#include "../kselftest.h"
+
+#define NSEC_PER_SEC 1000000000ULL
+
+#define TARGET_TIMEOUT 100000000 /* 100ms in nanoseconds */
+#define UNRESONABLE_LATENCY 40000000 /* 40ms in nanosecs */
+
+
+long long timespec_sub(struct timespec a, struct timespec b)
+{
+ long long ret = NSEC_PER_SEC * b.tv_sec + b.tv_nsec;
+
+ ret -= NSEC_PER_SEC * a.tv_sec + a.tv_nsec;
+ return ret;
+}
+
+struct timespec timespec_add(struct timespec ts, unsigned long long ns)
+{
+ ts.tv_nsec += ns;
+ while (ts.tv_nsec >= NSEC_PER_SEC) {
+ ts.tv_nsec -= NSEC_PER_SEC;
+ ts.tv_sec++;
+ }
+ return ts;
+}
+
+int mqueue_lat_test(void)
+{
+
+ mqd_t q;
+ struct mq_attr attr;
+ struct timespec start, end, now, target;
+ int i, count, ret;
+
+ q = mq_open("/foo", O_CREAT | O_RDONLY, 0666, NULL);
+ if (q < 0) {
+ perror("mq_open");
+ return -1;
+ }
+ mq_getattr(q, &attr);
+
+
+ count = 100;
+ clock_gettime(CLOCK_MONOTONIC, &start);
+
+ for (i = 0; i < count; i++) {
+ char buf[attr.mq_msgsize];
+
+ clock_gettime(CLOCK_REALTIME, &now);
+ target = now;
+ target = timespec_add(now, TARGET_TIMEOUT); /* 100ms */
+
+ ret = mq_timedreceive(q, buf, sizeof(buf), NULL, &target);
+ if (ret < 0 && errno != ETIMEDOUT) {
+ perror("mq_timedreceive");
+ return -1;
+ }
+ }
+ clock_gettime(CLOCK_MONOTONIC, &end);
+
+ mq_close(q);
+
+ if ((timespec_sub(start, end)/count) > TARGET_TIMEOUT + UNRESONABLE_LATENCY)
+ return -1;
+
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ int ret;
+
+ printf("Mqueue latency : ");
+ fflush(stdout);
+
+ ret = mqueue_lat_test();
+ if (ret < 0) {
+ printf("[FAILED]\n");
+ return ksft_exit_fail();
+ }
+ printf("[OK]\n");
+ return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/nanosleep.c b/tools/testing/selftests/timers/nanosleep.c
new file mode 100644
index 000000000..71b5441c2
--- /dev/null
+++ b/tools/testing/selftests/timers/nanosleep.c
@@ -0,0 +1,165 @@
+/* Make sure timers don't return early
+ * by: john stultz (johnstul@us.ibm.com)
+ * John Stultz (john.stultz@linaro.org)
+ * (C) Copyright IBM 2012
+ * (C) Copyright Linaro 2013 2015
+ * Licensed under the GPLv2
+ *
+ * To build:
+ * $ gcc nanosleep.c -o nanosleep -lrt
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <string.h>
+#include <signal.h>
+#include "../kselftest.h"
+
+#define NSEC_PER_SEC 1000000000ULL
+
+#define CLOCK_REALTIME 0
+#define CLOCK_MONOTONIC 1
+#define CLOCK_PROCESS_CPUTIME_ID 2
+#define CLOCK_THREAD_CPUTIME_ID 3
+#define CLOCK_MONOTONIC_RAW 4
+#define CLOCK_REALTIME_COARSE 5
+#define CLOCK_MONOTONIC_COARSE 6
+#define CLOCK_BOOTTIME 7
+#define CLOCK_REALTIME_ALARM 8
+#define CLOCK_BOOTTIME_ALARM 9
+#define CLOCK_HWSPECIFIC 10
+#define CLOCK_TAI 11
+#define NR_CLOCKIDS 12
+
+#define UNSUPPORTED 0xf00f
+
+char *clockstring(int clockid)
+{
+ switch (clockid) {
+ case CLOCK_REALTIME:
+ return "CLOCK_REALTIME";
+ case CLOCK_MONOTONIC:
+ return "CLOCK_MONOTONIC";
+ case CLOCK_PROCESS_CPUTIME_ID:
+ return "CLOCK_PROCESS_CPUTIME_ID";
+ case CLOCK_THREAD_CPUTIME_ID:
+ return "CLOCK_THREAD_CPUTIME_ID";
+ case CLOCK_MONOTONIC_RAW:
+ return "CLOCK_MONOTONIC_RAW";
+ case CLOCK_REALTIME_COARSE:
+ return "CLOCK_REALTIME_COARSE";
+ case CLOCK_MONOTONIC_COARSE:
+ return "CLOCK_MONOTONIC_COARSE";
+ case CLOCK_BOOTTIME:
+ return "CLOCK_BOOTTIME";
+ case CLOCK_REALTIME_ALARM:
+ return "CLOCK_REALTIME_ALARM";
+ case CLOCK_BOOTTIME_ALARM:
+ return "CLOCK_BOOTTIME_ALARM";
+ case CLOCK_TAI:
+ return "CLOCK_TAI";
+ };
+ return "UNKNOWN_CLOCKID";
+}
+
+/* returns 1 if a <= b, 0 otherwise */
+static inline int in_order(struct timespec a, struct timespec b)
+{
+ if (a.tv_sec < b.tv_sec)
+ return 1;
+ if (a.tv_sec > b.tv_sec)
+ return 0;
+ if (a.tv_nsec > b.tv_nsec)
+ return 0;
+ return 1;
+}
+
+struct timespec timespec_add(struct timespec ts, unsigned long long ns)
+{
+ ts.tv_nsec += ns;
+ while (ts.tv_nsec >= NSEC_PER_SEC) {
+ ts.tv_nsec -= NSEC_PER_SEC;
+ ts.tv_sec++;
+ }
+ return ts;
+}
+
+int nanosleep_test(int clockid, long long ns)
+{
+ struct timespec now, target, rel;
+
+ /* First check abs time */
+ if (clock_gettime(clockid, &now))
+ return UNSUPPORTED;
+ target = timespec_add(now, ns);
+
+ if (clock_nanosleep(clockid, TIMER_ABSTIME, &target, NULL))
+ return UNSUPPORTED;
+ clock_gettime(clockid, &now);
+
+ if (!in_order(target, now))
+ return -1;
+
+ /* Second check reltime */
+ clock_gettime(clockid, &now);
+ rel.tv_sec = 0;
+ rel.tv_nsec = 0;
+ rel = timespec_add(rel, ns);
+ target = timespec_add(now, ns);
+ clock_nanosleep(clockid, 0, &rel, NULL);
+ clock_gettime(clockid, &now);
+
+ if (!in_order(target, now))
+ return -1;
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ long long length;
+ int clockid, ret;
+
+ for (clockid = CLOCK_REALTIME; clockid < NR_CLOCKIDS; clockid++) {
+
+ /* Skip cputime clockids since nanosleep won't increment cputime */
+ if (clockid == CLOCK_PROCESS_CPUTIME_ID ||
+ clockid == CLOCK_THREAD_CPUTIME_ID ||
+ clockid == CLOCK_HWSPECIFIC)
+ continue;
+
+ printf("Nanosleep %-31s ", clockstring(clockid));
+ fflush(stdout);
+
+ length = 10;
+ while (length <= (NSEC_PER_SEC * 10)) {
+ ret = nanosleep_test(clockid, length);
+ if (ret == UNSUPPORTED) {
+ printf("[UNSUPPORTED]\n");
+ goto next;
+ }
+ if (ret < 0) {
+ printf("[FAILED]\n");
+ return ksft_exit_fail();
+ }
+ length *= 100;
+ }
+ printf("[OK]\n");
+next:
+ ret = 0;
+ }
+ return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/nsleep-lat.c b/tools/testing/selftests/timers/nsleep-lat.c
new file mode 100644
index 000000000..eb3e79ed7
--- /dev/null
+++ b/tools/testing/selftests/timers/nsleep-lat.c
@@ -0,0 +1,180 @@
+/* Measure nanosleep timer latency
+ * by: john stultz (john.stultz@linaro.org)
+ * (C) Copyright Linaro 2013
+ * Licensed under the GPLv2
+ *
+ * To build:
+ * $ gcc nsleep-lat.c -o nsleep-lat -lrt
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <string.h>
+#include <signal.h>
+#include "../kselftest.h"
+
+#define NSEC_PER_SEC 1000000000ULL
+
+#define UNRESONABLE_LATENCY 40000000 /* 40ms in nanosecs */
+
+
+#define CLOCK_REALTIME 0
+#define CLOCK_MONOTONIC 1
+#define CLOCK_PROCESS_CPUTIME_ID 2
+#define CLOCK_THREAD_CPUTIME_ID 3
+#define CLOCK_MONOTONIC_RAW 4
+#define CLOCK_REALTIME_COARSE 5
+#define CLOCK_MONOTONIC_COARSE 6
+#define CLOCK_BOOTTIME 7
+#define CLOCK_REALTIME_ALARM 8
+#define CLOCK_BOOTTIME_ALARM 9
+#define CLOCK_HWSPECIFIC 10
+#define CLOCK_TAI 11
+#define NR_CLOCKIDS 12
+
+#define UNSUPPORTED 0xf00f
+
+char *clockstring(int clockid)
+{
+ switch (clockid) {
+ case CLOCK_REALTIME:
+ return "CLOCK_REALTIME";
+ case CLOCK_MONOTONIC:
+ return "CLOCK_MONOTONIC";
+ case CLOCK_PROCESS_CPUTIME_ID:
+ return "CLOCK_PROCESS_CPUTIME_ID";
+ case CLOCK_THREAD_CPUTIME_ID:
+ return "CLOCK_THREAD_CPUTIME_ID";
+ case CLOCK_MONOTONIC_RAW:
+ return "CLOCK_MONOTONIC_RAW";
+ case CLOCK_REALTIME_COARSE:
+ return "CLOCK_REALTIME_COARSE";
+ case CLOCK_MONOTONIC_COARSE:
+ return "CLOCK_MONOTONIC_COARSE";
+ case CLOCK_BOOTTIME:
+ return "CLOCK_BOOTTIME";
+ case CLOCK_REALTIME_ALARM:
+ return "CLOCK_REALTIME_ALARM";
+ case CLOCK_BOOTTIME_ALARM:
+ return "CLOCK_BOOTTIME_ALARM";
+ case CLOCK_TAI:
+ return "CLOCK_TAI";
+ };
+ return "UNKNOWN_CLOCKID";
+}
+
+struct timespec timespec_add(struct timespec ts, unsigned long long ns)
+{
+ ts.tv_nsec += ns;
+ while (ts.tv_nsec >= NSEC_PER_SEC) {
+ ts.tv_nsec -= NSEC_PER_SEC;
+ ts.tv_sec++;
+ }
+ return ts;
+}
+
+
+long long timespec_sub(struct timespec a, struct timespec b)
+{
+ long long ret = NSEC_PER_SEC * b.tv_sec + b.tv_nsec;
+
+ ret -= NSEC_PER_SEC * a.tv_sec + a.tv_nsec;
+ return ret;
+}
+
+int nanosleep_lat_test(int clockid, long long ns)
+{
+ struct timespec start, end, target;
+ long long latency = 0;
+ int i, count;
+
+ target.tv_sec = ns/NSEC_PER_SEC;
+ target.tv_nsec = ns%NSEC_PER_SEC;
+
+ if (clock_gettime(clockid, &start))
+ return UNSUPPORTED;
+ if (clock_nanosleep(clockid, 0, &target, NULL))
+ return UNSUPPORTED;
+
+ count = 10;
+
+ /* First check relative latency */
+ clock_gettime(clockid, &start);
+ for (i = 0; i < count; i++)
+ clock_nanosleep(clockid, 0, &target, NULL);
+ clock_gettime(clockid, &end);
+
+ if (((timespec_sub(start, end)/count)-ns) > UNRESONABLE_LATENCY) {
+ printf("Large rel latency: %lld ns :", (timespec_sub(start, end)/count)-ns);
+ return -1;
+ }
+
+ /* Next check absolute latency */
+ for (i = 0; i < count; i++) {
+ clock_gettime(clockid, &start);
+ target = timespec_add(start, ns);
+ clock_nanosleep(clockid, TIMER_ABSTIME, &target, NULL);
+ clock_gettime(clockid, &end);
+ latency += timespec_sub(target, end);
+ }
+
+ if (latency/count > UNRESONABLE_LATENCY) {
+ printf("Large abs latency: %lld ns :", latency/count);
+ return -1;
+ }
+
+ return 0;
+}
+
+
+
+int main(int argc, char **argv)
+{
+ long long length;
+ int clockid, ret;
+
+ for (clockid = CLOCK_REALTIME; clockid < NR_CLOCKIDS; clockid++) {
+
+ /* Skip cputime clockids since nanosleep won't increment cputime */
+ if (clockid == CLOCK_PROCESS_CPUTIME_ID ||
+ clockid == CLOCK_THREAD_CPUTIME_ID ||
+ clockid == CLOCK_HWSPECIFIC)
+ continue;
+
+ printf("nsleep latency %-26s ", clockstring(clockid));
+ fflush(stdout);
+
+ length = 10;
+ while (length <= (NSEC_PER_SEC * 10)) {
+ ret = nanosleep_lat_test(clockid, length);
+ if (ret)
+ break;
+ length *= 100;
+
+ }
+
+ if (ret == UNSUPPORTED) {
+ printf("[UNSUPPORTED]\n");
+ continue;
+ }
+ if (ret < 0) {
+ printf("[FAILED]\n");
+ return ksft_exit_fail();
+ }
+ printf("[OK]\n");
+ }
+ return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/posix_timers.c b/tools/testing/selftests/timers/posix_timers.c
new file mode 100644
index 000000000..15cf56d32
--- /dev/null
+++ b/tools/testing/selftests/timers/posix_timers.c
@@ -0,0 +1,222 @@
+/*
+ * Copyright (C) 2013 Red Hat, Inc., Frederic Weisbecker <fweisbec@redhat.com>
+ *
+ * Licensed under the terms of the GNU GPL License version 2
+ *
+ * Selftests for a few posix timers interface.
+ *
+ * Kernel loop code stolen from Steven Rostedt <srostedt@redhat.com>
+ */
+
+#include <sys/time.h>
+#include <stdio.h>
+#include <signal.h>
+#include <unistd.h>
+#include <time.h>
+#include <pthread.h>
+
+#include "../kselftest.h"
+
+#define DELAY 2
+#define USECS_PER_SEC 1000000
+
+static volatile int done;
+
+/* Busy loop in userspace to elapse ITIMER_VIRTUAL */
+static void user_loop(void)
+{
+ while (!done);
+}
+
+/*
+ * Try to spend as much time as possible in kernelspace
+ * to elapse ITIMER_PROF.
+ */
+static void kernel_loop(void)
+{
+ void *addr = sbrk(0);
+ int err = 0;
+
+ while (!done && !err) {
+ err = brk(addr + 4096);
+ err |= brk(addr);
+ }
+}
+
+/*
+ * Sleep until ITIMER_REAL expiration.
+ */
+static void idle_loop(void)
+{
+ pause();
+}
+
+static void sig_handler(int nr)
+{
+ done = 1;
+}
+
+/*
+ * Check the expected timer expiration matches the GTOD elapsed delta since
+ * we armed the timer. Keep a 0.5 sec error margin due to various jitter.
+ */
+static int check_diff(struct timeval start, struct timeval end)
+{
+ long long diff;
+
+ diff = end.tv_usec - start.tv_usec;
+ diff += (end.tv_sec - start.tv_sec) * USECS_PER_SEC;
+
+ if (abs(diff - DELAY * USECS_PER_SEC) > USECS_PER_SEC / 2) {
+ printf("Diff too high: %lld..", diff);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int check_itimer(int which)
+{
+ int err;
+ struct timeval start, end;
+ struct itimerval val = {
+ .it_value.tv_sec = DELAY,
+ };
+
+ printf("Check itimer ");
+
+ if (which == ITIMER_VIRTUAL)
+ printf("virtual... ");
+ else if (which == ITIMER_PROF)
+ printf("prof... ");
+ else if (which == ITIMER_REAL)
+ printf("real... ");
+
+ fflush(stdout);
+
+ done = 0;
+
+ if (which == ITIMER_VIRTUAL)
+ signal(SIGVTALRM, sig_handler);
+ else if (which == ITIMER_PROF)
+ signal(SIGPROF, sig_handler);
+ else if (which == ITIMER_REAL)
+ signal(SIGALRM, sig_handler);
+
+ err = gettimeofday(&start, NULL);
+ if (err < 0) {
+ perror("Can't call gettimeofday()\n");
+ return -1;
+ }
+
+ err = setitimer(which, &val, NULL);
+ if (err < 0) {
+ perror("Can't set timer\n");
+ return -1;
+ }
+
+ if (which == ITIMER_VIRTUAL)
+ user_loop();
+ else if (which == ITIMER_PROF)
+ kernel_loop();
+ else if (which == ITIMER_REAL)
+ idle_loop();
+
+ err = gettimeofday(&end, NULL);
+ if (err < 0) {
+ perror("Can't call gettimeofday()\n");
+ return -1;
+ }
+
+ if (!check_diff(start, end))
+ printf("[OK]\n");
+ else
+ printf("[FAIL]\n");
+
+ return 0;
+}
+
+static int check_timer_create(int which)
+{
+ int err;
+ timer_t id;
+ struct timeval start, end;
+ struct itimerspec val = {
+ .it_value.tv_sec = DELAY,
+ };
+
+ printf("Check timer_create() ");
+ if (which == CLOCK_THREAD_CPUTIME_ID) {
+ printf("per thread... ");
+ } else if (which == CLOCK_PROCESS_CPUTIME_ID) {
+ printf("per process... ");
+ }
+ fflush(stdout);
+
+ done = 0;
+ err = timer_create(which, NULL, &id);
+ if (err < 0) {
+ perror("Can't create timer\n");
+ return -1;
+ }
+ signal(SIGALRM, sig_handler);
+
+ err = gettimeofday(&start, NULL);
+ if (err < 0) {
+ perror("Can't call gettimeofday()\n");
+ return -1;
+ }
+
+ err = timer_settime(id, 0, &val, NULL);
+ if (err < 0) {
+ perror("Can't set timer\n");
+ return -1;
+ }
+
+ user_loop();
+
+ err = gettimeofday(&end, NULL);
+ if (err < 0) {
+ perror("Can't call gettimeofday()\n");
+ return -1;
+ }
+
+ if (!check_diff(start, end))
+ printf("[OK]\n");
+ else
+ printf("[FAIL]\n");
+
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ printf("Testing posix timers. False negative may happen on CPU execution \n");
+ printf("based timers if other threads run on the CPU...\n");
+
+ if (check_itimer(ITIMER_VIRTUAL) < 0)
+ return ksft_exit_fail();
+
+ if (check_itimer(ITIMER_PROF) < 0)
+ return ksft_exit_fail();
+
+ if (check_itimer(ITIMER_REAL) < 0)
+ return ksft_exit_fail();
+
+ if (check_timer_create(CLOCK_THREAD_CPUTIME_ID) < 0)
+ return ksft_exit_fail();
+
+ /*
+ * It's unfortunately hard to reliably test a timer expiration
+ * on parallel multithread cputime. We could arm it to expire
+ * on DELAY * nr_threads, with nr_threads busy looping, then wait
+ * the normal DELAY since the time is elapsing nr_threads faster.
+ * But for that we need to ensure we have real physical free CPUs
+ * to ensure true parallelism. So test only one thread until we
+ * find a better solution.
+ */
+ if (check_timer_create(CLOCK_PROCESS_CPUTIME_ID) < 0)
+ return ksft_exit_fail();
+
+ return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/raw_skew.c b/tools/testing/selftests/timers/raw_skew.c
new file mode 100644
index 000000000..b41d8dd0c
--- /dev/null
+++ b/tools/testing/selftests/timers/raw_skew.c
@@ -0,0 +1,148 @@
+/* CLOCK_MONOTONIC vs CLOCK_MONOTONIC_RAW skew test
+ * by: john stultz (johnstul@us.ibm.com)
+ * John Stultz <john.stultz@linaro.org>
+ * (C) Copyright IBM 2012
+ * (C) Copyright Linaro Limited 2015
+ * Licensed under the GPLv2
+ *
+ * To build:
+ * $ gcc raw_skew.c -o raw_skew -lrt
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <time.h>
+#include "../kselftest.h"
+
+#define CLOCK_MONOTONIC_RAW 4
+#define NSEC_PER_SEC 1000000000LL
+
+#define shift_right(x, s) ({ \
+ __typeof__(x) __x = (x); \
+ __typeof__(s) __s = (s); \
+ __x < 0 ? -(-__x >> __s) : __x >> __s; \
+})
+
+long long llabs(long long val)
+{
+ if (val < 0)
+ val = -val;
+ return val;
+}
+
+unsigned long long ts_to_nsec(struct timespec ts)
+{
+ return ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec;
+}
+
+struct timespec nsec_to_ts(long long ns)
+{
+ struct timespec ts;
+
+ ts.tv_sec = ns/NSEC_PER_SEC;
+ ts.tv_nsec = ns%NSEC_PER_SEC;
+ return ts;
+}
+
+long long diff_timespec(struct timespec start, struct timespec end)
+{
+ long long start_ns, end_ns;
+
+ start_ns = ts_to_nsec(start);
+ end_ns = ts_to_nsec(end);
+ return end_ns - start_ns;
+}
+
+void get_monotonic_and_raw(struct timespec *mon, struct timespec *raw)
+{
+ struct timespec start, mid, end;
+ long long diff = 0, tmp;
+ int i;
+
+ for (i = 0; i < 3; i++) {
+ long long newdiff;
+
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ clock_gettime(CLOCK_MONOTONIC_RAW, &mid);
+ clock_gettime(CLOCK_MONOTONIC, &end);
+
+ newdiff = diff_timespec(start, end);
+ if (diff == 0 || newdiff < diff) {
+ diff = newdiff;
+ *raw = mid;
+ tmp = (ts_to_nsec(start) + ts_to_nsec(end))/2;
+ *mon = nsec_to_ts(tmp);
+ }
+ }
+}
+
+int main(int argv, char **argc)
+{
+ struct timespec mon, raw, start, end;
+ long long delta1, delta2, interval, eppm, ppm;
+ struct timex tx1, tx2;
+
+ setbuf(stdout, NULL);
+
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &raw)) {
+ printf("ERR: NO CLOCK_MONOTONIC_RAW\n");
+ return -1;
+ }
+
+ tx1.modes = 0;
+ adjtimex(&tx1);
+ get_monotonic_and_raw(&mon, &raw);
+ start = mon;
+ delta1 = diff_timespec(mon, raw);
+
+ if (tx1.offset)
+ printf("WARNING: ADJ_OFFSET in progress, this will cause inaccurate results\n");
+
+ printf("Estimating clock drift: ");
+ fflush(stdout);
+ sleep(120);
+
+ get_monotonic_and_raw(&mon, &raw);
+ end = mon;
+ tx2.modes = 0;
+ adjtimex(&tx2);
+ delta2 = diff_timespec(mon, raw);
+
+ interval = diff_timespec(start, end);
+
+ /* calculate measured ppm between MONOTONIC and MONOTONIC_RAW */
+ eppm = ((delta2-delta1)*NSEC_PER_SEC)/interval;
+ eppm = -eppm;
+ printf("%lld.%i(est)", eppm/1000, abs((int)(eppm%1000)));
+
+ /* Avg the two actual freq samples adjtimex gave us */
+ ppm = (tx1.freq + tx2.freq) * 1000 / 2;
+ ppm = (long long)tx1.freq * 1000;
+ ppm = shift_right(ppm, 16);
+ printf(" %lld.%i(act)", ppm/1000, abs((int)(ppm%1000)));
+
+ if (llabs(eppm - ppm) > 1000) {
+ if (tx1.offset || tx2.offset ||
+ tx1.freq != tx2.freq || tx1.tick != tx2.tick) {
+ printf(" [SKIP]\n");
+ return ksft_exit_skip("The clock was adjusted externally. Shutdown NTPd or other time sync daemons\n");
+ }
+ printf(" [FAILED]\n");
+ return ksft_exit_fail();
+ }
+ printf(" [OK]\n");
+ return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/rtcpie.c b/tools/testing/selftests/timers/rtcpie.c
new file mode 100644
index 000000000..4ef2184f1
--- /dev/null
+++ b/tools/testing/selftests/timers/rtcpie.c
@@ -0,0 +1,142 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Real Time Clock Periodic Interrupt test program
+ *
+ * Since commit 6610e0893b8bc ("RTC: Rework RTC code to use timerqueue for
+ * events"), PIE are completely handled using hrtimers, without actually using
+ * any underlying hardware RTC.
+ *
+ */
+
+#include <stdio.h>
+#include <linux/rtc.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <errno.h>
+
+#include "../kselftest.h"
+
+/*
+ * This expects the new RTC class driver framework, working with
+ * clocks that will often not be clones of what the PC-AT had.
+ * Use the command line to specify another RTC if you need one.
+ */
+static const char default_rtc[] = "/dev/rtc0";
+
+int main(int argc, char **argv)
+{
+ int i, fd, retval, irqcount = 0;
+ unsigned long tmp, data, old_pie_rate;
+ const char *rtc = default_rtc;
+ struct timeval start, end, diff;
+
+ switch (argc) {
+ case 2:
+ rtc = argv[1];
+ break;
+ case 1:
+ fd = open(default_rtc, O_RDONLY);
+ if (fd == -1) {
+ printf("Default RTC %s does not exist. Test Skipped!\n", default_rtc);
+ exit(KSFT_SKIP);
+ }
+ close(fd);
+ break;
+ default:
+ fprintf(stderr, "usage: rtctest [rtcdev] [d]\n");
+ return 1;
+ }
+
+ fd = open(rtc, O_RDONLY);
+
+ if (fd == -1) {
+ perror(rtc);
+ exit(errno);
+ }
+
+ /* Read periodic IRQ rate */
+ retval = ioctl(fd, RTC_IRQP_READ, &old_pie_rate);
+ if (retval == -1) {
+ /* not all RTCs support periodic IRQs */
+ if (errno == EINVAL) {
+ fprintf(stderr, "\nNo periodic IRQ support\n");
+ goto done;
+ }
+ perror("RTC_IRQP_READ ioctl");
+ exit(errno);
+ }
+ fprintf(stderr, "\nPeriodic IRQ rate is %ldHz.\n", old_pie_rate);
+
+ fprintf(stderr, "Counting 20 interrupts at:");
+ fflush(stderr);
+
+ /* The frequencies 128Hz, 256Hz, ... 8192Hz are only allowed for root. */
+ for (tmp=2; tmp<=64; tmp*=2) {
+
+ retval = ioctl(fd, RTC_IRQP_SET, tmp);
+ if (retval == -1) {
+ /* not all RTCs can change their periodic IRQ rate */
+ if (errno == EINVAL) {
+ fprintf(stderr,
+ "\n...Periodic IRQ rate is fixed\n");
+ goto done;
+ }
+ perror("RTC_IRQP_SET ioctl");
+ exit(errno);
+ }
+
+ fprintf(stderr, "\n%ldHz:\t", tmp);
+ fflush(stderr);
+
+ /* Enable periodic interrupts */
+ retval = ioctl(fd, RTC_PIE_ON, 0);
+ if (retval == -1) {
+ perror("RTC_PIE_ON ioctl");
+ exit(errno);
+ }
+
+ for (i=1; i<21; i++) {
+ gettimeofday(&start, NULL);
+ /* This blocks */
+ retval = read(fd, &data, sizeof(unsigned long));
+ if (retval == -1) {
+ perror("read");
+ exit(errno);
+ }
+ gettimeofday(&end, NULL);
+ timersub(&end, &start, &diff);
+ if (diff.tv_sec > 0 ||
+ diff.tv_usec > ((1000000L / tmp) * 1.10)) {
+ fprintf(stderr, "\nPIE delta error: %ld.%06ld should be close to 0.%06ld\n",
+ diff.tv_sec, diff.tv_usec,
+ (1000000L / tmp));
+ fflush(stdout);
+ exit(-1);
+ }
+
+ fprintf(stderr, " %d",i);
+ fflush(stderr);
+ irqcount++;
+ }
+
+ /* Disable periodic interrupts */
+ retval = ioctl(fd, RTC_PIE_OFF, 0);
+ if (retval == -1) {
+ perror("RTC_PIE_OFF ioctl");
+ exit(errno);
+ }
+ }
+
+done:
+ ioctl(fd, RTC_IRQP_SET, old_pie_rate);
+
+ fprintf(stderr, "\n\n\t\t\t *** Test complete ***\n");
+
+ close(fd);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/timers/set-2038.c b/tools/testing/selftests/timers/set-2038.c
new file mode 100644
index 000000000..688cfd81b
--- /dev/null
+++ b/tools/testing/selftests/timers/set-2038.c
@@ -0,0 +1,133 @@
+/* Time bounds setting test
+ * by: john stultz (johnstul@us.ibm.com)
+ * (C) Copyright IBM 2012
+ * Licensed under the GPLv2
+ *
+ * NOTE: This is a meta-test which sets the time to edge cases then
+ * uses other tests to detect problems. Thus this test requires that
+ * the inconsistency-check and nanosleep tests be present in the same
+ * directory it is run from.
+ *
+ * To build:
+ * $ gcc set-2038.c -o set-2038 -lrt
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <time.h>
+#include <sys/time.h>
+#include "../kselftest.h"
+
+#define NSEC_PER_SEC 1000000000LL
+
+#define KTIME_MAX ((long long)~((unsigned long long)1 << 63))
+#define KTIME_SEC_MAX (KTIME_MAX / NSEC_PER_SEC)
+
+#define YEAR_1901 (-0x7fffffffL)
+#define YEAR_1970 1
+#define YEAR_2038 0x7fffffffL /*overflows 32bit time_t */
+#define YEAR_2262 KTIME_SEC_MAX /*overflows 64bit ktime_t */
+#define YEAR_MAX ((long long)((1ULL<<63)-1)) /*overflows 64bit time_t */
+
+int is32bits(void)
+{
+ return (sizeof(long) == 4);
+}
+
+int settime(long long time)
+{
+ struct timeval now;
+ int ret;
+
+ now.tv_sec = (time_t)time;
+ now.tv_usec = 0;
+
+ ret = settimeofday(&now, NULL);
+
+ printf("Setting time to 0x%lx: %d\n", (long)time, ret);
+ return ret;
+}
+
+int do_tests(void)
+{
+ int ret;
+
+ ret = system("date");
+ ret = system("./inconsistency-check -c 0 -t 20");
+ ret |= system("./nanosleep");
+ ret |= system("./nsleep-lat");
+ return ret;
+
+}
+
+int main(int argc, char *argv[])
+{
+ int ret = 0;
+ int opt, dangerous = 0;
+ time_t start;
+
+ /* Process arguments */
+ while ((opt = getopt(argc, argv, "d")) != -1) {
+ switch (opt) {
+ case 'd':
+ dangerous = 1;
+ }
+ }
+
+ start = time(0);
+
+ /* First test that crazy values don't work */
+ if (!settime(YEAR_1901)) {
+ ret = -1;
+ goto out;
+ }
+ if (!settime(YEAR_MAX)) {
+ ret = -1;
+ goto out;
+ }
+ if (!is32bits() && !settime(YEAR_2262)) {
+ ret = -1;
+ goto out;
+ }
+
+ /* Now test behavior near edges */
+ settime(YEAR_1970);
+ ret = do_tests();
+ if (ret)
+ goto out;
+
+ settime(YEAR_2038 - 600);
+ ret = do_tests();
+ if (ret)
+ goto out;
+
+ /* The rest of the tests can blowup on 32bit systems */
+ if (is32bits() && !dangerous)
+ goto out;
+ /* Test rollover behavior 32bit edge */
+ settime(YEAR_2038 - 10);
+ ret = do_tests();
+ if (ret)
+ goto out;
+
+ settime(YEAR_2262 - 600);
+ ret = do_tests();
+
+out:
+ /* restore clock */
+ settime(start);
+ if (ret)
+ return ksft_exit_fail();
+ return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/set-tai.c b/tools/testing/selftests/timers/set-tai.c
new file mode 100644
index 000000000..8c4179ee2
--- /dev/null
+++ b/tools/testing/selftests/timers/set-tai.c
@@ -0,0 +1,69 @@
+/* Set tai offset
+ * by: John Stultz <john.stultz@linaro.org>
+ * (C) Copyright Linaro 2013
+ * Licensed under the GPLv2
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+#include "../kselftest.h"
+
+int set_tai(int offset)
+{
+ struct timex tx;
+
+ memset(&tx, 0, sizeof(tx));
+
+ tx.modes = ADJ_TAI;
+ tx.constant = offset;
+
+ return adjtimex(&tx);
+}
+
+int get_tai(void)
+{
+ struct timex tx;
+
+ memset(&tx, 0, sizeof(tx));
+
+ adjtimex(&tx);
+ return tx.tai;
+}
+
+int main(int argc, char **argv)
+{
+ int i, ret;
+
+ ret = get_tai();
+ printf("tai offset started at %i\n", ret);
+
+ printf("Checking tai offsets can be properly set: ");
+ fflush(stdout);
+ for (i = 1; i <= 60; i++) {
+ ret = set_tai(i);
+ ret = get_tai();
+ if (ret != i) {
+ printf("[FAILED] expected: %i got %i\n", i, ret);
+ return ksft_exit_fail();
+ }
+ }
+ printf("[OK]\n");
+ return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/set-timer-lat.c b/tools/testing/selftests/timers/set-timer-lat.c
new file mode 100644
index 000000000..50da45437
--- /dev/null
+++ b/tools/testing/selftests/timers/set-timer-lat.c
@@ -0,0 +1,283 @@
+/* set_timer latency test
+ * John Stultz (john.stultz@linaro.org)
+ * (C) Copyright Linaro 2014
+ * Licensed under the GPLv2
+ *
+ * This test makes sure the set_timer api is correct
+ *
+ * To build:
+ * $ gcc set-timer-lat.c -o set-timer-lat -lrt
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <time.h>
+#include <string.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include "../kselftest.h"
+
+#define CLOCK_REALTIME 0
+#define CLOCK_MONOTONIC 1
+#define CLOCK_PROCESS_CPUTIME_ID 2
+#define CLOCK_THREAD_CPUTIME_ID 3
+#define CLOCK_MONOTONIC_RAW 4
+#define CLOCK_REALTIME_COARSE 5
+#define CLOCK_MONOTONIC_COARSE 6
+#define CLOCK_BOOTTIME 7
+#define CLOCK_REALTIME_ALARM 8
+#define CLOCK_BOOTTIME_ALARM 9
+#define CLOCK_HWSPECIFIC 10
+#define CLOCK_TAI 11
+#define NR_CLOCKIDS 12
+
+
+#define NSEC_PER_SEC 1000000000ULL
+#define UNRESONABLE_LATENCY 40000000 /* 40ms in nanosecs */
+
+#define TIMER_SECS 1
+int alarmcount;
+int clock_id;
+struct timespec start_time;
+long long max_latency_ns;
+int timer_fired_early;
+
+char *clockstring(int clockid)
+{
+ switch (clockid) {
+ case CLOCK_REALTIME:
+ return "CLOCK_REALTIME";
+ case CLOCK_MONOTONIC:
+ return "CLOCK_MONOTONIC";
+ case CLOCK_PROCESS_CPUTIME_ID:
+ return "CLOCK_PROCESS_CPUTIME_ID";
+ case CLOCK_THREAD_CPUTIME_ID:
+ return "CLOCK_THREAD_CPUTIME_ID";
+ case CLOCK_MONOTONIC_RAW:
+ return "CLOCK_MONOTONIC_RAW";
+ case CLOCK_REALTIME_COARSE:
+ return "CLOCK_REALTIME_COARSE";
+ case CLOCK_MONOTONIC_COARSE:
+ return "CLOCK_MONOTONIC_COARSE";
+ case CLOCK_BOOTTIME:
+ return "CLOCK_BOOTTIME";
+ case CLOCK_REALTIME_ALARM:
+ return "CLOCK_REALTIME_ALARM";
+ case CLOCK_BOOTTIME_ALARM:
+ return "CLOCK_BOOTTIME_ALARM";
+ case CLOCK_TAI:
+ return "CLOCK_TAI";
+ };
+ return "UNKNOWN_CLOCKID";
+}
+
+
+long long timespec_sub(struct timespec a, struct timespec b)
+{
+ long long ret = NSEC_PER_SEC * b.tv_sec + b.tv_nsec;
+
+ ret -= NSEC_PER_SEC * a.tv_sec + a.tv_nsec;
+ return ret;
+}
+
+
+void sigalarm(int signo)
+{
+ long long delta_ns;
+ struct timespec ts;
+
+ clock_gettime(clock_id, &ts);
+ alarmcount++;
+
+ delta_ns = timespec_sub(start_time, ts);
+ delta_ns -= NSEC_PER_SEC * TIMER_SECS * alarmcount;
+
+ if (delta_ns < 0)
+ timer_fired_early = 1;
+
+ if (delta_ns > max_latency_ns)
+ max_latency_ns = delta_ns;
+}
+
+void describe_timer(int flags, int interval)
+{
+ printf("%-22s %s %s ",
+ clockstring(clock_id),
+ flags ? "ABSTIME":"RELTIME",
+ interval ? "PERIODIC":"ONE-SHOT");
+}
+
+int setup_timer(int clock_id, int flags, int interval, timer_t *tm1)
+{
+ struct sigevent se;
+ struct itimerspec its1, its2;
+ int err;
+
+ /* Set up timer: */
+ memset(&se, 0, sizeof(se));
+ se.sigev_notify = SIGEV_SIGNAL;
+ se.sigev_signo = SIGRTMAX;
+ se.sigev_value.sival_int = 0;
+
+ max_latency_ns = 0;
+ alarmcount = 0;
+ timer_fired_early = 0;
+
+ err = timer_create(clock_id, &se, tm1);
+ if (err) {
+ if ((clock_id == CLOCK_REALTIME_ALARM) ||
+ (clock_id == CLOCK_BOOTTIME_ALARM)) {
+ printf("%-22s %s missing CAP_WAKE_ALARM? : [UNSUPPORTED]\n",
+ clockstring(clock_id),
+ flags ? "ABSTIME":"RELTIME");
+ /* Indicate timer isn't set, so caller doesn't wait */
+ return 1;
+ }
+ printf("%s - timer_create() failed\n", clockstring(clock_id));
+ return -1;
+ }
+
+ clock_gettime(clock_id, &start_time);
+ if (flags) {
+ its1.it_value = start_time;
+ its1.it_value.tv_sec += TIMER_SECS;
+ } else {
+ its1.it_value.tv_sec = TIMER_SECS;
+ its1.it_value.tv_nsec = 0;
+ }
+ its1.it_interval.tv_sec = interval;
+ its1.it_interval.tv_nsec = 0;
+
+ err = timer_settime(*tm1, flags, &its1, &its2);
+ if (err) {
+ printf("%s - timer_settime() failed\n", clockstring(clock_id));
+ return -1;
+ }
+
+ return 0;
+}
+
+int check_timer_latency(int flags, int interval)
+{
+ int err = 0;
+
+ describe_timer(flags, interval);
+ printf("timer fired early: %7d : ", timer_fired_early);
+ if (!timer_fired_early) {
+ printf("[OK]\n");
+ } else {
+ printf("[FAILED]\n");
+ err = -1;
+ }
+
+ describe_timer(flags, interval);
+ printf("max latency: %10lld ns : ", max_latency_ns);
+
+ if (max_latency_ns < UNRESONABLE_LATENCY) {
+ printf("[OK]\n");
+ } else {
+ printf("[FAILED]\n");
+ err = -1;
+ }
+ return err;
+}
+
+int check_alarmcount(int flags, int interval)
+{
+ describe_timer(flags, interval);
+ printf("count: %19d : ", alarmcount);
+ if (alarmcount == 1) {
+ printf("[OK]\n");
+ return 0;
+ }
+ printf("[FAILED]\n");
+ return -1;
+}
+
+int do_timer(int clock_id, int flags)
+{
+ timer_t tm1;
+ const int interval = TIMER_SECS;
+ int err;
+
+ err = setup_timer(clock_id, flags, interval, &tm1);
+ /* Unsupported case - return 0 to not fail the test */
+ if (err)
+ return err == 1 ? 0 : err;
+
+ while (alarmcount < 5)
+ sleep(1);
+
+ timer_delete(tm1);
+ return check_timer_latency(flags, interval);
+}
+
+int do_timer_oneshot(int clock_id, int flags)
+{
+ timer_t tm1;
+ const int interval = 0;
+ struct timeval timeout;
+ int err;
+
+ err = setup_timer(clock_id, flags, interval, &tm1);
+ /* Unsupported case - return 0 to not fail the test */
+ if (err)
+ return err == 1 ? 0 : err;
+
+ memset(&timeout, 0, sizeof(timeout));
+ timeout.tv_sec = 5;
+ do {
+ err = select(0, NULL, NULL, NULL, &timeout);
+ } while (err == -1 && errno == EINTR);
+
+ timer_delete(tm1);
+ err = check_timer_latency(flags, interval);
+ err |= check_alarmcount(flags, interval);
+ return err;
+}
+
+int main(void)
+{
+ struct sigaction act;
+ int signum = SIGRTMAX;
+ int ret = 0;
+
+ /* Set up signal handler: */
+ sigfillset(&act.sa_mask);
+ act.sa_flags = 0;
+ act.sa_handler = sigalarm;
+ sigaction(signum, &act, NULL);
+
+ printf("Setting timers for every %i seconds\n", TIMER_SECS);
+ for (clock_id = 0; clock_id < NR_CLOCKIDS; clock_id++) {
+
+ if ((clock_id == CLOCK_PROCESS_CPUTIME_ID) ||
+ (clock_id == CLOCK_THREAD_CPUTIME_ID) ||
+ (clock_id == CLOCK_MONOTONIC_RAW) ||
+ (clock_id == CLOCK_REALTIME_COARSE) ||
+ (clock_id == CLOCK_MONOTONIC_COARSE) ||
+ (clock_id == CLOCK_HWSPECIFIC))
+ continue;
+
+ ret |= do_timer(clock_id, TIMER_ABSTIME);
+ ret |= do_timer(clock_id, 0);
+ ret |= do_timer_oneshot(clock_id, TIMER_ABSTIME);
+ ret |= do_timer_oneshot(clock_id, 0);
+ }
+ if (ret)
+ return ksft_exit_fail();
+ return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/set-tz.c b/tools/testing/selftests/timers/set-tz.c
new file mode 100644
index 000000000..62bd33eb1
--- /dev/null
+++ b/tools/testing/selftests/timers/set-tz.c
@@ -0,0 +1,110 @@
+/* Set tz value
+ * by: John Stultz <john.stultz@linaro.org>
+ * (C) Copyright Linaro 2016
+ * Licensed under the GPLv2
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+#include "../kselftest.h"
+
+int set_tz(int min, int dst)
+{
+ struct timezone tz;
+
+ tz.tz_minuteswest = min;
+ tz.tz_dsttime = dst;
+
+ return settimeofday(0, &tz);
+}
+
+int get_tz_min(void)
+{
+ struct timezone tz;
+ struct timeval tv;
+
+ memset(&tz, 0, sizeof(tz));
+ gettimeofday(&tv, &tz);
+ return tz.tz_minuteswest;
+}
+
+int get_tz_dst(void)
+{
+ struct timezone tz;
+ struct timeval tv;
+
+ memset(&tz, 0, sizeof(tz));
+ gettimeofday(&tv, &tz);
+ return tz.tz_dsttime;
+}
+
+int main(int argc, char **argv)
+{
+ int i, ret;
+ int min, dst;
+
+ min = get_tz_min();
+ dst = get_tz_dst();
+ printf("tz_minuteswest started at %i, dst at %i\n", min, dst);
+
+ printf("Checking tz_minuteswest can be properly set: ");
+ fflush(stdout);
+ for (i = -15*60; i < 15*60; i += 30) {
+ ret = set_tz(i, dst);
+ ret = get_tz_min();
+ if (ret != i) {
+ printf("[FAILED] expected: %i got %i\n", i, ret);
+ goto err;
+ }
+ }
+ printf("[OK]\n");
+
+ printf("Checking invalid tz_minuteswest values are caught: ");
+ fflush(stdout);
+
+ if (!set_tz(-15*60-1, dst)) {
+ printf("[FAILED] %i didn't return failure!\n", -15*60-1);
+ goto err;
+ }
+
+ if (!set_tz(15*60+1, dst)) {
+ printf("[FAILED] %i didn't return failure!\n", 15*60+1);
+ goto err;
+ }
+
+ if (!set_tz(-24*60, dst)) {
+ printf("[FAILED] %i didn't return failure!\n", -24*60);
+ goto err;
+ }
+
+ if (!set_tz(24*60, dst)) {
+ printf("[FAILED] %i didn't return failure!\n", 24*60);
+ goto err;
+ }
+
+ printf("[OK]\n");
+
+ set_tz(min, dst);
+ return ksft_exit_pass();
+
+err:
+ set_tz(min, dst);
+ return ksft_exit_fail();
+}
diff --git a/tools/testing/selftests/timers/skew_consistency.c b/tools/testing/selftests/timers/skew_consistency.c
new file mode 100644
index 000000000..022b711c7
--- /dev/null
+++ b/tools/testing/selftests/timers/skew_consistency.c
@@ -0,0 +1,78 @@
+/* ADJ_FREQ Skew consistency test
+ * by: john stultz (johnstul@us.ibm.com)
+ * (C) Copyright IBM 2012
+ * Licensed under the GPLv2
+ *
+ * NOTE: This is a meta-test which cranks the ADJ_FREQ knob back
+ * and forth and watches for consistency problems. Thus this test requires
+ * that the inconsistency-check tests be present in the same directory it
+ * is run from.
+ *
+ * To build:
+ * $ gcc skew_consistency.c -o skew_consistency -lrt
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/wait.h>
+#include "../kselftest.h"
+
+#define NSEC_PER_SEC 1000000000LL
+
+int main(int argv, char **argc)
+{
+ struct timex tx;
+ int ret, ppm;
+ pid_t pid;
+
+
+ printf("Running Asynchronous Frequency Changing Tests...\n");
+
+ pid = fork();
+ if (!pid)
+ return system("./inconsistency-check -c 1 -t 600");
+
+ ppm = 500;
+ ret = 0;
+
+ while (pid != waitpid(pid, &ret, WNOHANG)) {
+ ppm = -ppm;
+ tx.modes = ADJ_FREQUENCY;
+ tx.freq = ppm << 16;
+ adjtimex(&tx);
+ usleep(500000);
+ }
+
+ /* Set things back */
+ tx.modes = ADJ_FREQUENCY;
+ tx.offset = 0;
+ adjtimex(&tx);
+
+
+ if (ret) {
+ printf("[FAILED]\n");
+ return ksft_exit_fail();
+ }
+ printf("[OK]\n");
+ return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/threadtest.c b/tools/testing/selftests/timers/threadtest.c
new file mode 100644
index 000000000..cf3e48919
--- /dev/null
+++ b/tools/testing/selftests/timers/threadtest.c
@@ -0,0 +1,193 @@
+/* threadtest.c
+ * by: john stultz (johnstul@us.ibm.com)
+ * (C) Copyright IBM 2004, 2005, 2006, 2012
+ * Licensed under the GPLv2
+ *
+ * To build:
+ * $ gcc threadtest.c -o threadtest -lrt
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <pthread.h>
+#include "../kselftest.h"
+
+/* serializes shared list access */
+pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
+/* serializes console output */
+pthread_mutex_t print_lock = PTHREAD_MUTEX_INITIALIZER;
+
+
+#define MAX_THREADS 128
+#define LISTSIZE 128
+
+int done = 0;
+
+struct timespec global_list[LISTSIZE];
+int listcount = 0;
+
+
+void checklist(struct timespec *list, int size)
+{
+ int i, j;
+ struct timespec *a, *b;
+
+ /* scan the list */
+ for (i = 0; i < size-1; i++) {
+ a = &list[i];
+ b = &list[i+1];
+
+ /* look for any time inconsistencies */
+ if ((b->tv_sec <= a->tv_sec) &&
+ (b->tv_nsec < a->tv_nsec)) {
+
+ /* flag other threads */
+ done = 1;
+
+ /*serialize printing to avoid junky output*/
+ pthread_mutex_lock(&print_lock);
+
+ /* dump the list */
+ printf("\n");
+ for (j = 0; j < size; j++) {
+ if (j == i)
+ printf("---------------\n");
+ printf("%lu:%lu\n", list[j].tv_sec, list[j].tv_nsec);
+ if (j == i+1)
+ printf("---------------\n");
+ }
+ printf("[FAILED]\n");
+
+ pthread_mutex_unlock(&print_lock);
+ }
+ }
+}
+
+/* The shared thread shares a global list
+ * that each thread fills while holding the lock.
+ * This stresses clock syncronization across cpus.
+ */
+void *shared_thread(void *arg)
+{
+ while (!done) {
+ /* protect the list */
+ pthread_mutex_lock(&list_lock);
+
+ /* see if we're ready to check the list */
+ if (listcount >= LISTSIZE) {
+ checklist(global_list, LISTSIZE);
+ listcount = 0;
+ }
+ clock_gettime(CLOCK_MONOTONIC, &global_list[listcount++]);
+
+ pthread_mutex_unlock(&list_lock);
+ }
+ return NULL;
+}
+
+
+/* Each independent thread fills in its own
+ * list. This stresses clock_gettime() lock contention.
+ */
+void *independent_thread(void *arg)
+{
+ struct timespec my_list[LISTSIZE];
+ int count;
+
+ while (!done) {
+ /* fill the list */
+ for (count = 0; count < LISTSIZE; count++)
+ clock_gettime(CLOCK_MONOTONIC, &my_list[count]);
+ checklist(my_list, LISTSIZE);
+ }
+ return NULL;
+}
+
+#define DEFAULT_THREAD_COUNT 8
+#define DEFAULT_RUNTIME 30
+
+int main(int argc, char **argv)
+{
+ int thread_count, i;
+ time_t start, now, runtime;
+ char buf[255];
+ pthread_t pth[MAX_THREADS];
+ int opt;
+ void *tret;
+ int ret = 0;
+ void *(*thread)(void *) = shared_thread;
+
+ thread_count = DEFAULT_THREAD_COUNT;
+ runtime = DEFAULT_RUNTIME;
+
+ /* Process arguments */
+ while ((opt = getopt(argc, argv, "t:n:i")) != -1) {
+ switch (opt) {
+ case 't':
+ runtime = atoi(optarg);
+ break;
+ case 'n':
+ thread_count = atoi(optarg);
+ break;
+ case 'i':
+ thread = independent_thread;
+ printf("using independent threads\n");
+ break;
+ default:
+ printf("Usage: %s [-t <secs>] [-n <numthreads>] [-i]\n", argv[0]);
+ printf(" -t: time to run\n");
+ printf(" -n: number of threads\n");
+ printf(" -i: use independent threads\n");
+ return -1;
+ }
+ }
+
+ if (thread_count > MAX_THREADS)
+ thread_count = MAX_THREADS;
+
+
+ setbuf(stdout, NULL);
+
+ start = time(0);
+ strftime(buf, 255, "%a, %d %b %Y %T %z", localtime(&start));
+ printf("%s\n", buf);
+ printf("Testing consistency with %i threads for %ld seconds: ", thread_count, runtime);
+ fflush(stdout);
+
+ /* spawn */
+ for (i = 0; i < thread_count; i++)
+ pthread_create(&pth[i], 0, thread, 0);
+
+ while (time(&now) < start + runtime) {
+ sleep(1);
+ if (done) {
+ ret = 1;
+ strftime(buf, 255, "%a, %d %b %Y %T %z", localtime(&now));
+ printf("%s\n", buf);
+ goto out;
+ }
+ }
+ printf("[OK]\n");
+ done = 1;
+
+out:
+ /* wait */
+ for (i = 0; i < thread_count; i++)
+ pthread_join(pth[i], &tret);
+
+ /* die */
+ if (ret)
+ ksft_exit_fail();
+ return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/valid-adjtimex.c b/tools/testing/selftests/timers/valid-adjtimex.c
new file mode 100644
index 000000000..5397de708
--- /dev/null
+++ b/tools/testing/selftests/timers/valid-adjtimex.c
@@ -0,0 +1,330 @@
+/* valid adjtimex test
+ * by: John Stultz <john.stultz@linaro.org>
+ * (C) Copyright Linaro 2015
+ * Licensed under the GPLv2
+ *
+ * This test validates adjtimex interface with valid
+ * and invalid test data.
+ *
+ * Usage: valid-adjtimex
+ *
+ * To build:
+ * $ gcc valid-adjtimex.c -o valid-adjtimex -lrt
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+#include "../kselftest.h"
+
+#define NSEC_PER_SEC 1000000000LL
+#define USEC_PER_SEC 1000000LL
+
+#define ADJ_SETOFFSET 0x0100
+
+#include <sys/syscall.h>
+static int clock_adjtime(clockid_t id, struct timex *tx)
+{
+ return syscall(__NR_clock_adjtime, id, tx);
+}
+
+
+/* clear NTP time_status & time_state */
+int clear_time_state(void)
+{
+ struct timex tx;
+ int ret;
+
+ tx.modes = ADJ_STATUS;
+ tx.status = 0;
+ ret = adjtimex(&tx);
+ return ret;
+}
+
+#define NUM_FREQ_VALID 32
+#define NUM_FREQ_OUTOFRANGE 4
+#define NUM_FREQ_INVALID 2
+
+long valid_freq[NUM_FREQ_VALID] = {
+ -499<<16,
+ -450<<16,
+ -400<<16,
+ -350<<16,
+ -300<<16,
+ -250<<16,
+ -200<<16,
+ -150<<16,
+ -100<<16,
+ -75<<16,
+ -50<<16,
+ -25<<16,
+ -10<<16,
+ -5<<16,
+ -1<<16,
+ -1000,
+ 1<<16,
+ 5<<16,
+ 10<<16,
+ 25<<16,
+ 50<<16,
+ 75<<16,
+ 100<<16,
+ 150<<16,
+ 200<<16,
+ 250<<16,
+ 300<<16,
+ 350<<16,
+ 400<<16,
+ 450<<16,
+ 499<<16,
+};
+
+long outofrange_freq[NUM_FREQ_OUTOFRANGE] = {
+ -1000<<16,
+ -550<<16,
+ 550<<16,
+ 1000<<16,
+};
+
+#define LONG_MAX (~0UL>>1)
+#define LONG_MIN (-LONG_MAX - 1)
+
+long invalid_freq[NUM_FREQ_INVALID] = {
+ LONG_MAX,
+ LONG_MIN,
+};
+
+int validate_freq(void)
+{
+ struct timex tx;
+ int ret, pass = 0;
+ int i;
+
+ clear_time_state();
+
+ memset(&tx, 0, sizeof(struct timex));
+ /* Set the leap second insert flag */
+
+ printf("Testing ADJ_FREQ... ");
+ fflush(stdout);
+ for (i = 0; i < NUM_FREQ_VALID; i++) {
+ tx.modes = ADJ_FREQUENCY;
+ tx.freq = valid_freq[i];
+
+ ret = adjtimex(&tx);
+ if (ret < 0) {
+ printf("[FAIL]\n");
+ printf("Error: adjtimex(ADJ_FREQ, %ld - %ld ppm\n",
+ valid_freq[i], valid_freq[i]>>16);
+ pass = -1;
+ goto out;
+ }
+ tx.modes = 0;
+ ret = adjtimex(&tx);
+ if (tx.freq != valid_freq[i]) {
+ printf("Warning: freq value %ld not what we set it (%ld)!\n",
+ tx.freq, valid_freq[i]);
+ }
+ }
+ for (i = 0; i < NUM_FREQ_OUTOFRANGE; i++) {
+ tx.modes = ADJ_FREQUENCY;
+ tx.freq = outofrange_freq[i];
+
+ ret = adjtimex(&tx);
+ if (ret < 0) {
+ printf("[FAIL]\n");
+ printf("Error: adjtimex(ADJ_FREQ, %ld - %ld ppm\n",
+ outofrange_freq[i], outofrange_freq[i]>>16);
+ pass = -1;
+ goto out;
+ }
+ tx.modes = 0;
+ ret = adjtimex(&tx);
+ if (tx.freq == outofrange_freq[i]) {
+ printf("[FAIL]\n");
+ printf("ERROR: out of range value %ld actually set!\n",
+ tx.freq);
+ pass = -1;
+ goto out;
+ }
+ }
+
+
+ if (sizeof(long) == 8) { /* this case only applies to 64bit systems */
+ for (i = 0; i < NUM_FREQ_INVALID; i++) {
+ tx.modes = ADJ_FREQUENCY;
+ tx.freq = invalid_freq[i];
+ ret = adjtimex(&tx);
+ if (ret >= 0) {
+ printf("[FAIL]\n");
+ printf("Error: No failure on invalid ADJ_FREQUENCY %ld\n",
+ invalid_freq[i]);
+ pass = -1;
+ goto out;
+ }
+ }
+ }
+
+ printf("[OK]\n");
+out:
+ /* reset freq to zero */
+ tx.modes = ADJ_FREQUENCY;
+ tx.freq = 0;
+ ret = adjtimex(&tx);
+
+ return pass;
+}
+
+
+int set_offset(long long offset, int use_nano)
+{
+ struct timex tmx = {};
+ int ret;
+
+ tmx.modes = ADJ_SETOFFSET;
+ if (use_nano) {
+ tmx.modes |= ADJ_NANO;
+
+ tmx.time.tv_sec = offset / NSEC_PER_SEC;
+ tmx.time.tv_usec = offset % NSEC_PER_SEC;
+
+ if (offset < 0 && tmx.time.tv_usec) {
+ tmx.time.tv_sec -= 1;
+ tmx.time.tv_usec += NSEC_PER_SEC;
+ }
+ } else {
+ tmx.time.tv_sec = offset / USEC_PER_SEC;
+ tmx.time.tv_usec = offset % USEC_PER_SEC;
+
+ if (offset < 0 && tmx.time.tv_usec) {
+ tmx.time.tv_sec -= 1;
+ tmx.time.tv_usec += USEC_PER_SEC;
+ }
+ }
+
+ ret = clock_adjtime(CLOCK_REALTIME, &tmx);
+ if (ret < 0) {
+ printf("(sec: %ld usec: %ld) ", tmx.time.tv_sec, tmx.time.tv_usec);
+ printf("[FAIL]\n");
+ return -1;
+ }
+ return 0;
+}
+
+int set_bad_offset(long sec, long usec, int use_nano)
+{
+ struct timex tmx = {};
+ int ret;
+
+ tmx.modes = ADJ_SETOFFSET;
+ if (use_nano)
+ tmx.modes |= ADJ_NANO;
+
+ tmx.time.tv_sec = sec;
+ tmx.time.tv_usec = usec;
+ ret = clock_adjtime(CLOCK_REALTIME, &tmx);
+ if (ret >= 0) {
+ printf("Invalid (sec: %ld usec: %ld) did not fail! ", tmx.time.tv_sec, tmx.time.tv_usec);
+ printf("[FAIL]\n");
+ return -1;
+ }
+ return 0;
+}
+
+int validate_set_offset(void)
+{
+ printf("Testing ADJ_SETOFFSET... ");
+ fflush(stdout);
+
+ /* Test valid values */
+ if (set_offset(NSEC_PER_SEC - 1, 1))
+ return -1;
+
+ if (set_offset(-NSEC_PER_SEC + 1, 1))
+ return -1;
+
+ if (set_offset(-NSEC_PER_SEC - 1, 1))
+ return -1;
+
+ if (set_offset(5 * NSEC_PER_SEC, 1))
+ return -1;
+
+ if (set_offset(-5 * NSEC_PER_SEC, 1))
+ return -1;
+
+ if (set_offset(5 * NSEC_PER_SEC + NSEC_PER_SEC / 2, 1))
+ return -1;
+
+ if (set_offset(-5 * NSEC_PER_SEC - NSEC_PER_SEC / 2, 1))
+ return -1;
+
+ if (set_offset(USEC_PER_SEC - 1, 0))
+ return -1;
+
+ if (set_offset(-USEC_PER_SEC + 1, 0))
+ return -1;
+
+ if (set_offset(-USEC_PER_SEC - 1, 0))
+ return -1;
+
+ if (set_offset(5 * USEC_PER_SEC, 0))
+ return -1;
+
+ if (set_offset(-5 * USEC_PER_SEC, 0))
+ return -1;
+
+ if (set_offset(5 * USEC_PER_SEC + USEC_PER_SEC / 2, 0))
+ return -1;
+
+ if (set_offset(-5 * USEC_PER_SEC - USEC_PER_SEC / 2, 0))
+ return -1;
+
+ /* Test invalid values */
+ if (set_bad_offset(0, -1, 1))
+ return -1;
+ if (set_bad_offset(0, -1, 0))
+ return -1;
+ if (set_bad_offset(0, 2 * NSEC_PER_SEC, 1))
+ return -1;
+ if (set_bad_offset(0, 2 * USEC_PER_SEC, 0))
+ return -1;
+ if (set_bad_offset(0, NSEC_PER_SEC, 1))
+ return -1;
+ if (set_bad_offset(0, USEC_PER_SEC, 0))
+ return -1;
+ if (set_bad_offset(0, -NSEC_PER_SEC, 1))
+ return -1;
+ if (set_bad_offset(0, -USEC_PER_SEC, 0))
+ return -1;
+
+ printf("[OK]\n");
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ if (validate_freq())
+ return ksft_exit_fail();
+
+ if (validate_set_offset())
+ return ksft_exit_fail();
+
+ return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/uevent/Makefile b/tools/testing/selftests/uevent/Makefile
new file mode 100644
index 000000000..f7baa9aa2
--- /dev/null
+++ b/tools/testing/selftests/uevent/Makefile
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: GPL-2.0
+all:
+
+include ../lib.mk
+
+.PHONY: all clean
+
+BINARIES := uevent_filtering
+CFLAGS += -Wl,-no-as-needed -Wall
+
+uevent_filtering: uevent_filtering.c ../kselftest.h ../kselftest_harness.h
+ $(CC) $(CFLAGS) $< -o $@
+
+TEST_PROGS += $(BINARIES)
+EXTRA_CLEAN := $(BINARIES)
+
+all: $(BINARIES)
diff --git a/tools/testing/selftests/uevent/config b/tools/testing/selftests/uevent/config
new file mode 100644
index 000000000..1038f4515
--- /dev/null
+++ b/tools/testing/selftests/uevent/config
@@ -0,0 +1,2 @@
+CONFIG_USER_NS=y
+CONFIG_NET=y
diff --git a/tools/testing/selftests/uevent/uevent_filtering.c b/tools/testing/selftests/uevent/uevent_filtering.c
new file mode 100644
index 000000000..f83391aa4
--- /dev/null
+++ b/tools/testing/selftests/uevent/uevent_filtering.c
@@ -0,0 +1,486 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/netlink.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/prctl.h>
+#include <sys/socket.h>
+#include <sched.h>
+#include <sys/eventfd.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "../kselftest.h"
+#include "../kselftest_harness.h"
+
+#define __DEV_FULL "/sys/devices/virtual/mem/full/uevent"
+#define __UEVENT_BUFFER_SIZE (2048 * 2)
+#define __UEVENT_HEADER "add@/devices/virtual/mem/full"
+#define __UEVENT_HEADER_LEN sizeof("add@/devices/virtual/mem/full")
+#define __UEVENT_LISTEN_ALL -1
+
+ssize_t read_nointr(int fd, void *buf, size_t count)
+{
+ ssize_t ret;
+
+again:
+ ret = read(fd, buf, count);
+ if (ret < 0 && errno == EINTR)
+ goto again;
+
+ return ret;
+}
+
+ssize_t write_nointr(int fd, const void *buf, size_t count)
+{
+ ssize_t ret;
+
+again:
+ ret = write(fd, buf, count);
+ if (ret < 0 && errno == EINTR)
+ goto again;
+
+ return ret;
+}
+
+int wait_for_pid(pid_t pid)
+{
+ int status, ret;
+
+again:
+ ret = waitpid(pid, &status, 0);
+ if (ret == -1) {
+ if (errno == EINTR)
+ goto again;
+
+ return -1;
+ }
+
+ if (ret != pid)
+ goto again;
+
+ if (!WIFEXITED(status) || WEXITSTATUS(status) != 0)
+ return -1;
+
+ return 0;
+}
+
+static int uevent_listener(unsigned long post_flags, bool expect_uevent,
+ int sync_fd)
+{
+ int sk_fd, ret;
+ socklen_t sk_addr_len;
+ int fret = -1, rcv_buf_sz = __UEVENT_BUFFER_SIZE;
+ uint64_t sync_add = 1;
+ struct sockaddr_nl sk_addr = { 0 }, rcv_addr = { 0 };
+ char buf[__UEVENT_BUFFER_SIZE] = { 0 };
+ struct iovec iov = { buf, __UEVENT_BUFFER_SIZE };
+ char control[CMSG_SPACE(sizeof(struct ucred))];
+ struct msghdr hdr = {
+ &rcv_addr, sizeof(rcv_addr), &iov, 1,
+ control, sizeof(control), 0,
+ };
+
+ sk_fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC,
+ NETLINK_KOBJECT_UEVENT);
+ if (sk_fd < 0) {
+ fprintf(stderr, "%s - Failed to open uevent socket\n", strerror(errno));
+ return -1;
+ }
+
+ ret = setsockopt(sk_fd, SOL_SOCKET, SO_RCVBUF, &rcv_buf_sz,
+ sizeof(rcv_buf_sz));
+ if (ret < 0) {
+ fprintf(stderr, "%s - Failed to set socket options\n", strerror(errno));
+ goto on_error;
+ }
+
+ sk_addr.nl_family = AF_NETLINK;
+ sk_addr.nl_groups = __UEVENT_LISTEN_ALL;
+
+ sk_addr_len = sizeof(sk_addr);
+ ret = bind(sk_fd, (struct sockaddr *)&sk_addr, sk_addr_len);
+ if (ret < 0) {
+ fprintf(stderr, "%s - Failed to bind socket\n", strerror(errno));
+ goto on_error;
+ }
+
+ ret = getsockname(sk_fd, (struct sockaddr *)&sk_addr, &sk_addr_len);
+ if (ret < 0) {
+ fprintf(stderr, "%s - Failed to retrieve socket name\n", strerror(errno));
+ goto on_error;
+ }
+
+ if ((size_t)sk_addr_len != sizeof(sk_addr)) {
+ fprintf(stderr, "Invalid socket address size\n");
+ goto on_error;
+ }
+
+ if (post_flags & CLONE_NEWUSER) {
+ ret = unshare(CLONE_NEWUSER);
+ if (ret < 0) {
+ fprintf(stderr,
+ "%s - Failed to unshare user namespace\n",
+ strerror(errno));
+ goto on_error;
+ }
+ }
+
+ if (post_flags & CLONE_NEWNET) {
+ ret = unshare(CLONE_NEWNET);
+ if (ret < 0) {
+ fprintf(stderr,
+ "%s - Failed to unshare network namespace\n",
+ strerror(errno));
+ goto on_error;
+ }
+ }
+
+ ret = write_nointr(sync_fd, &sync_add, sizeof(sync_add));
+ close(sync_fd);
+ if (ret != sizeof(sync_add)) {
+ fprintf(stderr, "Failed to synchronize with parent process\n");
+ goto on_error;
+ }
+
+ fret = 0;
+ for (;;) {
+ ssize_t r;
+
+ r = recvmsg(sk_fd, &hdr, 0);
+ if (r <= 0) {
+ fprintf(stderr, "%s - Failed to receive uevent\n", strerror(errno));
+ ret = -1;
+ break;
+ }
+
+ /* ignore libudev messages */
+ if (memcmp(buf, "libudev", 8) == 0)
+ continue;
+
+ /* ignore uevents we didn't trigger */
+ if (memcmp(buf, __UEVENT_HEADER, __UEVENT_HEADER_LEN) != 0)
+ continue;
+
+ if (!expect_uevent) {
+ fprintf(stderr, "Received unexpected uevent:\n");
+ ret = -1;
+ }
+
+ if (TH_LOG_ENABLED) {
+ /* If logging is enabled dump the received uevent. */
+ (void)write_nointr(STDERR_FILENO, buf, r);
+ (void)write_nointr(STDERR_FILENO, "\n", 1);
+ }
+
+ break;
+ }
+
+on_error:
+ close(sk_fd);
+
+ return fret;
+}
+
+int trigger_uevent(unsigned int times)
+{
+ int fd, ret;
+ unsigned int i;
+
+ fd = open(__DEV_FULL, O_RDWR | O_CLOEXEC);
+ if (fd < 0) {
+ if (errno != ENOENT)
+ return -EINVAL;
+
+ return -1;
+ }
+
+ for (i = 0; i < times; i++) {
+ ret = write_nointr(fd, "add\n", sizeof("add\n") - 1);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to trigger uevent\n");
+ break;
+ }
+ }
+ close(fd);
+
+ return ret;
+}
+
+int set_death_signal(void)
+{
+ int ret;
+ pid_t ppid;
+
+ ret = prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
+
+ /* Check whether we have been orphaned. */
+ ppid = getppid();
+ if (ppid == 1) {
+ pid_t self;
+
+ self = getpid();
+ ret = kill(self, SIGKILL);
+ }
+
+ if (ret < 0)
+ return -1;
+
+ return 0;
+}
+
+static int do_test(unsigned long pre_flags, unsigned long post_flags,
+ bool expect_uevent, int sync_fd)
+{
+ int ret;
+ uint64_t wait_val;
+ pid_t pid;
+ sigset_t mask;
+ sigset_t orig_mask;
+ struct timespec timeout;
+
+ sigemptyset(&mask);
+ sigaddset(&mask, SIGCHLD);
+
+ ret = sigprocmask(SIG_BLOCK, &mask, &orig_mask);
+ if (ret < 0) {
+ fprintf(stderr, "%s- Failed to block SIGCHLD\n", strerror(errno));
+ return -1;
+ }
+
+ pid = fork();
+ if (pid < 0) {
+ fprintf(stderr, "%s - Failed to fork() new process\n", strerror(errno));
+ return -1;
+ }
+
+ if (pid == 0) {
+ /* Make sure that we go away when our parent dies. */
+ ret = set_death_signal();
+ if (ret < 0) {
+ fprintf(stderr, "Failed to set PR_SET_PDEATHSIG to SIGKILL\n");
+ _exit(EXIT_FAILURE);
+ }
+
+ if (pre_flags & CLONE_NEWUSER) {
+ ret = unshare(CLONE_NEWUSER);
+ if (ret < 0) {
+ fprintf(stderr,
+ "%s - Failed to unshare user namespace\n",
+ strerror(errno));
+ _exit(EXIT_FAILURE);
+ }
+ }
+
+ if (pre_flags & CLONE_NEWNET) {
+ ret = unshare(CLONE_NEWNET);
+ if (ret < 0) {
+ fprintf(stderr,
+ "%s - Failed to unshare network namespace\n",
+ strerror(errno));
+ _exit(EXIT_FAILURE);
+ }
+ }
+
+ if (uevent_listener(post_flags, expect_uevent, sync_fd) < 0)
+ _exit(EXIT_FAILURE);
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ ret = read_nointr(sync_fd, &wait_val, sizeof(wait_val));
+ if (ret != sizeof(wait_val)) {
+ fprintf(stderr, "Failed to synchronize with child process\n");
+ _exit(EXIT_FAILURE);
+ }
+
+ /* Trigger 10 uevents to account for the case where the kernel might
+ * drop some.
+ */
+ ret = trigger_uevent(10);
+ if (ret < 0)
+ fprintf(stderr, "Failed triggering uevents\n");
+
+ /* Wait for 2 seconds before considering this failed. This should be
+ * plenty of time for the kernel to deliver the uevent even under heavy
+ * load.
+ */
+ timeout.tv_sec = 2;
+ timeout.tv_nsec = 0;
+
+again:
+ ret = sigtimedwait(&mask, NULL, &timeout);
+ if (ret < 0) {
+ if (errno == EINTR)
+ goto again;
+
+ if (!expect_uevent)
+ ret = kill(pid, SIGTERM); /* success */
+ else
+ ret = kill(pid, SIGUSR1); /* error */
+ if (ret < 0)
+ return -1;
+ }
+
+ ret = wait_for_pid(pid);
+ if (ret < 0)
+ return -1;
+
+ return ret;
+}
+
+static void signal_handler(int sig)
+{
+ if (sig == SIGTERM)
+ _exit(EXIT_SUCCESS);
+
+ _exit(EXIT_FAILURE);
+}
+
+TEST(uevent_filtering)
+{
+ int ret, sync_fd;
+ struct sigaction act;
+
+ if (geteuid()) {
+ TH_LOG("Uevent filtering tests require root privileges. Skipping test");
+ _exit(KSFT_SKIP);
+ }
+
+ ret = access(__DEV_FULL, F_OK);
+ EXPECT_EQ(0, ret) {
+ if (errno == ENOENT) {
+ TH_LOG(__DEV_FULL " does not exist. Skipping test");
+ _exit(KSFT_SKIP);
+ }
+
+ _exit(KSFT_FAIL);
+ }
+
+ act.sa_handler = signal_handler;
+ act.sa_flags = 0;
+ sigemptyset(&act.sa_mask);
+
+ ret = sigaction(SIGTERM, &act, NULL);
+ ASSERT_EQ(0, ret);
+
+ sync_fd = eventfd(0, EFD_CLOEXEC);
+ ASSERT_GE(sync_fd, 0);
+
+ /*
+ * Setup:
+ * - Open uevent listening socket in initial network namespace owned by
+ * initial user namespace.
+ * - Trigger uevent in initial network namespace owned by initial user
+ * namespace.
+ * Expected Result:
+ * - uevent listening socket receives uevent
+ */
+ ret = do_test(0, 0, true, sync_fd);
+ ASSERT_EQ(0, ret) {
+ goto do_cleanup;
+ }
+
+ /*
+ * Setup:
+ * - Open uevent listening socket in non-initial network namespace
+ * owned by initial user namespace.
+ * - Trigger uevent in initial network namespace owned by initial user
+ * namespace.
+ * Expected Result:
+ * - uevent listening socket receives uevent
+ */
+ ret = do_test(CLONE_NEWNET, 0, true, sync_fd);
+ ASSERT_EQ(0, ret) {
+ goto do_cleanup;
+ }
+
+ /*
+ * Setup:
+ * - unshare user namespace
+ * - Open uevent listening socket in initial network namespace
+ * owned by initial user namespace.
+ * - Trigger uevent in initial network namespace owned by initial user
+ * namespace.
+ * Expected Result:
+ * - uevent listening socket receives uevent
+ */
+ ret = do_test(CLONE_NEWUSER, 0, true, sync_fd);
+ ASSERT_EQ(0, ret) {
+ goto do_cleanup;
+ }
+
+ /*
+ * Setup:
+ * - Open uevent listening socket in non-initial network namespace
+ * owned by non-initial user namespace.
+ * - Trigger uevent in initial network namespace owned by initial user
+ * namespace.
+ * Expected Result:
+ * - uevent listening socket receives no uevent
+ */
+ ret = do_test(CLONE_NEWUSER | CLONE_NEWNET, 0, false, sync_fd);
+ ASSERT_EQ(0, ret) {
+ goto do_cleanup;
+ }
+
+ /*
+ * Setup:
+ * - Open uevent listening socket in initial network namespace
+ * owned by initial user namespace.
+ * - unshare network namespace
+ * - Trigger uevent in initial network namespace owned by initial user
+ * namespace.
+ * Expected Result:
+ * - uevent listening socket receives uevent
+ */
+ ret = do_test(0, CLONE_NEWNET, true, sync_fd);
+ ASSERT_EQ(0, ret) {
+ goto do_cleanup;
+ }
+
+ /*
+ * Setup:
+ * - Open uevent listening socket in initial network namespace
+ * owned by initial user namespace.
+ * - unshare user namespace
+ * - Trigger uevent in initial network namespace owned by initial user
+ * namespace.
+ * Expected Result:
+ * - uevent listening socket receives uevent
+ */
+ ret = do_test(0, CLONE_NEWUSER, true, sync_fd);
+ ASSERT_EQ(0, ret) {
+ goto do_cleanup;
+ }
+
+ /*
+ * Setup:
+ * - Open uevent listening socket in initial network namespace
+ * owned by initial user namespace.
+ * - unshare user namespace
+ * - unshare network namespace
+ * - Trigger uevent in initial network namespace owned by initial user
+ * namespace.
+ * Expected Result:
+ * - uevent listening socket receives uevent
+ */
+ ret = do_test(0, CLONE_NEWUSER | CLONE_NEWNET, true, sync_fd);
+ ASSERT_EQ(0, ret) {
+ goto do_cleanup;
+ }
+
+do_cleanup:
+ close(sync_fd);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/user/Makefile b/tools/testing/selftests/user/Makefile
new file mode 100644
index 000000000..d401b63c5
--- /dev/null
+++ b/tools/testing/selftests/user/Makefile
@@ -0,0 +1,8 @@
+# Makefile for user memory selftests
+
+# No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
+all:
+
+TEST_PROGS := test_user_copy.sh
+
+include ../lib.mk
diff --git a/tools/testing/selftests/user/config b/tools/testing/selftests/user/config
new file mode 100644
index 000000000..784ed8416
--- /dev/null
+++ b/tools/testing/selftests/user/config
@@ -0,0 +1 @@
+CONFIG_TEST_USER_COPY=m
diff --git a/tools/testing/selftests/user/test_user_copy.sh b/tools/testing/selftests/user/test_user_copy.sh
new file mode 100755
index 000000000..f9b31a574
--- /dev/null
+++ b/tools/testing/selftests/user/test_user_copy.sh
@@ -0,0 +1,18 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Runs copy_to/from_user infrastructure using test_user_copy kernel module
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+if ! /sbin/modprobe -q -n test_user_copy; then
+ echo "user: module test_user_copy is not found [SKIP]"
+ exit $ksft_skip
+fi
+if /sbin/modprobe -q test_user_copy; then
+ /sbin/modprobe -q -r test_user_copy
+ echo "user_copy: ok"
+else
+ echo "user_copy: [FAIL]"
+ exit 1
+fi
diff --git a/tools/testing/selftests/vDSO/.gitignore b/tools/testing/selftests/vDSO/.gitignore
new file mode 100644
index 000000000..133bf9ee9
--- /dev/null
+++ b/tools/testing/selftests/vDSO/.gitignore
@@ -0,0 +1,2 @@
+vdso_test
+vdso_standalone_test_x86
diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile
new file mode 100644
index 000000000..9e03d61f5
--- /dev/null
+++ b/tools/testing/selftests/vDSO/Makefile
@@ -0,0 +1,26 @@
+# SPDX-License-Identifier: GPL-2.0
+include ../lib.mk
+
+uname_M := $(shell uname -m 2>/dev/null || echo not)
+ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
+
+TEST_GEN_PROGS := $(OUTPUT)/vdso_test
+ifeq ($(ARCH),x86)
+TEST_GEN_PROGS += $(OUTPUT)/vdso_standalone_test_x86
+endif
+
+ifndef CROSS_COMPILE
+CFLAGS := -std=gnu99
+CFLAGS_vdso_standalone_test_x86 := -nostdlib -fno-asynchronous-unwind-tables -fno-stack-protector
+ifeq ($(CONFIG_X86_32),y)
+LDLIBS += -lgcc_s
+endif
+
+all: $(TEST_GEN_PROGS)
+$(OUTPUT)/vdso_test: parse_vdso.c vdso_test.c
+$(OUTPUT)/vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c
+ $(CC) $(CFLAGS) $(CFLAGS_vdso_standalone_test_x86) \
+ vdso_standalone_test_x86.c parse_vdso.c \
+ -o $@
+
+endif
diff --git a/tools/testing/selftests/vDSO/parse_vdso.c b/tools/testing/selftests/vDSO/parse_vdso.c
new file mode 100644
index 000000000..1dbb4b872
--- /dev/null
+++ b/tools/testing/selftests/vDSO/parse_vdso.c
@@ -0,0 +1,269 @@
+/*
+ * parse_vdso.c: Linux reference vDSO parser
+ * Written by Andrew Lutomirski, 2011-2014.
+ *
+ * This code is meant to be linked in to various programs that run on Linux.
+ * As such, it is available with as few restrictions as possible. This file
+ * is licensed under the Creative Commons Zero License, version 1.0,
+ * available at http://creativecommons.org/publicdomain/zero/1.0/legalcode
+ *
+ * The vDSO is a regular ELF DSO that the kernel maps into user space when
+ * it starts a program. It works equally well in statically and dynamically
+ * linked binaries.
+ *
+ * This code is tested on x86. In principle it should work on any
+ * architecture that has a vDSO.
+ */
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <string.h>
+#include <limits.h>
+#include <elf.h>
+
+/*
+ * To use this vDSO parser, first call one of the vdso_init_* functions.
+ * If you've already parsed auxv, then pass the value of AT_SYSINFO_EHDR
+ * to vdso_init_from_sysinfo_ehdr. Otherwise pass auxv to vdso_init_from_auxv.
+ * Then call vdso_sym for each symbol you want. For example, to look up
+ * gettimeofday on x86_64, use:
+ *
+ * <some pointer> = vdso_sym("LINUX_2.6", "gettimeofday");
+ * or
+ * <some pointer> = vdso_sym("LINUX_2.6", "__vdso_gettimeofday");
+ *
+ * vdso_sym will return 0 if the symbol doesn't exist or if the init function
+ * failed or was not called. vdso_sym is a little slow, so its return value
+ * should be cached.
+ *
+ * vdso_sym is threadsafe; the init functions are not.
+ *
+ * These are the prototypes:
+ */
+extern void vdso_init_from_auxv(void *auxv);
+extern void vdso_init_from_sysinfo_ehdr(uintptr_t base);
+extern void *vdso_sym(const char *version, const char *name);
+
+
+/* And here's the code. */
+#ifndef ELF_BITS
+# if ULONG_MAX > 0xffffffffUL
+# define ELF_BITS 64
+# else
+# define ELF_BITS 32
+# endif
+#endif
+
+#define ELF_BITS_XFORM2(bits, x) Elf##bits##_##x
+#define ELF_BITS_XFORM(bits, x) ELF_BITS_XFORM2(bits, x)
+#define ELF(x) ELF_BITS_XFORM(ELF_BITS, x)
+
+static struct vdso_info
+{
+ bool valid;
+
+ /* Load information */
+ uintptr_t load_addr;
+ uintptr_t load_offset; /* load_addr - recorded vaddr */
+
+ /* Symbol table */
+ ELF(Sym) *symtab;
+ const char *symstrings;
+ ELF(Word) *bucket, *chain;
+ ELF(Word) nbucket, nchain;
+
+ /* Version table */
+ ELF(Versym) *versym;
+ ELF(Verdef) *verdef;
+} vdso_info;
+
+/* Straight from the ELF specification. */
+static unsigned long elf_hash(const unsigned char *name)
+{
+ unsigned long h = 0, g;
+ while (*name)
+ {
+ h = (h << 4) + *name++;
+ if (g = h & 0xf0000000)
+ h ^= g >> 24;
+ h &= ~g;
+ }
+ return h;
+}
+
+void vdso_init_from_sysinfo_ehdr(uintptr_t base)
+{
+ size_t i;
+ bool found_vaddr = false;
+
+ vdso_info.valid = false;
+
+ vdso_info.load_addr = base;
+
+ ELF(Ehdr) *hdr = (ELF(Ehdr)*)base;
+ if (hdr->e_ident[EI_CLASS] !=
+ (ELF_BITS == 32 ? ELFCLASS32 : ELFCLASS64)) {
+ return; /* Wrong ELF class -- check ELF_BITS */
+ }
+
+ ELF(Phdr) *pt = (ELF(Phdr)*)(vdso_info.load_addr + hdr->e_phoff);
+ ELF(Dyn) *dyn = 0;
+
+ /*
+ * We need two things from the segment table: the load offset
+ * and the dynamic table.
+ */
+ for (i = 0; i < hdr->e_phnum; i++)
+ {
+ if (pt[i].p_type == PT_LOAD && !found_vaddr) {
+ found_vaddr = true;
+ vdso_info.load_offset = base
+ + (uintptr_t)pt[i].p_offset
+ - (uintptr_t)pt[i].p_vaddr;
+ } else if (pt[i].p_type == PT_DYNAMIC) {
+ dyn = (ELF(Dyn)*)(base + pt[i].p_offset);
+ }
+ }
+
+ if (!found_vaddr || !dyn)
+ return; /* Failed */
+
+ /*
+ * Fish out the useful bits of the dynamic table.
+ */
+ ELF(Word) *hash = 0;
+ vdso_info.symstrings = 0;
+ vdso_info.symtab = 0;
+ vdso_info.versym = 0;
+ vdso_info.verdef = 0;
+ for (i = 0; dyn[i].d_tag != DT_NULL; i++) {
+ switch (dyn[i].d_tag) {
+ case DT_STRTAB:
+ vdso_info.symstrings = (const char *)
+ ((uintptr_t)dyn[i].d_un.d_ptr
+ + vdso_info.load_offset);
+ break;
+ case DT_SYMTAB:
+ vdso_info.symtab = (ELF(Sym) *)
+ ((uintptr_t)dyn[i].d_un.d_ptr
+ + vdso_info.load_offset);
+ break;
+ case DT_HASH:
+ hash = (ELF(Word) *)
+ ((uintptr_t)dyn[i].d_un.d_ptr
+ + vdso_info.load_offset);
+ break;
+ case DT_VERSYM:
+ vdso_info.versym = (ELF(Versym) *)
+ ((uintptr_t)dyn[i].d_un.d_ptr
+ + vdso_info.load_offset);
+ break;
+ case DT_VERDEF:
+ vdso_info.verdef = (ELF(Verdef) *)
+ ((uintptr_t)dyn[i].d_un.d_ptr
+ + vdso_info.load_offset);
+ break;
+ }
+ }
+ if (!vdso_info.symstrings || !vdso_info.symtab || !hash)
+ return; /* Failed */
+
+ if (!vdso_info.verdef)
+ vdso_info.versym = 0;
+
+ /* Parse the hash table header. */
+ vdso_info.nbucket = hash[0];
+ vdso_info.nchain = hash[1];
+ vdso_info.bucket = &hash[2];
+ vdso_info.chain = &hash[vdso_info.nbucket + 2];
+
+ /* That's all we need. */
+ vdso_info.valid = true;
+}
+
+static bool vdso_match_version(ELF(Versym) ver,
+ const char *name, ELF(Word) hash)
+{
+ /*
+ * This is a helper function to check if the version indexed by
+ * ver matches name (which hashes to hash).
+ *
+ * The version definition table is a mess, and I don't know how
+ * to do this in better than linear time without allocating memory
+ * to build an index. I also don't know why the table has
+ * variable size entries in the first place.
+ *
+ * For added fun, I can't find a comprehensible specification of how
+ * to parse all the weird flags in the table.
+ *
+ * So I just parse the whole table every time.
+ */
+
+ /* First step: find the version definition */
+ ver &= 0x7fff; /* Apparently bit 15 means "hidden" */
+ ELF(Verdef) *def = vdso_info.verdef;
+ while(true) {
+ if ((def->vd_flags & VER_FLG_BASE) == 0
+ && (def->vd_ndx & 0x7fff) == ver)
+ break;
+
+ if (def->vd_next == 0)
+ return false; /* No definition. */
+
+ def = (ELF(Verdef) *)((char *)def + def->vd_next);
+ }
+
+ /* Now figure out whether it matches. */
+ ELF(Verdaux) *aux = (ELF(Verdaux)*)((char *)def + def->vd_aux);
+ return def->vd_hash == hash
+ && !strcmp(name, vdso_info.symstrings + aux->vda_name);
+}
+
+void *vdso_sym(const char *version, const char *name)
+{
+ unsigned long ver_hash;
+ if (!vdso_info.valid)
+ return 0;
+
+ ver_hash = elf_hash(version);
+ ELF(Word) chain = vdso_info.bucket[elf_hash(name) % vdso_info.nbucket];
+
+ for (; chain != STN_UNDEF; chain = vdso_info.chain[chain]) {
+ ELF(Sym) *sym = &vdso_info.symtab[chain];
+
+ /* Check for a defined global or weak function w/ right name. */
+ if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC)
+ continue;
+ if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL &&
+ ELF64_ST_BIND(sym->st_info) != STB_WEAK)
+ continue;
+ if (sym->st_shndx == SHN_UNDEF)
+ continue;
+ if (strcmp(name, vdso_info.symstrings + sym->st_name))
+ continue;
+
+ /* Check symbol version. */
+ if (vdso_info.versym
+ && !vdso_match_version(vdso_info.versym[chain],
+ version, ver_hash))
+ continue;
+
+ return (void *)(vdso_info.load_offset + sym->st_value);
+ }
+
+ return 0;
+}
+
+void vdso_init_from_auxv(void *auxv)
+{
+ ELF(auxv_t) *elf_auxv = auxv;
+ for (int i = 0; elf_auxv[i].a_type != AT_NULL; i++)
+ {
+ if (elf_auxv[i].a_type == AT_SYSINFO_EHDR) {
+ vdso_init_from_sysinfo_ehdr(elf_auxv[i].a_un.a_val);
+ return;
+ }
+ }
+
+ vdso_info.valid = false;
+}
diff --git a/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c b/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c
new file mode 100644
index 000000000..93b0ebf8c
--- /dev/null
+++ b/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c
@@ -0,0 +1,128 @@
+/*
+ * vdso_test.c: Sample code to test parse_vdso.c on x86
+ * Copyright (c) 2011-2014 Andy Lutomirski
+ * Subject to the GNU General Public License, version 2
+ *
+ * You can amuse yourself by compiling with:
+ * gcc -std=gnu99 -nostdlib
+ * -Os -fno-asynchronous-unwind-tables -flto -lgcc_s
+ * vdso_standalone_test_x86.c parse_vdso.c
+ * to generate a small binary. On x86_64, you can omit -lgcc_s
+ * if you want the binary to be completely standalone.
+ */
+
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <unistd.h>
+#include <stdint.h>
+
+extern void *vdso_sym(const char *version, const char *name);
+extern void vdso_init_from_sysinfo_ehdr(uintptr_t base);
+extern void vdso_init_from_auxv(void *auxv);
+
+/* We need a libc functions... */
+int strcmp(const char *a, const char *b)
+{
+ /* This implementation is buggy: it never returns -1. */
+ while (*a || *b) {
+ if (*a != *b)
+ return 1;
+ if (*a == 0 || *b == 0)
+ return 1;
+ a++;
+ b++;
+ }
+
+ return 0;
+}
+
+/* ...and two syscalls. This is x86-specific. */
+static inline long x86_syscall3(long nr, long a0, long a1, long a2)
+{
+ long ret;
+#ifdef __x86_64__
+ asm volatile ("syscall" : "=a" (ret) : "a" (nr),
+ "D" (a0), "S" (a1), "d" (a2) :
+ "cc", "memory", "rcx",
+ "r8", "r9", "r10", "r11" );
+#else
+ asm volatile ("int $0x80" : "=a" (ret) : "a" (nr),
+ "b" (a0), "c" (a1), "d" (a2) :
+ "cc", "memory" );
+#endif
+ return ret;
+}
+
+static inline long linux_write(int fd, const void *data, size_t len)
+{
+ return x86_syscall3(__NR_write, fd, (long)data, (long)len);
+}
+
+static inline void linux_exit(int code)
+{
+ x86_syscall3(__NR_exit, code, 0, 0);
+}
+
+void to_base10(char *lastdig, time_t n)
+{
+ while (n) {
+ *lastdig = (n % 10) + '0';
+ n /= 10;
+ lastdig--;
+ }
+}
+
+__attribute__((externally_visible)) void c_main(void **stack)
+{
+ /* Parse the stack */
+ long argc = (long)*stack;
+ stack += argc + 2;
+
+ /* Now we're pointing at the environment. Skip it. */
+ while(*stack)
+ stack++;
+ stack++;
+
+ /* Now we're pointing at auxv. Initialize the vDSO parser. */
+ vdso_init_from_auxv((void *)stack);
+
+ /* Find gettimeofday. */
+ typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz);
+ gtod_t gtod = (gtod_t)vdso_sym("LINUX_2.6", "__vdso_gettimeofday");
+
+ if (!gtod)
+ linux_exit(1);
+
+ struct timeval tv;
+ long ret = gtod(&tv, 0);
+
+ if (ret == 0) {
+ char buf[] = "The time is .000000\n";
+ to_base10(buf + 31, tv.tv_sec);
+ to_base10(buf + 38, tv.tv_usec);
+ linux_write(1, buf, sizeof(buf) - 1);
+ } else {
+ linux_exit(ret);
+ }
+
+ linux_exit(0);
+}
+
+/*
+ * This is the real entry point. It passes the initial stack into
+ * the C entry point.
+ */
+asm (
+ ".text\n"
+ ".global _start\n"
+ ".type _start,@function\n"
+ "_start:\n\t"
+#ifdef __x86_64__
+ "mov %rsp,%rdi\n\t"
+ "jmp c_main"
+#else
+ "push %esp\n\t"
+ "call c_main\n\t"
+ "int $3"
+#endif
+ );
diff --git a/tools/testing/selftests/vDSO/vdso_test.c b/tools/testing/selftests/vDSO/vdso_test.c
new file mode 100644
index 000000000..eda53f833
--- /dev/null
+++ b/tools/testing/selftests/vDSO/vdso_test.c
@@ -0,0 +1,68 @@
+/*
+ * vdso_test.c: Sample code to test parse_vdso.c
+ * Copyright (c) 2014 Andy Lutomirski
+ * Subject to the GNU General Public License, version 2
+ *
+ * Compile with:
+ * gcc -std=gnu99 vdso_test.c parse_vdso.c
+ *
+ * Tested on x86, 32-bit and 64-bit. It may work on other architectures, too.
+ */
+
+#include <stdint.h>
+#include <elf.h>
+#include <stdio.h>
+#include <sys/auxv.h>
+#include <sys/time.h>
+
+#include "../kselftest.h"
+
+extern void *vdso_sym(const char *version, const char *name);
+extern void vdso_init_from_sysinfo_ehdr(uintptr_t base);
+extern void vdso_init_from_auxv(void *auxv);
+
+/*
+ * ARM64's vDSO exports its gettimeofday() implementation with a different
+ * name and version from other architectures, so we need to handle it as
+ * a special case.
+ */
+#if defined(__aarch64__)
+const char *version = "LINUX_2.6.39";
+const char *name = "__kernel_gettimeofday";
+#else
+const char *version = "LINUX_2.6";
+const char *name = "__vdso_gettimeofday";
+#endif
+
+int main(int argc, char **argv)
+{
+ unsigned long sysinfo_ehdr = getauxval(AT_SYSINFO_EHDR);
+ if (!sysinfo_ehdr) {
+ printf("AT_SYSINFO_EHDR is not present!\n");
+ return KSFT_SKIP;
+ }
+
+ vdso_init_from_sysinfo_ehdr(getauxval(AT_SYSINFO_EHDR));
+
+ /* Find gettimeofday. */
+ typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz);
+ gtod_t gtod = (gtod_t)vdso_sym(version, name);
+
+ if (!gtod) {
+ printf("Could not find %s\n", name);
+ return KSFT_SKIP;
+ }
+
+ struct timeval tv;
+ long ret = gtod(&tv, 0);
+
+ if (ret == 0) {
+ printf("The time is %lld.%06lld\n",
+ (long long)tv.tv_sec, (long long)tv.tv_usec);
+ } else {
+ printf("%s failed\n", name);
+ return KSFT_FAIL;
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore
new file mode 100644
index 000000000..af5ff83f6
--- /dev/null
+++ b/tools/testing/selftests/vm/.gitignore
@@ -0,0 +1,15 @@
+hugepage-mmap
+hugepage-shm
+map_hugetlb
+map_populate
+thuge-gen
+compaction_test
+mlock2-tests
+on-fault-limit
+transhuge-stress
+userfaultfd
+mlock-intersect-test
+mlock-random-test
+virtual_address_range
+gup_benchmark
+va_128TBswitch
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
new file mode 100644
index 000000000..2cf3dc49b
--- /dev/null
+++ b/tools/testing/selftests/vm/Makefile
@@ -0,0 +1,34 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for vm selftests
+
+ifndef OUTPUT
+ OUTPUT := $(shell pwd)
+endif
+
+CFLAGS = -Wall -I ../../../../usr/include $(EXTRA_CFLAGS)
+LDLIBS = -lrt
+TEST_GEN_FILES = compaction_test
+TEST_GEN_FILES += gup_benchmark
+TEST_GEN_FILES += hugepage-mmap
+TEST_GEN_FILES += hugepage-shm
+TEST_GEN_FILES += map_hugetlb
+TEST_GEN_FILES += map_populate
+TEST_GEN_FILES += mlock-random-test
+TEST_GEN_FILES += mlock2-tests
+TEST_GEN_FILES += on-fault-limit
+TEST_GEN_FILES += thuge-gen
+TEST_GEN_FILES += transhuge-stress
+TEST_GEN_FILES += userfaultfd
+TEST_GEN_FILES += va_128TBswitch
+TEST_GEN_FILES += virtual_address_range
+
+TEST_PROGS := run_vmtests
+
+TEST_FILES := test_vmalloc.sh
+
+KSFT_KHDR_INSTALL := 1
+include ../lib.mk
+
+$(OUTPUT)/userfaultfd: LDLIBS += -lpthread
+
+$(OUTPUT)/mlock-random-test: LDLIBS += -lcap
diff --git a/tools/testing/selftests/vm/compaction_test.c b/tools/testing/selftests/vm/compaction_test.c
new file mode 100644
index 000000000..bcec71250
--- /dev/null
+++ b/tools/testing/selftests/vm/compaction_test.c
@@ -0,0 +1,230 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *
+ * A test for the patch "Allow compaction of unevictable pages".
+ * With this patch we should be able to allocate at least 1/4
+ * of RAM in huge pages. Without the patch much less is
+ * allocated.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <unistd.h>
+#include <string.h>
+
+#include "../kselftest.h"
+
+#define MAP_SIZE 1048576
+
+struct map_list {
+ void *map;
+ struct map_list *next;
+};
+
+int read_memory_info(unsigned long *memfree, unsigned long *hugepagesize)
+{
+ char buffer[256] = {0};
+ char *cmd = "cat /proc/meminfo | grep -i memfree | grep -o '[0-9]*'";
+ FILE *cmdfile = popen(cmd, "r");
+
+ if (!(fgets(buffer, sizeof(buffer), cmdfile))) {
+ perror("Failed to read meminfo\n");
+ return -1;
+ }
+
+ pclose(cmdfile);
+
+ *memfree = atoll(buffer);
+ cmd = "cat /proc/meminfo | grep -i hugepagesize | grep -o '[0-9]*'";
+ cmdfile = popen(cmd, "r");
+
+ if (!(fgets(buffer, sizeof(buffer), cmdfile))) {
+ perror("Failed to read meminfo\n");
+ return -1;
+ }
+
+ pclose(cmdfile);
+ *hugepagesize = atoll(buffer);
+
+ return 0;
+}
+
+int prereq(void)
+{
+ char allowed;
+ int fd;
+
+ fd = open("/proc/sys/vm/compact_unevictable_allowed",
+ O_RDONLY | O_NONBLOCK);
+ if (fd < 0) {
+ perror("Failed to open\n"
+ "/proc/sys/vm/compact_unevictable_allowed\n");
+ return -1;
+ }
+
+ if (read(fd, &allowed, sizeof(char)) != sizeof(char)) {
+ perror("Failed to read from\n"
+ "/proc/sys/vm/compact_unevictable_allowed\n");
+ close(fd);
+ return -1;
+ }
+
+ close(fd);
+ if (allowed == '1')
+ return 0;
+
+ return -1;
+}
+
+int check_compaction(unsigned long mem_free, unsigned int hugepage_size)
+{
+ int fd;
+ int compaction_index = 0;
+ char initial_nr_hugepages[10] = {0};
+ char nr_hugepages[10] = {0};
+
+ /* We want to test with 80% of available memory. Else, OOM killer comes
+ in to play */
+ mem_free = mem_free * 0.8;
+
+ fd = open("/proc/sys/vm/nr_hugepages", O_RDWR | O_NONBLOCK);
+ if (fd < 0) {
+ perror("Failed to open /proc/sys/vm/nr_hugepages");
+ return -1;
+ }
+
+ if (read(fd, initial_nr_hugepages, sizeof(initial_nr_hugepages)) <= 0) {
+ perror("Failed to read from /proc/sys/vm/nr_hugepages");
+ goto close_fd;
+ }
+
+ /* Start with the initial condition of 0 huge pages*/
+ if (write(fd, "0", sizeof(char)) != sizeof(char)) {
+ perror("Failed to write 0 to /proc/sys/vm/nr_hugepages\n");
+ goto close_fd;
+ }
+
+ lseek(fd, 0, SEEK_SET);
+
+ /* Request a large number of huge pages. The Kernel will allocate
+ as much as it can */
+ if (write(fd, "100000", (6*sizeof(char))) != (6*sizeof(char))) {
+ perror("Failed to write 100000 to /proc/sys/vm/nr_hugepages\n");
+ goto close_fd;
+ }
+
+ lseek(fd, 0, SEEK_SET);
+
+ if (read(fd, nr_hugepages, sizeof(nr_hugepages)) <= 0) {
+ perror("Failed to re-read from /proc/sys/vm/nr_hugepages\n");
+ goto close_fd;
+ }
+
+ /* We should have been able to request at least 1/3 rd of the memory in
+ huge pages */
+ compaction_index = mem_free/(atoi(nr_hugepages) * hugepage_size);
+
+ if (compaction_index > 3) {
+ printf("No of huge pages allocated = %d\n",
+ (atoi(nr_hugepages)));
+ fprintf(stderr, "ERROR: Less that 1/%d of memory is available\n"
+ "as huge pages\n", compaction_index);
+ goto close_fd;
+ }
+
+ printf("No of huge pages allocated = %d\n",
+ (atoi(nr_hugepages)));
+
+ lseek(fd, 0, SEEK_SET);
+
+ if (write(fd, initial_nr_hugepages, strlen(initial_nr_hugepages))
+ != strlen(initial_nr_hugepages)) {
+ perror("Failed to write value to /proc/sys/vm/nr_hugepages\n");
+ goto close_fd;
+ }
+
+ close(fd);
+ return 0;
+
+ close_fd:
+ close(fd);
+ printf("Not OK. Compaction test failed.");
+ return -1;
+}
+
+
+int main(int argc, char **argv)
+{
+ struct rlimit lim;
+ struct map_list *list, *entry;
+ size_t page_size, i;
+ void *map = NULL;
+ unsigned long mem_free = 0;
+ unsigned long hugepage_size = 0;
+ unsigned long mem_fragmentable = 0;
+
+ if (prereq() != 0) {
+ printf("Either the sysctl compact_unevictable_allowed is not\n"
+ "set to 1 or couldn't read the proc file.\n"
+ "Skipping the test\n");
+ return KSFT_SKIP;
+ }
+
+ lim.rlim_cur = RLIM_INFINITY;
+ lim.rlim_max = RLIM_INFINITY;
+ if (setrlimit(RLIMIT_MEMLOCK, &lim)) {
+ perror("Failed to set rlimit:\n");
+ return -1;
+ }
+
+ page_size = getpagesize();
+
+ list = NULL;
+
+ if (read_memory_info(&mem_free, &hugepage_size) != 0) {
+ printf("ERROR: Cannot read meminfo\n");
+ return -1;
+ }
+
+ mem_fragmentable = mem_free * 0.8 / 1024;
+
+ while (mem_fragmentable > 0) {
+ map = mmap(NULL, MAP_SIZE, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE | MAP_LOCKED, -1, 0);
+ if (map == MAP_FAILED)
+ break;
+
+ entry = malloc(sizeof(struct map_list));
+ if (!entry) {
+ munmap(map, MAP_SIZE);
+ break;
+ }
+ entry->map = map;
+ entry->next = list;
+ list = entry;
+
+ /* Write something (in this case the address of the map) to
+ * ensure that KSM can't merge the mapped pages
+ */
+ for (i = 0; i < MAP_SIZE; i += page_size)
+ *(unsigned long *)(map + i) = (unsigned long)map + i;
+
+ mem_fragmentable--;
+ }
+
+ for (entry = list; entry != NULL; entry = entry->next) {
+ munmap(entry->map, MAP_SIZE);
+ if (!entry->next)
+ break;
+ entry = entry->next;
+ }
+
+ if (check_compaction(mem_free, hugepage_size) == 0)
+ return 0;
+
+ return -1;
+}
diff --git a/tools/testing/selftests/vm/config b/tools/testing/selftests/vm/config
new file mode 100644
index 000000000..1c0d76cb5
--- /dev/null
+++ b/tools/testing/selftests/vm/config
@@ -0,0 +1,2 @@
+CONFIG_SYSVIPC=y
+CONFIG_USERFAULTFD=y
diff --git a/tools/testing/selftests/vm/gup_benchmark.c b/tools/testing/selftests/vm/gup_benchmark.c
new file mode 100644
index 000000000..17da711f2
--- /dev/null
+++ b/tools/testing/selftests/vm/gup_benchmark.c
@@ -0,0 +1,93 @@
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include <linux/types.h>
+
+#define MB (1UL << 20)
+#define PAGE_SIZE sysconf(_SC_PAGESIZE)
+
+#define GUP_FAST_BENCHMARK _IOWR('g', 1, struct gup_benchmark)
+
+struct gup_benchmark {
+ __u64 delta_usec;
+ __u64 addr;
+ __u64 size;
+ __u32 nr_pages_per_call;
+ __u32 flags;
+ __u64 expansion[10]; /* For future use */
+};
+
+int main(int argc, char **argv)
+{
+ struct gup_benchmark gup;
+ unsigned long size = 128 * MB;
+ int i, fd, opt, nr_pages = 1, thp = -1, repeats = 1, write = 0;
+ char *p;
+
+ while ((opt = getopt(argc, argv, "m:r:n:tT")) != -1) {
+ switch (opt) {
+ case 'm':
+ size = atoi(optarg) * MB;
+ break;
+ case 'r':
+ repeats = atoi(optarg);
+ break;
+ case 'n':
+ nr_pages = atoi(optarg);
+ break;
+ case 't':
+ thp = 1;
+ break;
+ case 'T':
+ thp = 0;
+ break;
+ case 'w':
+ write = 1;
+ break;
+ default:
+ return -1;
+ }
+ }
+
+ gup.nr_pages_per_call = nr_pages;
+ gup.flags = write;
+
+ fd = open("/sys/kernel/debug/gup_benchmark", O_RDWR);
+ if (fd == -1)
+ perror("open"), exit(1);
+
+ p = mmap(NULL, size, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ if (p == MAP_FAILED)
+ perror("mmap"), exit(1);
+ gup.addr = (unsigned long)p;
+
+ if (thp == 1)
+ madvise(p, size, MADV_HUGEPAGE);
+ else if (thp == 0)
+ madvise(p, size, MADV_NOHUGEPAGE);
+
+ for (; (unsigned long)p < gup.addr + size; p += PAGE_SIZE)
+ p[0] = 0;
+
+ for (i = 0; i < repeats; i++) {
+ gup.size = size;
+ if (ioctl(fd, GUP_FAST_BENCHMARK, &gup))
+ perror("ioctl"), exit(1);
+
+ printf("Time: %lld us", gup.delta_usec);
+ if (gup.size != size)
+ printf(", truncated (size: %lld)", gup.size);
+ printf("\n");
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/vm/hugepage-mmap.c b/tools/testing/selftests/vm/hugepage-mmap.c
new file mode 100644
index 000000000..93f9e7b81
--- /dev/null
+++ b/tools/testing/selftests/vm/hugepage-mmap.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * hugepage-mmap:
+ *
+ * Example of using huge page memory in a user application using the mmap
+ * system call. Before running this application, make sure that the
+ * administrator has mounted the hugetlbfs filesystem (on some directory
+ * like /mnt) using the command mount -t hugetlbfs nodev /mnt. In this
+ * example, the app is requesting memory of size 256MB that is backed by
+ * huge pages.
+ *
+ * For the ia64 architecture, the Linux kernel reserves Region number 4 for
+ * huge pages. That means that if one requires a fixed address, a huge page
+ * aligned address starting with 0x800000... will be required. If a fixed
+ * address is not required, the kernel will select an address in the proper
+ * range.
+ * Other architectures, such as ppc64, i386 or x86_64 are not so constrained.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+
+#define FILE_NAME "huge/hugepagefile"
+#define LENGTH (256UL*1024*1024)
+#define PROTECTION (PROT_READ | PROT_WRITE)
+
+/* Only ia64 requires this */
+#ifdef __ia64__
+#define ADDR (void *)(0x8000000000000000UL)
+#define FLAGS (MAP_SHARED | MAP_FIXED)
+#else
+#define ADDR (void *)(0x0UL)
+#define FLAGS (MAP_SHARED)
+#endif
+
+static void check_bytes(char *addr)
+{
+ printf("First hex is %x\n", *((unsigned int *)addr));
+}
+
+static void write_bytes(char *addr)
+{
+ unsigned long i;
+
+ for (i = 0; i < LENGTH; i++)
+ *(addr + i) = (char)i;
+}
+
+static int read_bytes(char *addr)
+{
+ unsigned long i;
+
+ check_bytes(addr);
+ for (i = 0; i < LENGTH; i++)
+ if (*(addr + i) != (char)i) {
+ printf("Mismatch at %lu\n", i);
+ return 1;
+ }
+ return 0;
+}
+
+int main(void)
+{
+ void *addr;
+ int fd, ret;
+
+ fd = open(FILE_NAME, O_CREAT | O_RDWR, 0755);
+ if (fd < 0) {
+ perror("Open failed");
+ exit(1);
+ }
+
+ addr = mmap(ADDR, LENGTH, PROTECTION, FLAGS, fd, 0);
+ if (addr == MAP_FAILED) {
+ perror("mmap");
+ unlink(FILE_NAME);
+ exit(1);
+ }
+
+ printf("Returned address is %p\n", addr);
+ check_bytes(addr);
+ write_bytes(addr);
+ ret = read_bytes(addr);
+
+ munmap(addr, LENGTH);
+ close(fd);
+ unlink(FILE_NAME);
+
+ return ret;
+}
diff --git a/tools/testing/selftests/vm/hugepage-shm.c b/tools/testing/selftests/vm/hugepage-shm.c
new file mode 100644
index 000000000..e2527f320
--- /dev/null
+++ b/tools/testing/selftests/vm/hugepage-shm.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * hugepage-shm:
+ *
+ * Example of using huge page memory in a user application using Sys V shared
+ * memory system calls. In this example the app is requesting 256MB of
+ * memory that is backed by huge pages. The application uses the flag
+ * SHM_HUGETLB in the shmget system call to inform the kernel that it is
+ * requesting huge pages.
+ *
+ * For the ia64 architecture, the Linux kernel reserves Region number 4 for
+ * huge pages. That means that if one requires a fixed address, a huge page
+ * aligned address starting with 0x800000... will be required. If a fixed
+ * address is not required, the kernel will select an address in the proper
+ * range.
+ * Other architectures, such as ppc64, i386 or x86_64 are not so constrained.
+ *
+ * Note: The default shared memory limit is quite low on many kernels,
+ * you may need to increase it via:
+ *
+ * echo 268435456 > /proc/sys/kernel/shmmax
+ *
+ * This will increase the maximum size per shared memory segment to 256MB.
+ * The other limit that you will hit eventually is shmall which is the
+ * total amount of shared memory in pages. To set it to 16GB on a system
+ * with a 4kB pagesize do:
+ *
+ * echo 4194304 > /proc/sys/kernel/shmall
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#include <sys/mman.h>
+
+#ifndef SHM_HUGETLB
+#define SHM_HUGETLB 04000
+#endif
+
+#define LENGTH (256UL*1024*1024)
+
+#define dprintf(x) printf(x)
+
+/* Only ia64 requires this */
+#ifdef __ia64__
+#define ADDR (void *)(0x8000000000000000UL)
+#define SHMAT_FLAGS (SHM_RND)
+#else
+#define ADDR (void *)(0x0UL)
+#define SHMAT_FLAGS (0)
+#endif
+
+int main(void)
+{
+ int shmid;
+ unsigned long i;
+ char *shmaddr;
+
+ shmid = shmget(2, LENGTH, SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W);
+ if (shmid < 0) {
+ perror("shmget");
+ exit(1);
+ }
+ printf("shmid: 0x%x\n", shmid);
+
+ shmaddr = shmat(shmid, ADDR, SHMAT_FLAGS);
+ if (shmaddr == (char *)-1) {
+ perror("Shared memory attach failure");
+ shmctl(shmid, IPC_RMID, NULL);
+ exit(2);
+ }
+ printf("shmaddr: %p\n", shmaddr);
+
+ dprintf("Starting the writes:\n");
+ for (i = 0; i < LENGTH; i++) {
+ shmaddr[i] = (char)(i);
+ if (!(i % (1024 * 1024)))
+ dprintf(".");
+ }
+ dprintf("\n");
+
+ dprintf("Starting the Check...");
+ for (i = 0; i < LENGTH; i++)
+ if (shmaddr[i] != (char)i) {
+ printf("\nIndex %lu mismatched\n", i);
+ exit(3);
+ }
+ dprintf("Done.\n");
+
+ if (shmdt((const void *)shmaddr) != 0) {
+ perror("Detach failure");
+ shmctl(shmid, IPC_RMID, NULL);
+ exit(4);
+ }
+
+ shmctl(shmid, IPC_RMID, NULL);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/vm/map_hugetlb.c b/tools/testing/selftests/vm/map_hugetlb.c
new file mode 100644
index 000000000..9b777fa95
--- /dev/null
+++ b/tools/testing/selftests/vm/map_hugetlb.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Example of using hugepage memory in a user application using the mmap
+ * system call with MAP_HUGETLB flag. Before running this program make
+ * sure the administrator has allocated enough default sized huge pages
+ * to cover the 256 MB allocation.
+ *
+ * For ia64 architecture, Linux kernel reserves Region number 4 for hugepages.
+ * That means the addresses starting with 0x800000... will need to be
+ * specified. Specifying a fixed address is not required on ppc64, i386
+ * or x86_64.
+ */
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+
+#define LENGTH (256UL*1024*1024)
+#define PROTECTION (PROT_READ | PROT_WRITE)
+
+#ifndef MAP_HUGETLB
+#define MAP_HUGETLB 0x40000 /* arch specific */
+#endif
+
+/* Only ia64 requires this */
+#ifdef __ia64__
+#define ADDR (void *)(0x8000000000000000UL)
+#define FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_FIXED)
+#else
+#define ADDR (void *)(0x0UL)
+#define FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB)
+#endif
+
+static void check_bytes(char *addr)
+{
+ printf("First hex is %x\n", *((unsigned int *)addr));
+}
+
+static void write_bytes(char *addr)
+{
+ unsigned long i;
+
+ for (i = 0; i < LENGTH; i++)
+ *(addr + i) = (char)i;
+}
+
+static int read_bytes(char *addr)
+{
+ unsigned long i;
+
+ check_bytes(addr);
+ for (i = 0; i < LENGTH; i++)
+ if (*(addr + i) != (char)i) {
+ printf("Mismatch at %lu\n", i);
+ return 1;
+ }
+ return 0;
+}
+
+int main(void)
+{
+ void *addr;
+ int ret;
+
+ addr = mmap(ADDR, LENGTH, PROTECTION, FLAGS, -1, 0);
+ if (addr == MAP_FAILED) {
+ perror("mmap");
+ exit(1);
+ }
+
+ printf("Returned address is %p\n", addr);
+ check_bytes(addr);
+ write_bytes(addr);
+ ret = read_bytes(addr);
+
+ /* munmap() length of MAP_HUGETLB memory must be hugepage aligned */
+ if (munmap(addr, LENGTH)) {
+ perror("munmap");
+ exit(1);
+ }
+
+ return ret;
+}
diff --git a/tools/testing/selftests/vm/map_populate.c b/tools/testing/selftests/vm/map_populate.c
new file mode 100644
index 000000000..6b8aeaa0b
--- /dev/null
+++ b/tools/testing/selftests/vm/map_populate.c
@@ -0,0 +1,113 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018 Dmitry Safonov, Arista Networks
+ *
+ * MAP_POPULATE | MAP_PRIVATE should COW VMA pages.
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#ifndef MMAP_SZ
+#define MMAP_SZ 4096
+#endif
+
+#define BUG_ON(condition, description) \
+ do { \
+ if (condition) { \
+ fprintf(stderr, "[FAIL]\t%s:%d\t%s:%s\n", __func__, \
+ __LINE__, (description), strerror(errno)); \
+ exit(1); \
+ } \
+ } while (0)
+
+static int parent_f(int sock, unsigned long *smap, int child)
+{
+ int status, ret;
+
+ ret = read(sock, &status, sizeof(int));
+ BUG_ON(ret <= 0, "read(sock)");
+
+ *smap = 0x22222BAD;
+ ret = msync(smap, MMAP_SZ, MS_SYNC);
+ BUG_ON(ret, "msync()");
+
+ ret = write(sock, &status, sizeof(int));
+ BUG_ON(ret <= 0, "write(sock)");
+
+ waitpid(child, &status, 0);
+ BUG_ON(!WIFEXITED(status), "child in unexpected state");
+
+ return WEXITSTATUS(status);
+}
+
+static int child_f(int sock, unsigned long *smap, int fd)
+{
+ int ret, buf = 0;
+
+ smap = mmap(0, MMAP_SZ, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_POPULATE, fd, 0);
+ BUG_ON(smap == MAP_FAILED, "mmap()");
+
+ BUG_ON(*smap != 0xdeadbabe, "MAP_PRIVATE | MAP_POPULATE changed file");
+
+ ret = write(sock, &buf, sizeof(int));
+ BUG_ON(ret <= 0, "write(sock)");
+
+ ret = read(sock, &buf, sizeof(int));
+ BUG_ON(ret <= 0, "read(sock)");
+
+ BUG_ON(*smap == 0x22222BAD, "MAP_POPULATE didn't COW private page");
+ BUG_ON(*smap != 0xdeadbabe, "mapping was corrupted");
+
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ int sock[2], child, ret;
+ FILE *ftmp;
+ unsigned long *smap;
+
+ ftmp = tmpfile();
+ BUG_ON(ftmp == 0, "tmpfile()");
+
+ ret = ftruncate(fileno(ftmp), MMAP_SZ);
+ BUG_ON(ret, "ftruncate()");
+
+ smap = mmap(0, MMAP_SZ, PROT_READ | PROT_WRITE,
+ MAP_SHARED, fileno(ftmp), 0);
+ BUG_ON(smap == MAP_FAILED, "mmap()");
+
+ *smap = 0xdeadbabe;
+ /* Probably unnecessary, but let it be. */
+ ret = msync(smap, MMAP_SZ, MS_SYNC);
+ BUG_ON(ret, "msync()");
+
+ ret = socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sock);
+ BUG_ON(ret, "socketpair()");
+
+ child = fork();
+ BUG_ON(child == -1, "fork()");
+
+ if (child) {
+ ret = close(sock[0]);
+ BUG_ON(ret, "close()");
+
+ return parent_f(sock[1], smap, child);
+ }
+
+ ret = close(sock[1]);
+ BUG_ON(ret, "close()");
+
+ return child_f(sock[0], smap, fileno(ftmp));
+}
diff --git a/tools/testing/selftests/vm/mlock-random-test.c b/tools/testing/selftests/vm/mlock-random-test.c
new file mode 100644
index 000000000..ff4d72eb7
--- /dev/null
+++ b/tools/testing/selftests/vm/mlock-random-test.c
@@ -0,0 +1,294 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * It tests the mlock/mlock2() when they are invoked
+ * on randomly memory region.
+ */
+#include <unistd.h>
+#include <sys/resource.h>
+#include <sys/capability.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#include <time.h>
+#include "mlock2.h"
+
+#define CHUNK_UNIT (128 * 1024)
+#define MLOCK_RLIMIT_SIZE (CHUNK_UNIT * 2)
+#define MLOCK_WITHIN_LIMIT_SIZE CHUNK_UNIT
+#define MLOCK_OUTOF_LIMIT_SIZE (CHUNK_UNIT * 3)
+
+#define TEST_LOOP 100
+#define PAGE_ALIGN(size, ps) (((size) + ((ps) - 1)) & ~((ps) - 1))
+
+int set_cap_limits(rlim_t max)
+{
+ struct rlimit new;
+ cap_t cap = cap_init();
+
+ new.rlim_cur = max;
+ new.rlim_max = max;
+ if (setrlimit(RLIMIT_MEMLOCK, &new)) {
+ perror("setrlimit() returns error\n");
+ return -1;
+ }
+
+ /* drop capabilities including CAP_IPC_LOCK */
+ if (cap_set_proc(cap)) {
+ perror("cap_set_proc() returns error\n");
+ return -2;
+ }
+
+ return 0;
+}
+
+int get_proc_locked_vm_size(void)
+{
+ FILE *f;
+ int ret = -1;
+ char line[1024] = {0};
+ unsigned long lock_size = 0;
+
+ f = fopen("/proc/self/status", "r");
+ if (!f) {
+ perror("fopen");
+ return -1;
+ }
+
+ while (fgets(line, 1024, f)) {
+ if (strstr(line, "VmLck")) {
+ ret = sscanf(line, "VmLck:\t%8lu kB", &lock_size);
+ if (ret <= 0) {
+ printf("sscanf() on VmLck error: %s: %d\n",
+ line, ret);
+ fclose(f);
+ return -1;
+ }
+ fclose(f);
+ return (int)(lock_size << 10);
+ }
+ }
+
+ perror("cann't parse VmLck in /proc/self/status\n");
+ fclose(f);
+ return -1;
+}
+
+/*
+ * Get the MMUPageSize of the memory region including input
+ * address from proc file.
+ *
+ * return value: on error case, 0 will be returned.
+ * Otherwise the page size(in bytes) is returned.
+ */
+int get_proc_page_size(unsigned long addr)
+{
+ FILE *smaps;
+ char *line;
+ unsigned long mmupage_size = 0;
+ size_t size;
+
+ smaps = seek_to_smaps_entry(addr);
+ if (!smaps) {
+ printf("Unable to parse /proc/self/smaps\n");
+ return 0;
+ }
+
+ while (getline(&line, &size, smaps) > 0) {
+ if (!strstr(line, "MMUPageSize")) {
+ free(line);
+ line = NULL;
+ size = 0;
+ continue;
+ }
+
+ /* found the MMUPageSize of this section */
+ if (sscanf(line, "MMUPageSize: %8lu kB",
+ &mmupage_size) < 1) {
+ printf("Unable to parse smaps entry for Size:%s\n",
+ line);
+ break;
+ }
+
+ }
+ free(line);
+ if (smaps)
+ fclose(smaps);
+ return mmupage_size << 10;
+}
+
+/*
+ * Test mlock/mlock2() on provided memory chunk.
+ * It expects the mlock/mlock2() to be successful (within rlimit)
+ *
+ * With allocated memory chunk [p, p + alloc_size), this
+ * test will choose start/len randomly to perform mlock/mlock2
+ * [start, start + len] memory range. The range is within range
+ * of the allocated chunk.
+ *
+ * The memory region size alloc_size is within the rlimit.
+ * So we always expect a success of mlock/mlock2.
+ *
+ * VmLck is assumed to be 0 before this test.
+ *
+ * return value: 0 - success
+ * else: failure
+ */
+int test_mlock_within_limit(char *p, int alloc_size)
+{
+ int i;
+ int ret = 0;
+ int locked_vm_size = 0;
+ struct rlimit cur;
+ int page_size = 0;
+
+ getrlimit(RLIMIT_MEMLOCK, &cur);
+ if (cur.rlim_cur < alloc_size) {
+ printf("alloc_size[%d] < %u rlimit,lead to mlock failure\n",
+ alloc_size, (unsigned int)cur.rlim_cur);
+ return -1;
+ }
+
+ srand(time(NULL));
+ for (i = 0; i < TEST_LOOP; i++) {
+ /*
+ * - choose mlock/mlock2 randomly
+ * - choose lock_size randomly but lock_size < alloc_size
+ * - choose start_offset randomly but p+start_offset+lock_size
+ * < p+alloc_size
+ */
+ int is_mlock = !!(rand() % 2);
+ int lock_size = rand() % alloc_size;
+ int start_offset = rand() % (alloc_size - lock_size);
+
+ if (is_mlock)
+ ret = mlock(p + start_offset, lock_size);
+ else
+ ret = mlock2_(p + start_offset, lock_size,
+ MLOCK_ONFAULT);
+
+ if (ret) {
+ printf("%s() failure at |%p(%d)| mlock:|%p(%d)|\n",
+ is_mlock ? "mlock" : "mlock2",
+ p, alloc_size,
+ p + start_offset, lock_size);
+ return ret;
+ }
+ }
+
+ /*
+ * Check VmLck left by the tests.
+ */
+ locked_vm_size = get_proc_locked_vm_size();
+ page_size = get_proc_page_size((unsigned long)p);
+ if (page_size == 0) {
+ printf("cannot get proc MMUPageSize\n");
+ return -1;
+ }
+
+ if (locked_vm_size > PAGE_ALIGN(alloc_size, page_size) + page_size) {
+ printf("test_mlock_within_limit() left VmLck:%d on %d chunk\n",
+ locked_vm_size, alloc_size);
+ return -1;
+ }
+
+ return 0;
+}
+
+
+/*
+ * We expect the mlock/mlock2() to be fail (outof limitation)
+ *
+ * With allocated memory chunk [p, p + alloc_size), this
+ * test will randomly choose start/len and perform mlock/mlock2
+ * on [start, start+len] range.
+ *
+ * The memory region size alloc_size is above the rlimit.
+ * And the len to be locked is higher than rlimit.
+ * So we always expect a failure of mlock/mlock2.
+ * No locked page number should be increased as a side effect.
+ *
+ * return value: 0 - success
+ * else: failure
+ */
+int test_mlock_outof_limit(char *p, int alloc_size)
+{
+ int i;
+ int ret = 0;
+ int locked_vm_size = 0, old_locked_vm_size = 0;
+ struct rlimit cur;
+
+ getrlimit(RLIMIT_MEMLOCK, &cur);
+ if (cur.rlim_cur >= alloc_size) {
+ printf("alloc_size[%d] >%u rlimit, violates test condition\n",
+ alloc_size, (unsigned int)cur.rlim_cur);
+ return -1;
+ }
+
+ old_locked_vm_size = get_proc_locked_vm_size();
+ srand(time(NULL));
+ for (i = 0; i < TEST_LOOP; i++) {
+ int is_mlock = !!(rand() % 2);
+ int lock_size = (rand() % (alloc_size - cur.rlim_cur))
+ + cur.rlim_cur;
+ int start_offset = rand() % (alloc_size - lock_size);
+
+ if (is_mlock)
+ ret = mlock(p + start_offset, lock_size);
+ else
+ ret = mlock2_(p + start_offset, lock_size,
+ MLOCK_ONFAULT);
+ if (ret == 0) {
+ printf("%s() succeeds? on %p(%d) mlock%p(%d)\n",
+ is_mlock ? "mlock" : "mlock2",
+ p, alloc_size,
+ p + start_offset, lock_size);
+ return -1;
+ }
+ }
+
+ locked_vm_size = get_proc_locked_vm_size();
+ if (locked_vm_size != old_locked_vm_size) {
+ printf("tests leads to new mlocked page: old[%d], new[%d]\n",
+ old_locked_vm_size,
+ locked_vm_size);
+ return -1;
+ }
+
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ char *p = NULL;
+ int ret = 0;
+
+ if (set_cap_limits(MLOCK_RLIMIT_SIZE))
+ return -1;
+
+ p = malloc(MLOCK_WITHIN_LIMIT_SIZE);
+ if (p == NULL) {
+ perror("malloc() failure\n");
+ return -1;
+ }
+ ret = test_mlock_within_limit(p, MLOCK_WITHIN_LIMIT_SIZE);
+ if (ret)
+ return ret;
+ munlock(p, MLOCK_WITHIN_LIMIT_SIZE);
+ free(p);
+
+
+ p = malloc(MLOCK_OUTOF_LIMIT_SIZE);
+ if (p == NULL) {
+ perror("malloc() failure\n");
+ return -1;
+ }
+ ret = test_mlock_outof_limit(p, MLOCK_OUTOF_LIMIT_SIZE);
+ if (ret)
+ return ret;
+ munlock(p, MLOCK_OUTOF_LIMIT_SIZE);
+ free(p);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/vm/mlock2-tests.c b/tools/testing/selftests/vm/mlock2-tests.c
new file mode 100644
index 000000000..11b2301f3
--- /dev/null
+++ b/tools/testing/selftests/vm/mlock2-tests.c
@@ -0,0 +1,520 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <sys/mman.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <stdbool.h>
+#include "mlock2.h"
+
+#include "../kselftest.h"
+
+struct vm_boundaries {
+ unsigned long start;
+ unsigned long end;
+};
+
+static int get_vm_area(unsigned long addr, struct vm_boundaries *area)
+{
+ FILE *file;
+ int ret = 1;
+ char line[1024] = {0};
+ char *end_addr;
+ char *stop;
+ unsigned long start;
+ unsigned long end;
+
+ if (!area)
+ return ret;
+
+ file = fopen("/proc/self/maps", "r");
+ if (!file) {
+ perror("fopen");
+ return ret;
+ }
+
+ memset(area, 0, sizeof(struct vm_boundaries));
+
+ while(fgets(line, 1024, file)) {
+ end_addr = strchr(line, '-');
+ if (!end_addr) {
+ printf("cannot parse /proc/self/maps\n");
+ goto out;
+ }
+ *end_addr = '\0';
+ end_addr++;
+ stop = strchr(end_addr, ' ');
+ if (!stop) {
+ printf("cannot parse /proc/self/maps\n");
+ goto out;
+ }
+ stop = '\0';
+
+ sscanf(line, "%lx", &start);
+ sscanf(end_addr, "%lx", &end);
+
+ if (start <= addr && end > addr) {
+ area->start = start;
+ area->end = end;
+ ret = 0;
+ goto out;
+ }
+ }
+out:
+ fclose(file);
+ return ret;
+}
+
+#define VMFLAGS "VmFlags:"
+
+static bool is_vmflag_set(unsigned long addr, const char *vmflag)
+{
+ char *line = NULL;
+ char *flags;
+ size_t size = 0;
+ bool ret = false;
+ FILE *smaps;
+
+ smaps = seek_to_smaps_entry(addr);
+ if (!smaps) {
+ printf("Unable to parse /proc/self/smaps\n");
+ goto out;
+ }
+
+ while (getline(&line, &size, smaps) > 0) {
+ if (!strstr(line, VMFLAGS)) {
+ free(line);
+ line = NULL;
+ size = 0;
+ continue;
+ }
+
+ flags = line + strlen(VMFLAGS);
+ ret = (strstr(flags, vmflag) != NULL);
+ goto out;
+ }
+
+out:
+ free(line);
+ fclose(smaps);
+ return ret;
+}
+
+#define SIZE "Size:"
+#define RSS "Rss:"
+#define LOCKED "lo"
+
+static unsigned long get_value_for_name(unsigned long addr, const char *name)
+{
+ char *line = NULL;
+ size_t size = 0;
+ char *value_ptr;
+ FILE *smaps = NULL;
+ unsigned long value = -1UL;
+
+ smaps = seek_to_smaps_entry(addr);
+ if (!smaps) {
+ printf("Unable to parse /proc/self/smaps\n");
+ goto out;
+ }
+
+ while (getline(&line, &size, smaps) > 0) {
+ if (!strstr(line, name)) {
+ free(line);
+ line = NULL;
+ size = 0;
+ continue;
+ }
+
+ value_ptr = line + strlen(name);
+ if (sscanf(value_ptr, "%lu kB", &value) < 1) {
+ printf("Unable to parse smaps entry for Size\n");
+ goto out;
+ }
+ break;
+ }
+
+out:
+ if (smaps)
+ fclose(smaps);
+ free(line);
+ return value;
+}
+
+static bool is_vma_lock_on_fault(unsigned long addr)
+{
+ bool locked;
+ unsigned long vma_size, vma_rss;
+
+ locked = is_vmflag_set(addr, LOCKED);
+ if (!locked)
+ return false;
+
+ vma_size = get_value_for_name(addr, SIZE);
+ vma_rss = get_value_for_name(addr, RSS);
+
+ /* only one page is faulted in */
+ return (vma_rss < vma_size);
+}
+
+#define PRESENT_BIT 0x8000000000000000ULL
+#define PFN_MASK 0x007FFFFFFFFFFFFFULL
+#define UNEVICTABLE_BIT (1UL << 18)
+
+static int lock_check(unsigned long addr)
+{
+ bool locked;
+ unsigned long vma_size, vma_rss;
+
+ locked = is_vmflag_set(addr, LOCKED);
+ if (!locked)
+ return false;
+
+ vma_size = get_value_for_name(addr, SIZE);
+ vma_rss = get_value_for_name(addr, RSS);
+
+ return (vma_rss == vma_size);
+}
+
+static int unlock_lock_check(char *map)
+{
+ if (is_vmflag_set((unsigned long)map, LOCKED)) {
+ printf("VMA flag %s is present on page 1 after unlock\n", LOCKED);
+ return 1;
+ }
+
+ return 0;
+}
+
+static int test_mlock_lock()
+{
+ char *map;
+ int ret = 1;
+ unsigned long page_size = getpagesize();
+
+ map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ if (map == MAP_FAILED) {
+ perror("test_mlock_locked mmap");
+ goto out;
+ }
+
+ if (mlock2_(map, 2 * page_size, 0)) {
+ if (errno == ENOSYS) {
+ printf("Cannot call new mlock family, skipping test\n");
+ _exit(KSFT_SKIP);
+ }
+ perror("mlock2(0)");
+ goto unmap;
+ }
+
+ if (!lock_check((unsigned long)map))
+ goto unmap;
+
+ /* Now unlock and recheck attributes */
+ if (munlock(map, 2 * page_size)) {
+ perror("munlock()");
+ goto unmap;
+ }
+
+ ret = unlock_lock_check(map);
+
+unmap:
+ munmap(map, 2 * page_size);
+out:
+ return ret;
+}
+
+static int onfault_check(char *map)
+{
+ *map = 'a';
+ if (!is_vma_lock_on_fault((unsigned long)map)) {
+ printf("VMA is not marked for lock on fault\n");
+ return 1;
+ }
+
+ return 0;
+}
+
+static int unlock_onfault_check(char *map)
+{
+ unsigned long page_size = getpagesize();
+
+ if (is_vma_lock_on_fault((unsigned long)map) ||
+ is_vma_lock_on_fault((unsigned long)map + page_size)) {
+ printf("VMA is still lock on fault after unlock\n");
+ return 1;
+ }
+
+ return 0;
+}
+
+static int test_mlock_onfault()
+{
+ char *map;
+ int ret = 1;
+ unsigned long page_size = getpagesize();
+
+ map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ if (map == MAP_FAILED) {
+ perror("test_mlock_locked mmap");
+ goto out;
+ }
+
+ if (mlock2_(map, 2 * page_size, MLOCK_ONFAULT)) {
+ if (errno == ENOSYS) {
+ printf("Cannot call new mlock family, skipping test\n");
+ _exit(KSFT_SKIP);
+ }
+ perror("mlock2(MLOCK_ONFAULT)");
+ goto unmap;
+ }
+
+ if (onfault_check(map))
+ goto unmap;
+
+ /* Now unlock and recheck attributes */
+ if (munlock(map, 2 * page_size)) {
+ if (errno == ENOSYS) {
+ printf("Cannot call new mlock family, skipping test\n");
+ _exit(KSFT_SKIP);
+ }
+ perror("munlock()");
+ goto unmap;
+ }
+
+ ret = unlock_onfault_check(map);
+unmap:
+ munmap(map, 2 * page_size);
+out:
+ return ret;
+}
+
+static int test_lock_onfault_of_present()
+{
+ char *map;
+ int ret = 1;
+ unsigned long page_size = getpagesize();
+
+ map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ if (map == MAP_FAILED) {
+ perror("test_mlock_locked mmap");
+ goto out;
+ }
+
+ *map = 'a';
+
+ if (mlock2_(map, 2 * page_size, MLOCK_ONFAULT)) {
+ if (errno == ENOSYS) {
+ printf("Cannot call new mlock family, skipping test\n");
+ _exit(KSFT_SKIP);
+ }
+ perror("mlock2(MLOCK_ONFAULT)");
+ goto unmap;
+ }
+
+ if (!is_vma_lock_on_fault((unsigned long)map) ||
+ !is_vma_lock_on_fault((unsigned long)map + page_size)) {
+ printf("VMA with present pages is not marked lock on fault\n");
+ goto unmap;
+ }
+ ret = 0;
+unmap:
+ munmap(map, 2 * page_size);
+out:
+ return ret;
+}
+
+static int test_munlockall()
+{
+ char *map;
+ int ret = 1;
+ unsigned long page_size = getpagesize();
+
+ map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+
+ if (map == MAP_FAILED) {
+ perror("test_munlockall mmap");
+ goto out;
+ }
+
+ if (mlockall(MCL_CURRENT)) {
+ perror("mlockall(MCL_CURRENT)");
+ goto out;
+ }
+
+ if (!lock_check((unsigned long)map))
+ goto unmap;
+
+ if (munlockall()) {
+ perror("munlockall()");
+ goto unmap;
+ }
+
+ if (unlock_lock_check(map))
+ goto unmap;
+
+ munmap(map, 2 * page_size);
+
+ map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+
+ if (map == MAP_FAILED) {
+ perror("test_munlockall second mmap");
+ goto out;
+ }
+
+ if (mlockall(MCL_CURRENT | MCL_ONFAULT)) {
+ perror("mlockall(MCL_CURRENT | MCL_ONFAULT)");
+ goto unmap;
+ }
+
+ if (onfault_check(map))
+ goto unmap;
+
+ if (munlockall()) {
+ perror("munlockall()");
+ goto unmap;
+ }
+
+ if (unlock_onfault_check(map))
+ goto unmap;
+
+ if (mlockall(MCL_CURRENT | MCL_FUTURE)) {
+ perror("mlockall(MCL_CURRENT | MCL_FUTURE)");
+ goto out;
+ }
+
+ if (!lock_check((unsigned long)map))
+ goto unmap;
+
+ if (munlockall()) {
+ perror("munlockall()");
+ goto unmap;
+ }
+
+ ret = unlock_lock_check(map);
+
+unmap:
+ munmap(map, 2 * page_size);
+out:
+ munlockall();
+ return ret;
+}
+
+static int test_vma_management(bool call_mlock)
+{
+ int ret = 1;
+ void *map;
+ unsigned long page_size = getpagesize();
+ struct vm_boundaries page1;
+ struct vm_boundaries page2;
+ struct vm_boundaries page3;
+
+ map = mmap(NULL, 3 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ if (map == MAP_FAILED) {
+ perror("mmap()");
+ return ret;
+ }
+
+ if (call_mlock && mlock2_(map, 3 * page_size, MLOCK_ONFAULT)) {
+ if (errno == ENOSYS) {
+ printf("Cannot call new mlock family, skipping test\n");
+ _exit(KSFT_SKIP);
+ }
+ perror("mlock(ONFAULT)\n");
+ goto out;
+ }
+
+ if (get_vm_area((unsigned long)map, &page1) ||
+ get_vm_area((unsigned long)map + page_size, &page2) ||
+ get_vm_area((unsigned long)map + page_size * 2, &page3)) {
+ printf("couldn't find mapping in /proc/self/maps\n");
+ goto out;
+ }
+
+ /*
+ * Before we unlock a portion, we need to that all three pages are in
+ * the same VMA. If they are not we abort this test (Note that this is
+ * not a failure)
+ */
+ if (page1.start != page2.start || page2.start != page3.start) {
+ printf("VMAs are not merged to start, aborting test\n");
+ ret = 0;
+ goto out;
+ }
+
+ if (munlock(map + page_size, page_size)) {
+ perror("munlock()");
+ goto out;
+ }
+
+ if (get_vm_area((unsigned long)map, &page1) ||
+ get_vm_area((unsigned long)map + page_size, &page2) ||
+ get_vm_area((unsigned long)map + page_size * 2, &page3)) {
+ printf("couldn't find mapping in /proc/self/maps\n");
+ goto out;
+ }
+
+ /* All three VMAs should be different */
+ if (page1.start == page2.start || page2.start == page3.start) {
+ printf("failed to split VMA for munlock\n");
+ goto out;
+ }
+
+ /* Now unlock the first and third page and check the VMAs again */
+ if (munlock(map, page_size * 3)) {
+ perror("munlock()");
+ goto out;
+ }
+
+ if (get_vm_area((unsigned long)map, &page1) ||
+ get_vm_area((unsigned long)map + page_size, &page2) ||
+ get_vm_area((unsigned long)map + page_size * 2, &page3)) {
+ printf("couldn't find mapping in /proc/self/maps\n");
+ goto out;
+ }
+
+ /* Now all three VMAs should be the same */
+ if (page1.start != page2.start || page2.start != page3.start) {
+ printf("failed to merge VMAs after munlock\n");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ munmap(map, 3 * page_size);
+ return ret;
+}
+
+static int test_mlockall(int (test_function)(bool call_mlock))
+{
+ int ret = 1;
+
+ if (mlockall(MCL_CURRENT | MCL_ONFAULT | MCL_FUTURE)) {
+ perror("mlockall");
+ return ret;
+ }
+
+ ret = test_function(false);
+ munlockall();
+ return ret;
+}
+
+int main(int argc, char **argv)
+{
+ int ret = 0;
+ ret += test_mlock_lock();
+ ret += test_mlock_onfault();
+ ret += test_munlockall();
+ ret += test_lock_onfault_of_present();
+ ret += test_vma_management(true);
+ ret += test_mlockall(test_vma_management);
+ return ret;
+}
diff --git a/tools/testing/selftests/vm/mlock2.h b/tools/testing/selftests/vm/mlock2.h
new file mode 100644
index 000000000..2a6e76c22
--- /dev/null
+++ b/tools/testing/selftests/vm/mlock2.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <syscall.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifndef MLOCK_ONFAULT
+#define MLOCK_ONFAULT 1
+#endif
+
+#ifndef MCL_ONFAULT
+#define MCL_ONFAULT (MCL_FUTURE << 1)
+#endif
+
+static int mlock2_(void *start, size_t len, int flags)
+{
+#ifdef __NR_mlock2
+ return syscall(__NR_mlock2, start, len, flags);
+#else
+ errno = ENOSYS;
+ return -1;
+#endif
+}
+
+static FILE *seek_to_smaps_entry(unsigned long addr)
+{
+ FILE *file;
+ char *line = NULL;
+ size_t size = 0;
+ unsigned long start, end;
+ char perms[5];
+ unsigned long offset;
+ char dev[32];
+ unsigned long inode;
+ char path[BUFSIZ];
+
+ file = fopen("/proc/self/smaps", "r");
+ if (!file) {
+ perror("fopen smaps");
+ _exit(1);
+ }
+
+ while (getline(&line, &size, file) > 0) {
+ if (sscanf(line, "%lx-%lx %s %lx %s %lu %s\n",
+ &start, &end, perms, &offset, dev, &inode, path) < 6)
+ goto next;
+
+ if (start <= addr && addr < end)
+ goto out;
+
+next:
+ free(line);
+ line = NULL;
+ size = 0;
+ }
+
+ fclose(file);
+ file = NULL;
+
+out:
+ free(line);
+ return file;
+}
diff --git a/tools/testing/selftests/vm/on-fault-limit.c b/tools/testing/selftests/vm/on-fault-limit.c
new file mode 100644
index 000000000..634d87dfb
--- /dev/null
+++ b/tools/testing/selftests/vm/on-fault-limit.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <sys/mman.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#ifndef MCL_ONFAULT
+#define MCL_ONFAULT (MCL_FUTURE << 1)
+#endif
+
+static int test_limit(void)
+{
+ int ret = 1;
+ struct rlimit lims;
+ void *map;
+
+ if (getrlimit(RLIMIT_MEMLOCK, &lims)) {
+ perror("getrlimit");
+ return ret;
+ }
+
+ if (mlockall(MCL_ONFAULT | MCL_FUTURE)) {
+ perror("mlockall");
+ return ret;
+ }
+
+ map = mmap(NULL, 2 * lims.rlim_max, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE, -1, 0);
+ if (map != MAP_FAILED)
+ printf("mmap should have failed, but didn't\n");
+ else {
+ ret = 0;
+ munmap(map, 2 * lims.rlim_max);
+ }
+
+ munlockall();
+ return ret;
+}
+
+int main(int argc, char **argv)
+{
+ int ret = 0;
+
+ ret += test_limit();
+ return ret;
+}
diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests
new file mode 100755
index 000000000..584a91ae4
--- /dev/null
+++ b/tools/testing/selftests/vm/run_vmtests
@@ -0,0 +1,214 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#please run as root
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+mnt=./huge
+exitcode=0
+
+#get huge pagesize and freepages from /proc/meminfo
+while read name size unit; do
+ if [ "$name" = "HugePages_Free:" ]; then
+ freepgs=$size
+ fi
+ if [ "$name" = "Hugepagesize:" ]; then
+ hpgsize_KB=$size
+ fi
+done < /proc/meminfo
+
+# Simple hugetlbfs tests have a hardcoded minimum requirement of
+# huge pages totaling 256MB (262144KB) in size. The userfaultfd
+# hugetlb test requires a minimum of 2 * nr_cpus huge pages. Take
+# both of these requirements into account and attempt to increase
+# number of huge pages available.
+nr_cpus=$(nproc)
+hpgsize_MB=$((hpgsize_KB / 1024))
+half_ufd_size_MB=$((((nr_cpus * hpgsize_MB + 127) / 128) * 128))
+needmem_KB=$((half_ufd_size_MB * 2 * 1024))
+
+#set proper nr_hugepages
+if [ -n "$freepgs" ] && [ -n "$hpgsize_KB" ]; then
+ nr_hugepgs=`cat /proc/sys/vm/nr_hugepages`
+ needpgs=$((needmem_KB / hpgsize_KB))
+ tries=2
+ while [ $tries -gt 0 ] && [ $freepgs -lt $needpgs ]; do
+ lackpgs=$(( $needpgs - $freepgs ))
+ echo 3 > /proc/sys/vm/drop_caches
+ echo $(( $lackpgs + $nr_hugepgs )) > /proc/sys/vm/nr_hugepages
+ if [ $? -ne 0 ]; then
+ echo "Please run this test as root"
+ exit $ksft_skip
+ fi
+ while read name size unit; do
+ if [ "$name" = "HugePages_Free:" ]; then
+ freepgs=$size
+ fi
+ done < /proc/meminfo
+ tries=$((tries - 1))
+ done
+ if [ $freepgs -lt $needpgs ]; then
+ printf "Not enough huge pages available (%d < %d)\n" \
+ $freepgs $needpgs
+ exit 1
+ fi
+else
+ echo "no hugetlbfs support in kernel?"
+ exit 1
+fi
+
+mkdir $mnt
+mount -t hugetlbfs none $mnt
+
+echo "---------------------"
+echo "running hugepage-mmap"
+echo "---------------------"
+./hugepage-mmap
+if [ $? -ne 0 ]; then
+ echo "[FAIL]"
+ exitcode=1
+else
+ echo "[PASS]"
+fi
+
+shmmax=`cat /proc/sys/kernel/shmmax`
+shmall=`cat /proc/sys/kernel/shmall`
+echo 268435456 > /proc/sys/kernel/shmmax
+echo 4194304 > /proc/sys/kernel/shmall
+echo "--------------------"
+echo "running hugepage-shm"
+echo "--------------------"
+./hugepage-shm
+if [ $? -ne 0 ]; then
+ echo "[FAIL]"
+ exitcode=1
+else
+ echo "[PASS]"
+fi
+echo $shmmax > /proc/sys/kernel/shmmax
+echo $shmall > /proc/sys/kernel/shmall
+
+echo "-------------------"
+echo "running map_hugetlb"
+echo "-------------------"
+./map_hugetlb
+if [ $? -ne 0 ]; then
+ echo "[FAIL]"
+ exitcode=1
+else
+ echo "[PASS]"
+fi
+
+echo "NOTE: The above hugetlb tests provide minimal coverage. Use"
+echo " https://github.com/libhugetlbfs/libhugetlbfs.git for"
+echo " hugetlb regression testing."
+
+echo "-------------------"
+echo "running userfaultfd"
+echo "-------------------"
+./userfaultfd anon 128 32
+if [ $? -ne 0 ]; then
+ echo "[FAIL]"
+ exitcode=1
+else
+ echo "[PASS]"
+fi
+
+echo "---------------------------"
+echo "running userfaultfd_hugetlb"
+echo "---------------------------"
+# Test requires source and destination huge pages. Size of source
+# (half_ufd_size_MB) is passed as argument to test.
+./userfaultfd hugetlb $half_ufd_size_MB 32 $mnt/ufd_test_file
+if [ $? -ne 0 ]; then
+ echo "[FAIL]"
+ exitcode=1
+else
+ echo "[PASS]"
+fi
+rm -f $mnt/ufd_test_file
+
+echo "-------------------------"
+echo "running userfaultfd_shmem"
+echo "-------------------------"
+./userfaultfd shmem 128 32
+if [ $? -ne 0 ]; then
+ echo "[FAIL]"
+ exitcode=1
+else
+ echo "[PASS]"
+fi
+
+#cleanup
+umount $mnt
+rm -rf $mnt
+echo $nr_hugepgs > /proc/sys/vm/nr_hugepages
+
+echo "-----------------------"
+echo "running compaction_test"
+echo "-----------------------"
+./compaction_test
+if [ $? -ne 0 ]; then
+ echo "[FAIL]"
+ exitcode=1
+else
+ echo "[PASS]"
+fi
+
+echo "----------------------"
+echo "running on-fault-limit"
+echo "----------------------"
+sudo -u nobody ./on-fault-limit
+if [ $? -ne 0 ]; then
+ echo "[FAIL]"
+ exitcode=1
+else
+ echo "[PASS]"
+fi
+
+echo "--------------------"
+echo "running map_populate"
+echo "--------------------"
+./map_populate
+if [ $? -ne 0 ]; then
+ echo "[FAIL]"
+ exitcode=1
+else
+ echo "[PASS]"
+fi
+
+echo "--------------------"
+echo "running mlock2-tests"
+echo "--------------------"
+./mlock2-tests
+if [ $? -ne 0 ]; then
+ echo "[FAIL]"
+ exitcode=1
+else
+ echo "[PASS]"
+fi
+
+echo "-----------------------------"
+echo "running virtual_address_range"
+echo "-----------------------------"
+./virtual_address_range
+if [ $? -ne 0 ]; then
+ echo "[FAIL]"
+ exitcode=1
+else
+ echo "[PASS]"
+fi
+
+echo "-----------------------------"
+echo "running virtual address 128TB switch test"
+echo "-----------------------------"
+./va_128TBswitch
+if [ $? -ne 0 ]; then
+ echo "[FAIL]"
+ exitcode=1
+else
+ echo "[PASS]"
+fi
+
+exit $exitcode
diff --git a/tools/testing/selftests/vm/thuge-gen.c b/tools/testing/selftests/vm/thuge-gen.c
new file mode 100644
index 000000000..361ef7192
--- /dev/null
+++ b/tools/testing/selftests/vm/thuge-gen.c
@@ -0,0 +1,257 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Test selecting other page sizes for mmap/shmget.
+
+ Before running this huge pages for each huge page size must have been
+ reserved.
+ For large pages beyond MAX_ORDER (like 1GB on x86) boot options must be used.
+ Also shmmax must be increased.
+ And you need to run as root to work around some weird permissions in shm.
+ And nothing using huge pages should run in parallel.
+ When the program aborts you may need to clean up the shm segments with
+ ipcrm -m by hand, like this
+ sudo ipcs | awk '$1 == "0x00000000" {print $2}' | xargs -n1 sudo ipcrm -m
+ (warning this will remove all if someone else uses them) */
+
+#define _GNU_SOURCE 1
+#include <sys/mman.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#include <sys/stat.h>
+#include <glob.h>
+#include <assert.h>
+#include <unistd.h>
+#include <stdarg.h>
+#include <string.h>
+
+#define err(x) perror(x), exit(1)
+
+#define MAP_HUGE_2MB (21 << MAP_HUGE_SHIFT)
+#define MAP_HUGE_1GB (30 << MAP_HUGE_SHIFT)
+#define MAP_HUGE_SHIFT 26
+#define MAP_HUGE_MASK 0x3f
+#if !defined(MAP_HUGETLB)
+#define MAP_HUGETLB 0x40000
+#endif
+
+#define SHM_HUGETLB 04000 /* segment will use huge TLB pages */
+#define SHM_HUGE_SHIFT 26
+#define SHM_HUGE_MASK 0x3f
+#define SHM_HUGE_2MB (21 << SHM_HUGE_SHIFT)
+#define SHM_HUGE_1GB (30 << SHM_HUGE_SHIFT)
+
+#define NUM_PAGESIZES 5
+
+#define NUM_PAGES 4
+
+#define Dprintf(fmt...) // printf(fmt)
+
+unsigned long page_sizes[NUM_PAGESIZES];
+int num_page_sizes;
+
+int ilog2(unsigned long v)
+{
+ int l = 0;
+ while ((1UL << l) < v)
+ l++;
+ return l;
+}
+
+void find_pagesizes(void)
+{
+ glob_t g;
+ int i;
+ glob("/sys/kernel/mm/hugepages/hugepages-*kB", 0, NULL, &g);
+ assert(g.gl_pathc <= NUM_PAGESIZES);
+ for (i = 0; i < g.gl_pathc; i++) {
+ sscanf(g.gl_pathv[i], "/sys/kernel/mm/hugepages/hugepages-%lukB",
+ &page_sizes[i]);
+ page_sizes[i] <<= 10;
+ printf("Found %luMB\n", page_sizes[i] >> 20);
+ }
+ num_page_sizes = g.gl_pathc;
+ globfree(&g);
+}
+
+unsigned long default_huge_page_size(void)
+{
+ unsigned long hps = 0;
+ char *line = NULL;
+ size_t linelen = 0;
+ FILE *f = fopen("/proc/meminfo", "r");
+ if (!f)
+ return 0;
+ while (getline(&line, &linelen, f) > 0) {
+ if (sscanf(line, "Hugepagesize: %lu kB", &hps) == 1) {
+ hps <<= 10;
+ break;
+ }
+ }
+ free(line);
+ return hps;
+}
+
+void show(unsigned long ps)
+{
+ char buf[100];
+ if (ps == getpagesize())
+ return;
+ printf("%luMB: ", ps >> 20);
+ fflush(stdout);
+ snprintf(buf, sizeof buf,
+ "cat /sys/kernel/mm/hugepages/hugepages-%lukB/free_hugepages",
+ ps >> 10);
+ system(buf);
+}
+
+unsigned long read_sysfs(int warn, char *fmt, ...)
+{
+ char *line = NULL;
+ size_t linelen = 0;
+ char buf[100];
+ FILE *f;
+ va_list ap;
+ unsigned long val = 0;
+
+ va_start(ap, fmt);
+ vsnprintf(buf, sizeof buf, fmt, ap);
+ va_end(ap);
+
+ f = fopen(buf, "r");
+ if (!f) {
+ if (warn)
+ printf("missing %s\n", buf);
+ return 0;
+ }
+ if (getline(&line, &linelen, f) > 0) {
+ sscanf(line, "%lu", &val);
+ }
+ fclose(f);
+ free(line);
+ return val;
+}
+
+unsigned long read_free(unsigned long ps)
+{
+ return read_sysfs(ps != getpagesize(),
+ "/sys/kernel/mm/hugepages/hugepages-%lukB/free_hugepages",
+ ps >> 10);
+}
+
+void test_mmap(unsigned long size, unsigned flags)
+{
+ char *map;
+ unsigned long before, after;
+ int err;
+
+ before = read_free(size);
+ map = mmap(NULL, size*NUM_PAGES, PROT_READ|PROT_WRITE,
+ MAP_PRIVATE|MAP_ANONYMOUS|MAP_HUGETLB|flags, -1, 0);
+
+ if (map == (char *)-1) err("mmap");
+ memset(map, 0xff, size*NUM_PAGES);
+ after = read_free(size);
+ Dprintf("before %lu after %lu diff %ld size %lu\n",
+ before, after, before - after, size);
+ assert(size == getpagesize() || (before - after) == NUM_PAGES);
+ show(size);
+ err = munmap(map, size);
+ assert(!err);
+}
+
+void test_shmget(unsigned long size, unsigned flags)
+{
+ int id;
+ unsigned long before, after;
+ int err;
+
+ before = read_free(size);
+ id = shmget(IPC_PRIVATE, size * NUM_PAGES, IPC_CREAT|0600|flags);
+ if (id < 0) err("shmget");
+
+ struct shm_info i;
+ if (shmctl(id, SHM_INFO, (void *)&i) < 0) err("shmctl");
+ Dprintf("alloc %lu res %lu\n", i.shm_tot, i.shm_rss);
+
+
+ Dprintf("id %d\n", id);
+ char *map = shmat(id, NULL, 0600);
+ if (map == (char*)-1) err("shmat");
+
+ shmctl(id, IPC_RMID, NULL);
+
+ memset(map, 0xff, size*NUM_PAGES);
+ after = read_free(size);
+
+ Dprintf("before %lu after %lu diff %ld size %lu\n",
+ before, after, before - after, size);
+ assert(size == getpagesize() || (before - after) == NUM_PAGES);
+ show(size);
+ err = shmdt(map);
+ assert(!err);
+}
+
+void sanity_checks(void)
+{
+ int i;
+ unsigned long largest = getpagesize();
+
+ for (i = 0; i < num_page_sizes; i++) {
+ if (page_sizes[i] > largest)
+ largest = page_sizes[i];
+
+ if (read_free(page_sizes[i]) < NUM_PAGES) {
+ printf("Not enough huge pages for page size %lu MB, need %u\n",
+ page_sizes[i] >> 20,
+ NUM_PAGES);
+ exit(0);
+ }
+ }
+
+ if (read_sysfs(0, "/proc/sys/kernel/shmmax") < NUM_PAGES * largest) {
+ printf("Please do echo %lu > /proc/sys/kernel/shmmax", largest * NUM_PAGES);
+ exit(0);
+ }
+
+#if defined(__x86_64__)
+ if (largest != 1U<<30) {
+ printf("No GB pages available on x86-64\n"
+ "Please boot with hugepagesz=1G hugepages=%d\n", NUM_PAGES);
+ exit(0);
+ }
+#endif
+}
+
+int main(void)
+{
+ int i;
+ unsigned default_hps = default_huge_page_size();
+
+ find_pagesizes();
+
+ sanity_checks();
+
+ for (i = 0; i < num_page_sizes; i++) {
+ unsigned long ps = page_sizes[i];
+ int arg = ilog2(ps) << MAP_HUGE_SHIFT;
+ printf("Testing %luMB mmap with shift %x\n", ps >> 20, arg);
+ test_mmap(ps, MAP_HUGETLB | arg);
+ }
+ printf("Testing default huge mmap\n");
+ test_mmap(default_hps, SHM_HUGETLB);
+
+ puts("Testing non-huge shmget");
+ test_shmget(getpagesize(), 0);
+
+ for (i = 0; i < num_page_sizes; i++) {
+ unsigned long ps = page_sizes[i];
+ int arg = ilog2(ps) << SHM_HUGE_SHIFT;
+ printf("Testing %luMB shmget with shift %x\n", ps >> 20, arg);
+ test_shmget(ps, SHM_HUGETLB | arg);
+ }
+ puts("default huge shmget");
+ test_shmget(default_hps, SHM_HUGETLB);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/vm/transhuge-stress.c b/tools/testing/selftests/vm/transhuge-stress.c
new file mode 100644
index 000000000..fd7f1b4a9
--- /dev/null
+++ b/tools/testing/selftests/vm/transhuge-stress.c
@@ -0,0 +1,144 @@
+/*
+ * Stress test for transparent huge pages, memory compaction and migration.
+ *
+ * Authors: Konstantin Khlebnikov <koct9i@gmail.com>
+ *
+ * This is free and unencumbered software released into the public domain.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <err.h>
+#include <time.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/mman.h>
+
+#define PAGE_SHIFT 12
+#define HPAGE_SHIFT 21
+
+#define PAGE_SIZE (1 << PAGE_SHIFT)
+#define HPAGE_SIZE (1 << HPAGE_SHIFT)
+
+#define PAGEMAP_PRESENT(ent) (((ent) & (1ull << 63)) != 0)
+#define PAGEMAP_PFN(ent) ((ent) & ((1ull << 55) - 1))
+
+int pagemap_fd;
+
+int64_t allocate_transhuge(void *ptr)
+{
+ uint64_t ent[2];
+
+ /* drop pmd */
+ if (mmap(ptr, HPAGE_SIZE, PROT_READ | PROT_WRITE,
+ MAP_FIXED | MAP_ANONYMOUS |
+ MAP_NORESERVE | MAP_PRIVATE, -1, 0) != ptr)
+ errx(2, "mmap transhuge");
+
+ if (madvise(ptr, HPAGE_SIZE, MADV_HUGEPAGE))
+ err(2, "MADV_HUGEPAGE");
+
+ /* allocate transparent huge page */
+ *(volatile void **)ptr = ptr;
+
+ if (pread(pagemap_fd, ent, sizeof(ent),
+ (uintptr_t)ptr >> (PAGE_SHIFT - 3)) != sizeof(ent))
+ err(2, "read pagemap");
+
+ if (PAGEMAP_PRESENT(ent[0]) && PAGEMAP_PRESENT(ent[1]) &&
+ PAGEMAP_PFN(ent[0]) + 1 == PAGEMAP_PFN(ent[1]) &&
+ !(PAGEMAP_PFN(ent[0]) & ((1 << (HPAGE_SHIFT - PAGE_SHIFT)) - 1)))
+ return PAGEMAP_PFN(ent[0]);
+
+ return -1;
+}
+
+int main(int argc, char **argv)
+{
+ size_t ram, len;
+ void *ptr, *p;
+ struct timespec a, b;
+ double s;
+ uint8_t *map;
+ size_t map_len;
+
+ ram = sysconf(_SC_PHYS_PAGES);
+ if (ram > SIZE_MAX / sysconf(_SC_PAGESIZE) / 4)
+ ram = SIZE_MAX / 4;
+ else
+ ram *= sysconf(_SC_PAGESIZE);
+
+ if (argc == 1)
+ len = ram;
+ else if (!strcmp(argv[1], "-h"))
+ errx(1, "usage: %s [size in MiB]", argv[0]);
+ else
+ len = atoll(argv[1]) << 20;
+
+ warnx("allocate %zd transhuge pages, using %zd MiB virtual memory"
+ " and %zd MiB of ram", len >> HPAGE_SHIFT, len >> 20,
+ len >> (20 + HPAGE_SHIFT - PAGE_SHIFT - 1));
+
+ pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
+ if (pagemap_fd < 0)
+ err(2, "open pagemap");
+
+ len -= len % HPAGE_SIZE;
+ ptr = mmap(NULL, len + HPAGE_SIZE, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_NORESERVE | MAP_PRIVATE, -1, 0);
+ if (ptr == MAP_FAILED)
+ err(2, "initial mmap");
+ ptr += HPAGE_SIZE - (uintptr_t)ptr % HPAGE_SIZE;
+
+ if (madvise(ptr, len, MADV_HUGEPAGE))
+ err(2, "MADV_HUGEPAGE");
+
+ map_len = ram >> (HPAGE_SHIFT - 1);
+ map = malloc(map_len);
+ if (!map)
+ errx(2, "map malloc");
+
+ while (1) {
+ int nr_succeed = 0, nr_failed = 0, nr_pages = 0;
+
+ memset(map, 0, map_len);
+
+ clock_gettime(CLOCK_MONOTONIC, &a);
+ for (p = ptr; p < ptr + len; p += HPAGE_SIZE) {
+ int64_t pfn;
+
+ pfn = allocate_transhuge(p);
+
+ if (pfn < 0) {
+ nr_failed++;
+ } else {
+ size_t idx = pfn >> (HPAGE_SHIFT - PAGE_SHIFT);
+
+ nr_succeed++;
+ if (idx >= map_len) {
+ map = realloc(map, idx + 1);
+ if (!map)
+ errx(2, "map realloc");
+ memset(map + map_len, 0, idx + 1 - map_len);
+ map_len = idx + 1;
+ }
+ if (!map[idx])
+ nr_pages++;
+ map[idx] = 1;
+ }
+
+ /* split transhuge page, keep last page */
+ if (madvise(p, HPAGE_SIZE - PAGE_SIZE, MADV_DONTNEED))
+ err(2, "MADV_DONTNEED");
+ }
+ clock_gettime(CLOCK_MONOTONIC, &b);
+ s = b.tv_sec - a.tv_sec + (b.tv_nsec - a.tv_nsec) / 1000000000.;
+
+ warnx("%.3f s/loop, %.3f ms/page, %10.3f MiB/s\t"
+ "%4d succeed, %4d failed, %4d different pages",
+ s, s * 1000 / (len >> HPAGE_SHIFT), len / s / (1 << 20),
+ nr_succeed, nr_failed, nr_pages);
+ }
+}
diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
new file mode 100644
index 000000000..b2c7043c0
--- /dev/null
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -0,0 +1,1333 @@
+/*
+ * Stress userfaultfd syscall.
+ *
+ * Copyright (C) 2015 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ * This test allocates two virtual areas and bounces the physical
+ * memory across the two virtual areas (from area_src to area_dst)
+ * using userfaultfd.
+ *
+ * There are three threads running per CPU:
+ *
+ * 1) one per-CPU thread takes a per-page pthread_mutex in a random
+ * page of the area_dst (while the physical page may still be in
+ * area_src), and increments a per-page counter in the same page,
+ * and checks its value against a verification region.
+ *
+ * 2) another per-CPU thread handles the userfaults generated by
+ * thread 1 above. userfaultfd blocking reads or poll() modes are
+ * exercised interleaved.
+ *
+ * 3) one last per-CPU thread transfers the memory in the background
+ * at maximum bandwidth (if not already transferred by thread
+ * 2). Each cpu thread takes cares of transferring a portion of the
+ * area.
+ *
+ * When all threads of type 3 completed the transfer, one bounce is
+ * complete. area_src and area_dst are then swapped. All threads are
+ * respawned and so the bounce is immediately restarted in the
+ * opposite direction.
+ *
+ * per-CPU threads 1 by triggering userfaults inside
+ * pthread_mutex_lock will also verify the atomicity of the memory
+ * transfer (UFFDIO_COPY).
+ *
+ * The program takes two parameters: the amounts of physical memory in
+ * megabytes (MiB) of the area and the number of bounces to execute.
+ *
+ * # 100MiB 99999 bounces
+ * ./userfaultfd 100 99999
+ *
+ * # 1GiB 99 bounces
+ * ./userfaultfd 1000 99
+ *
+ * # 10MiB-~6GiB 999 bounces, continue forever unless an error triggers
+ * while ./userfaultfd $[RANDOM % 6000 + 10] 999; do true; done
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <errno.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <time.h>
+#include <signal.h>
+#include <poll.h>
+#include <string.h>
+#include <linux/mman.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <sys/ioctl.h>
+#include <sys/wait.h>
+#include <pthread.h>
+#include <linux/userfaultfd.h>
+#include <setjmp.h>
+#include <stdbool.h>
+
+#include "../kselftest.h"
+
+#ifdef __NR_userfaultfd
+
+static unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size;
+
+#define BOUNCE_RANDOM (1<<0)
+#define BOUNCE_RACINGFAULTS (1<<1)
+#define BOUNCE_VERIFY (1<<2)
+#define BOUNCE_POLL (1<<3)
+static int bounces;
+
+#define TEST_ANON 1
+#define TEST_HUGETLB 2
+#define TEST_SHMEM 3
+static int test_type;
+
+/* exercise the test_uffdio_*_eexist every ALARM_INTERVAL_SECS */
+#define ALARM_INTERVAL_SECS 10
+static volatile bool test_uffdio_copy_eexist = true;
+static volatile bool test_uffdio_zeropage_eexist = true;
+
+static bool map_shared;
+static int huge_fd;
+static char *huge_fd_off0;
+static unsigned long long *count_verify;
+static int uffd, uffd_flags, finished, *pipefd;
+static char *area_src, *area_src_alias, *area_dst, *area_dst_alias;
+static char *zeropage;
+pthread_attr_t attr;
+
+/* pthread_mutex_t starts at page offset 0 */
+#define area_mutex(___area, ___nr) \
+ ((pthread_mutex_t *) ((___area) + (___nr)*page_size))
+/*
+ * count is placed in the page after pthread_mutex_t naturally aligned
+ * to avoid non alignment faults on non-x86 archs.
+ */
+#define area_count(___area, ___nr) \
+ ((volatile unsigned long long *) ((unsigned long) \
+ ((___area) + (___nr)*page_size + \
+ sizeof(pthread_mutex_t) + \
+ sizeof(unsigned long long) - 1) & \
+ ~(unsigned long)(sizeof(unsigned long long) \
+ - 1)))
+
+static int anon_release_pages(char *rel_area)
+{
+ int ret = 0;
+
+ if (madvise(rel_area, nr_pages * page_size, MADV_DONTNEED)) {
+ perror("madvise");
+ ret = 1;
+ }
+
+ return ret;
+}
+
+static void anon_allocate_area(void **alloc_area)
+{
+ *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ if (*alloc_area == MAP_FAILED) {
+ fprintf(stderr, "mmap of anonymous memory failed");
+ *alloc_area = NULL;
+ }
+}
+
+static void noop_alias_mapping(__u64 *start, size_t len, unsigned long offset)
+{
+}
+
+/* HugeTLB memory */
+static int hugetlb_release_pages(char *rel_area)
+{
+ int ret = 0;
+
+ if (fallocate(huge_fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+ rel_area == huge_fd_off0 ? 0 :
+ nr_pages * page_size,
+ nr_pages * page_size)) {
+ perror("fallocate");
+ ret = 1;
+ }
+
+ return ret;
+}
+
+
+static void hugetlb_allocate_area(void **alloc_area)
+{
+ void *area_alias = NULL;
+ char **alloc_area_alias;
+ *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
+ (map_shared ? MAP_SHARED : MAP_PRIVATE) |
+ MAP_HUGETLB,
+ huge_fd, *alloc_area == area_src ? 0 :
+ nr_pages * page_size);
+ if (*alloc_area == MAP_FAILED) {
+ fprintf(stderr, "mmap of hugetlbfs file failed\n");
+ *alloc_area = NULL;
+ }
+
+ if (map_shared) {
+ area_alias = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_HUGETLB,
+ huge_fd, *alloc_area == area_src ? 0 :
+ nr_pages * page_size);
+ if (area_alias == MAP_FAILED) {
+ if (munmap(*alloc_area, nr_pages * page_size) < 0)
+ perror("hugetlb munmap"), exit(1);
+ *alloc_area = NULL;
+ return;
+ }
+ }
+ if (*alloc_area == area_src) {
+ huge_fd_off0 = *alloc_area;
+ alloc_area_alias = &area_src_alias;
+ } else {
+ alloc_area_alias = &area_dst_alias;
+ }
+ if (area_alias)
+ *alloc_area_alias = area_alias;
+}
+
+static void hugetlb_alias_mapping(__u64 *start, size_t len, unsigned long offset)
+{
+ if (!map_shared)
+ return;
+ /*
+ * We can't zap just the pagetable with hugetlbfs because
+ * MADV_DONTEED won't work. So exercise -EEXIST on a alias
+ * mapping where the pagetables are not established initially,
+ * this way we'll exercise the -EEXEC at the fs level.
+ */
+ *start = (unsigned long) area_dst_alias + offset;
+}
+
+/* Shared memory */
+static int shmem_release_pages(char *rel_area)
+{
+ int ret = 0;
+
+ if (madvise(rel_area, nr_pages * page_size, MADV_REMOVE)) {
+ perror("madvise");
+ ret = 1;
+ }
+
+ return ret;
+}
+
+static void shmem_allocate_area(void **alloc_area)
+{
+ *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_SHARED, -1, 0);
+ if (*alloc_area == MAP_FAILED) {
+ fprintf(stderr, "shared memory mmap failed\n");
+ *alloc_area = NULL;
+ }
+}
+
+struct uffd_test_ops {
+ unsigned long expected_ioctls;
+ void (*allocate_area)(void **alloc_area);
+ int (*release_pages)(char *rel_area);
+ void (*alias_mapping)(__u64 *start, size_t len, unsigned long offset);
+};
+
+#define ANON_EXPECTED_IOCTLS ((1 << _UFFDIO_WAKE) | \
+ (1 << _UFFDIO_COPY) | \
+ (1 << _UFFDIO_ZEROPAGE))
+
+static struct uffd_test_ops anon_uffd_test_ops = {
+ .expected_ioctls = ANON_EXPECTED_IOCTLS,
+ .allocate_area = anon_allocate_area,
+ .release_pages = anon_release_pages,
+ .alias_mapping = noop_alias_mapping,
+};
+
+static struct uffd_test_ops shmem_uffd_test_ops = {
+ .expected_ioctls = ANON_EXPECTED_IOCTLS,
+ .allocate_area = shmem_allocate_area,
+ .release_pages = shmem_release_pages,
+ .alias_mapping = noop_alias_mapping,
+};
+
+static struct uffd_test_ops hugetlb_uffd_test_ops = {
+ .expected_ioctls = UFFD_API_RANGE_IOCTLS_BASIC,
+ .allocate_area = hugetlb_allocate_area,
+ .release_pages = hugetlb_release_pages,
+ .alias_mapping = hugetlb_alias_mapping,
+};
+
+static struct uffd_test_ops *uffd_test_ops;
+
+static int my_bcmp(char *str1, char *str2, size_t n)
+{
+ unsigned long i;
+ for (i = 0; i < n; i++)
+ if (str1[i] != str2[i])
+ return 1;
+ return 0;
+}
+
+static void *locking_thread(void *arg)
+{
+ unsigned long cpu = (unsigned long) arg;
+ struct random_data rand;
+ unsigned long page_nr = *(&(page_nr)); /* uninitialized warning */
+ int32_t rand_nr;
+ unsigned long long count;
+ char randstate[64];
+ unsigned int seed;
+ time_t start;
+
+ if (bounces & BOUNCE_RANDOM) {
+ seed = (unsigned int) time(NULL) - bounces;
+ if (!(bounces & BOUNCE_RACINGFAULTS))
+ seed += cpu;
+ bzero(&rand, sizeof(rand));
+ bzero(&randstate, sizeof(randstate));
+ if (initstate_r(seed, randstate, sizeof(randstate), &rand))
+ fprintf(stderr, "srandom_r error\n"), exit(1);
+ } else {
+ page_nr = -bounces;
+ if (!(bounces & BOUNCE_RACINGFAULTS))
+ page_nr += cpu * nr_pages_per_cpu;
+ }
+
+ while (!finished) {
+ if (bounces & BOUNCE_RANDOM) {
+ if (random_r(&rand, &rand_nr))
+ fprintf(stderr, "random_r 1 error\n"), exit(1);
+ page_nr = rand_nr;
+ if (sizeof(page_nr) > sizeof(rand_nr)) {
+ if (random_r(&rand, &rand_nr))
+ fprintf(stderr, "random_r 2 error\n"), exit(1);
+ page_nr |= (((unsigned long) rand_nr) << 16) <<
+ 16;
+ }
+ } else
+ page_nr += 1;
+ page_nr %= nr_pages;
+
+ start = time(NULL);
+ if (bounces & BOUNCE_VERIFY) {
+ count = *area_count(area_dst, page_nr);
+ if (!count)
+ fprintf(stderr,
+ "page_nr %lu wrong count %Lu %Lu\n",
+ page_nr, count,
+ count_verify[page_nr]), exit(1);
+
+
+ /*
+ * We can't use bcmp (or memcmp) because that
+ * returns 0 erroneously if the memory is
+ * changing under it (even if the end of the
+ * page is never changing and always
+ * different).
+ */
+#if 1
+ if (!my_bcmp(area_dst + page_nr * page_size, zeropage,
+ page_size))
+ fprintf(stderr,
+ "my_bcmp page_nr %lu wrong count %Lu %Lu\n",
+ page_nr, count,
+ count_verify[page_nr]), exit(1);
+#else
+ unsigned long loops;
+
+ loops = 0;
+ /* uncomment the below line to test with mutex */
+ /* pthread_mutex_lock(area_mutex(area_dst, page_nr)); */
+ while (!bcmp(area_dst + page_nr * page_size, zeropage,
+ page_size)) {
+ loops += 1;
+ if (loops > 10)
+ break;
+ }
+ /* uncomment below line to test with mutex */
+ /* pthread_mutex_unlock(area_mutex(area_dst, page_nr)); */
+ if (loops) {
+ fprintf(stderr,
+ "page_nr %lu all zero thread %lu %p %lu\n",
+ page_nr, cpu, area_dst + page_nr * page_size,
+ loops);
+ if (loops > 10)
+ exit(1);
+ }
+#endif
+ }
+
+ pthread_mutex_lock(area_mutex(area_dst, page_nr));
+ count = *area_count(area_dst, page_nr);
+ if (count != count_verify[page_nr]) {
+ fprintf(stderr,
+ "page_nr %lu memory corruption %Lu %Lu\n",
+ page_nr, count,
+ count_verify[page_nr]), exit(1);
+ }
+ count++;
+ *area_count(area_dst, page_nr) = count_verify[page_nr] = count;
+ pthread_mutex_unlock(area_mutex(area_dst, page_nr));
+
+ if (time(NULL) - start > 1)
+ fprintf(stderr,
+ "userfault too slow %ld "
+ "possible false positive with overcommit\n",
+ time(NULL) - start);
+ }
+
+ return NULL;
+}
+
+static void retry_copy_page(int ufd, struct uffdio_copy *uffdio_copy,
+ unsigned long offset)
+{
+ uffd_test_ops->alias_mapping(&uffdio_copy->dst,
+ uffdio_copy->len,
+ offset);
+ if (ioctl(ufd, UFFDIO_COPY, uffdio_copy)) {
+ /* real retval in ufdio_copy.copy */
+ if (uffdio_copy->copy != -EEXIST)
+ fprintf(stderr, "UFFDIO_COPY retry error %Ld\n",
+ uffdio_copy->copy), exit(1);
+ } else {
+ fprintf(stderr, "UFFDIO_COPY retry unexpected %Ld\n",
+ uffdio_copy->copy), exit(1);
+ }
+}
+
+static int __copy_page(int ufd, unsigned long offset, bool retry)
+{
+ struct uffdio_copy uffdio_copy;
+
+ if (offset >= nr_pages * page_size)
+ fprintf(stderr, "unexpected offset %lu\n",
+ offset), exit(1);
+ uffdio_copy.dst = (unsigned long) area_dst + offset;
+ uffdio_copy.src = (unsigned long) area_src + offset;
+ uffdio_copy.len = page_size;
+ uffdio_copy.mode = 0;
+ uffdio_copy.copy = 0;
+ if (ioctl(ufd, UFFDIO_COPY, &uffdio_copy)) {
+ /* real retval in ufdio_copy.copy */
+ if (uffdio_copy.copy != -EEXIST)
+ fprintf(stderr, "UFFDIO_COPY error %Ld\n",
+ uffdio_copy.copy), exit(1);
+ } else if (uffdio_copy.copy != page_size) {
+ fprintf(stderr, "UFFDIO_COPY unexpected copy %Ld\n",
+ uffdio_copy.copy), exit(1);
+ } else {
+ if (test_uffdio_copy_eexist && retry) {
+ test_uffdio_copy_eexist = false;
+ retry_copy_page(ufd, &uffdio_copy, offset);
+ }
+ return 1;
+ }
+ return 0;
+}
+
+static int copy_page_retry(int ufd, unsigned long offset)
+{
+ return __copy_page(ufd, offset, true);
+}
+
+static int copy_page(int ufd, unsigned long offset)
+{
+ return __copy_page(ufd, offset, false);
+}
+
+static void *uffd_poll_thread(void *arg)
+{
+ unsigned long cpu = (unsigned long) arg;
+ struct pollfd pollfd[2];
+ struct uffd_msg msg;
+ struct uffdio_register uffd_reg;
+ int ret;
+ unsigned long offset;
+ char tmp_chr;
+ unsigned long userfaults = 0;
+
+ pollfd[0].fd = uffd;
+ pollfd[0].events = POLLIN;
+ pollfd[1].fd = pipefd[cpu*2];
+ pollfd[1].events = POLLIN;
+
+ for (;;) {
+ ret = poll(pollfd, 2, -1);
+ if (!ret)
+ fprintf(stderr, "poll error %d\n", ret), exit(1);
+ if (ret < 0)
+ perror("poll"), exit(1);
+ if (pollfd[1].revents & POLLIN) {
+ if (read(pollfd[1].fd, &tmp_chr, 1) != 1)
+ fprintf(stderr, "read pipefd error\n"),
+ exit(1);
+ break;
+ }
+ if (!(pollfd[0].revents & POLLIN))
+ fprintf(stderr, "pollfd[0].revents %d\n",
+ pollfd[0].revents), exit(1);
+ ret = read(uffd, &msg, sizeof(msg));
+ if (ret < 0) {
+ if (errno == EAGAIN)
+ continue;
+ perror("nonblocking read error"), exit(1);
+ }
+ switch (msg.event) {
+ default:
+ fprintf(stderr, "unexpected msg event %u\n",
+ msg.event), exit(1);
+ break;
+ case UFFD_EVENT_PAGEFAULT:
+ if (msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE)
+ fprintf(stderr, "unexpected write fault\n"), exit(1);
+ offset = (char *)(unsigned long)msg.arg.pagefault.address -
+ area_dst;
+ offset &= ~(page_size-1);
+ if (copy_page(uffd, offset))
+ userfaults++;
+ break;
+ case UFFD_EVENT_FORK:
+ close(uffd);
+ uffd = msg.arg.fork.ufd;
+ pollfd[0].fd = uffd;
+ break;
+ case UFFD_EVENT_REMOVE:
+ uffd_reg.range.start = msg.arg.remove.start;
+ uffd_reg.range.len = msg.arg.remove.end -
+ msg.arg.remove.start;
+ if (ioctl(uffd, UFFDIO_UNREGISTER, &uffd_reg.range))
+ fprintf(stderr, "remove failure\n"), exit(1);
+ break;
+ case UFFD_EVENT_REMAP:
+ area_dst = (char *)(unsigned long)msg.arg.remap.to;
+ break;
+ }
+ }
+ return (void *)userfaults;
+}
+
+pthread_mutex_t uffd_read_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+static void *uffd_read_thread(void *arg)
+{
+ unsigned long *this_cpu_userfaults;
+ struct uffd_msg msg;
+ unsigned long offset;
+ int ret;
+
+ this_cpu_userfaults = (unsigned long *) arg;
+ *this_cpu_userfaults = 0;
+
+ pthread_mutex_unlock(&uffd_read_mutex);
+ /* from here cancellation is ok */
+
+ for (;;) {
+ ret = read(uffd, &msg, sizeof(msg));
+ if (ret != sizeof(msg)) {
+ if (ret < 0)
+ perror("blocking read error"), exit(1);
+ else
+ fprintf(stderr, "short read\n"), exit(1);
+ }
+ if (msg.event != UFFD_EVENT_PAGEFAULT)
+ fprintf(stderr, "unexpected msg event %u\n",
+ msg.event), exit(1);
+ if (bounces & BOUNCE_VERIFY &&
+ msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE)
+ fprintf(stderr, "unexpected write fault\n"), exit(1);
+ offset = (char *)(unsigned long)msg.arg.pagefault.address -
+ area_dst;
+ offset &= ~(page_size-1);
+ if (copy_page(uffd, offset))
+ (*this_cpu_userfaults)++;
+ }
+ return (void *)NULL;
+}
+
+static void *background_thread(void *arg)
+{
+ unsigned long cpu = (unsigned long) arg;
+ unsigned long page_nr;
+
+ for (page_nr = cpu * nr_pages_per_cpu;
+ page_nr < (cpu+1) * nr_pages_per_cpu;
+ page_nr++)
+ copy_page_retry(uffd, page_nr * page_size);
+
+ return NULL;
+}
+
+static int stress(unsigned long *userfaults)
+{
+ unsigned long cpu;
+ pthread_t locking_threads[nr_cpus];
+ pthread_t uffd_threads[nr_cpus];
+ pthread_t background_threads[nr_cpus];
+ void **_userfaults = (void **) userfaults;
+
+ finished = 0;
+ for (cpu = 0; cpu < nr_cpus; cpu++) {
+ if (pthread_create(&locking_threads[cpu], &attr,
+ locking_thread, (void *)cpu))
+ return 1;
+ if (bounces & BOUNCE_POLL) {
+ if (pthread_create(&uffd_threads[cpu], &attr,
+ uffd_poll_thread, (void *)cpu))
+ return 1;
+ } else {
+ if (pthread_create(&uffd_threads[cpu], &attr,
+ uffd_read_thread,
+ &_userfaults[cpu]))
+ return 1;
+ pthread_mutex_lock(&uffd_read_mutex);
+ }
+ if (pthread_create(&background_threads[cpu], &attr,
+ background_thread, (void *)cpu))
+ return 1;
+ }
+ for (cpu = 0; cpu < nr_cpus; cpu++)
+ if (pthread_join(background_threads[cpu], NULL))
+ return 1;
+
+ /*
+ * Be strict and immediately zap area_src, the whole area has
+ * been transferred already by the background treads. The
+ * area_src could then be faulted in in a racy way by still
+ * running uffdio_threads reading zeropages after we zapped
+ * area_src (but they're guaranteed to get -EEXIST from
+ * UFFDIO_COPY without writing zero pages into area_dst
+ * because the background threads already completed).
+ */
+ if (uffd_test_ops->release_pages(area_src))
+ return 1;
+
+ for (cpu = 0; cpu < nr_cpus; cpu++) {
+ char c;
+ if (bounces & BOUNCE_POLL) {
+ if (write(pipefd[cpu*2+1], &c, 1) != 1) {
+ fprintf(stderr, "pipefd write error\n");
+ return 1;
+ }
+ if (pthread_join(uffd_threads[cpu], &_userfaults[cpu]))
+ return 1;
+ } else {
+ if (pthread_cancel(uffd_threads[cpu]))
+ return 1;
+ if (pthread_join(uffd_threads[cpu], NULL))
+ return 1;
+ }
+ }
+
+ finished = 1;
+ for (cpu = 0; cpu < nr_cpus; cpu++)
+ if (pthread_join(locking_threads[cpu], NULL))
+ return 1;
+
+ return 0;
+}
+
+static int userfaultfd_open(int features)
+{
+ struct uffdio_api uffdio_api;
+
+ uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
+ if (uffd < 0) {
+ fprintf(stderr,
+ "userfaultfd syscall not available in this kernel\n");
+ return 1;
+ }
+ uffd_flags = fcntl(uffd, F_GETFD, NULL);
+
+ uffdio_api.api = UFFD_API;
+ uffdio_api.features = features;
+ if (ioctl(uffd, UFFDIO_API, &uffdio_api)) {
+ fprintf(stderr, "UFFDIO_API\n");
+ return 1;
+ }
+ if (uffdio_api.api != UFFD_API) {
+ fprintf(stderr, "UFFDIO_API error %Lu\n", uffdio_api.api);
+ return 1;
+ }
+
+ return 0;
+}
+
+sigjmp_buf jbuf, *sigbuf;
+
+static void sighndl(int sig, siginfo_t *siginfo, void *ptr)
+{
+ if (sig == SIGBUS) {
+ if (sigbuf)
+ siglongjmp(*sigbuf, 1);
+ abort();
+ }
+}
+
+/*
+ * For non-cooperative userfaultfd test we fork() a process that will
+ * generate pagefaults, will mremap the area monitored by the
+ * userfaultfd and at last this process will release the monitored
+ * area.
+ * For the anonymous and shared memory the area is divided into two
+ * parts, the first part is accessed before mremap, and the second
+ * part is accessed after mremap. Since hugetlbfs does not support
+ * mremap, the entire monitored area is accessed in a single pass for
+ * HUGETLB_TEST.
+ * The release of the pages currently generates event for shmem and
+ * anonymous memory (UFFD_EVENT_REMOVE), hence it is not checked
+ * for hugetlb.
+ * For signal test(UFFD_FEATURE_SIGBUS), signal_test = 1, we register
+ * monitored area, generate pagefaults and test that signal is delivered.
+ * Use UFFDIO_COPY to allocate missing page and retry. For signal_test = 2
+ * test robustness use case - we release monitored area, fork a process
+ * that will generate pagefaults and verify signal is generated.
+ * This also tests UFFD_FEATURE_EVENT_FORK event along with the signal
+ * feature. Using monitor thread, verify no userfault events are generated.
+ */
+static int faulting_process(int signal_test)
+{
+ unsigned long nr;
+ unsigned long long count;
+ unsigned long split_nr_pages;
+ unsigned long lastnr;
+ struct sigaction act;
+ unsigned long signalled = 0;
+
+ if (test_type != TEST_HUGETLB)
+ split_nr_pages = (nr_pages + 1) / 2;
+ else
+ split_nr_pages = nr_pages;
+
+ if (signal_test) {
+ sigbuf = &jbuf;
+ memset(&act, 0, sizeof(act));
+ act.sa_sigaction = sighndl;
+ act.sa_flags = SA_SIGINFO;
+ if (sigaction(SIGBUS, &act, 0)) {
+ perror("sigaction");
+ return 1;
+ }
+ lastnr = (unsigned long)-1;
+ }
+
+ for (nr = 0; nr < split_nr_pages; nr++) {
+ if (signal_test) {
+ if (sigsetjmp(*sigbuf, 1) != 0) {
+ if (nr == lastnr) {
+ fprintf(stderr, "Signal repeated\n");
+ return 1;
+ }
+
+ lastnr = nr;
+ if (signal_test == 1) {
+ if (copy_page(uffd, nr * page_size))
+ signalled++;
+ } else {
+ signalled++;
+ continue;
+ }
+ }
+ }
+
+ count = *area_count(area_dst, nr);
+ if (count != count_verify[nr]) {
+ fprintf(stderr,
+ "nr %lu memory corruption %Lu %Lu\n",
+ nr, count,
+ count_verify[nr]), exit(1);
+ }
+ }
+
+ if (signal_test)
+ return signalled != split_nr_pages;
+
+ if (test_type == TEST_HUGETLB)
+ return 0;
+
+ area_dst = mremap(area_dst, nr_pages * page_size, nr_pages * page_size,
+ MREMAP_MAYMOVE | MREMAP_FIXED, area_src);
+ if (area_dst == MAP_FAILED)
+ perror("mremap"), exit(1);
+
+ for (; nr < nr_pages; nr++) {
+ count = *area_count(area_dst, nr);
+ if (count != count_verify[nr]) {
+ fprintf(stderr,
+ "nr %lu memory corruption %Lu %Lu\n",
+ nr, count,
+ count_verify[nr]), exit(1);
+ }
+ }
+
+ if (uffd_test_ops->release_pages(area_dst))
+ return 1;
+
+ for (nr = 0; nr < nr_pages; nr++) {
+ if (my_bcmp(area_dst + nr * page_size, zeropage, page_size))
+ fprintf(stderr, "nr %lu is not zero\n", nr), exit(1);
+ }
+
+ return 0;
+}
+
+static void retry_uffdio_zeropage(int ufd,
+ struct uffdio_zeropage *uffdio_zeropage,
+ unsigned long offset)
+{
+ uffd_test_ops->alias_mapping(&uffdio_zeropage->range.start,
+ uffdio_zeropage->range.len,
+ offset);
+ if (ioctl(ufd, UFFDIO_ZEROPAGE, uffdio_zeropage)) {
+ if (uffdio_zeropage->zeropage != -EEXIST)
+ fprintf(stderr, "UFFDIO_ZEROPAGE retry error %Ld\n",
+ uffdio_zeropage->zeropage), exit(1);
+ } else {
+ fprintf(stderr, "UFFDIO_ZEROPAGE retry unexpected %Ld\n",
+ uffdio_zeropage->zeropage), exit(1);
+ }
+}
+
+static int __uffdio_zeropage(int ufd, unsigned long offset, bool retry)
+{
+ struct uffdio_zeropage uffdio_zeropage;
+ int ret;
+ unsigned long has_zeropage;
+
+ has_zeropage = uffd_test_ops->expected_ioctls & (1 << _UFFDIO_ZEROPAGE);
+
+ if (offset >= nr_pages * page_size)
+ fprintf(stderr, "unexpected offset %lu\n",
+ offset), exit(1);
+ uffdio_zeropage.range.start = (unsigned long) area_dst + offset;
+ uffdio_zeropage.range.len = page_size;
+ uffdio_zeropage.mode = 0;
+ ret = ioctl(ufd, UFFDIO_ZEROPAGE, &uffdio_zeropage);
+ if (ret) {
+ /* real retval in ufdio_zeropage.zeropage */
+ if (has_zeropage) {
+ if (uffdio_zeropage.zeropage == -EEXIST)
+ fprintf(stderr, "UFFDIO_ZEROPAGE -EEXIST\n"),
+ exit(1);
+ else
+ fprintf(stderr, "UFFDIO_ZEROPAGE error %Ld\n",
+ uffdio_zeropage.zeropage), exit(1);
+ } else {
+ if (uffdio_zeropage.zeropage != -EINVAL)
+ fprintf(stderr,
+ "UFFDIO_ZEROPAGE not -EINVAL %Ld\n",
+ uffdio_zeropage.zeropage), exit(1);
+ }
+ } else if (has_zeropage) {
+ if (uffdio_zeropage.zeropage != page_size) {
+ fprintf(stderr, "UFFDIO_ZEROPAGE unexpected %Ld\n",
+ uffdio_zeropage.zeropage), exit(1);
+ } else {
+ if (test_uffdio_zeropage_eexist && retry) {
+ test_uffdio_zeropage_eexist = false;
+ retry_uffdio_zeropage(ufd, &uffdio_zeropage,
+ offset);
+ }
+ return 1;
+ }
+ } else {
+ fprintf(stderr,
+ "UFFDIO_ZEROPAGE succeeded %Ld\n",
+ uffdio_zeropage.zeropage), exit(1);
+ }
+
+ return 0;
+}
+
+static int uffdio_zeropage(int ufd, unsigned long offset)
+{
+ return __uffdio_zeropage(ufd, offset, false);
+}
+
+/* exercise UFFDIO_ZEROPAGE */
+static int userfaultfd_zeropage_test(void)
+{
+ struct uffdio_register uffdio_register;
+ unsigned long expected_ioctls;
+
+ printf("testing UFFDIO_ZEROPAGE: ");
+ fflush(stdout);
+
+ if (uffd_test_ops->release_pages(area_dst))
+ return 1;
+
+ if (userfaultfd_open(0) < 0)
+ return 1;
+ uffdio_register.range.start = (unsigned long) area_dst;
+ uffdio_register.range.len = nr_pages * page_size;
+ uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
+ if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
+ fprintf(stderr, "register failure\n"), exit(1);
+
+ expected_ioctls = uffd_test_ops->expected_ioctls;
+ if ((uffdio_register.ioctls & expected_ioctls) !=
+ expected_ioctls)
+ fprintf(stderr,
+ "unexpected missing ioctl for anon memory\n"),
+ exit(1);
+
+ if (uffdio_zeropage(uffd, 0)) {
+ if (my_bcmp(area_dst, zeropage, page_size))
+ fprintf(stderr, "zeropage is not zero\n"), exit(1);
+ }
+
+ close(uffd);
+ printf("done.\n");
+ return 0;
+}
+
+static int userfaultfd_events_test(void)
+{
+ struct uffdio_register uffdio_register;
+ unsigned long expected_ioctls;
+ unsigned long userfaults;
+ pthread_t uffd_mon;
+ int err, features;
+ pid_t pid;
+ char c;
+
+ printf("testing events (fork, remap, remove): ");
+ fflush(stdout);
+
+ if (uffd_test_ops->release_pages(area_dst))
+ return 1;
+
+ features = UFFD_FEATURE_EVENT_FORK | UFFD_FEATURE_EVENT_REMAP |
+ UFFD_FEATURE_EVENT_REMOVE;
+ if (userfaultfd_open(features) < 0)
+ return 1;
+ fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
+
+ uffdio_register.range.start = (unsigned long) area_dst;
+ uffdio_register.range.len = nr_pages * page_size;
+ uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
+ if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
+ fprintf(stderr, "register failure\n"), exit(1);
+
+ expected_ioctls = uffd_test_ops->expected_ioctls;
+ if ((uffdio_register.ioctls & expected_ioctls) !=
+ expected_ioctls)
+ fprintf(stderr,
+ "unexpected missing ioctl for anon memory\n"),
+ exit(1);
+
+ if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, NULL))
+ perror("uffd_poll_thread create"), exit(1);
+
+ pid = fork();
+ if (pid < 0)
+ perror("fork"), exit(1);
+
+ if (!pid)
+ return faulting_process(0);
+
+ waitpid(pid, &err, 0);
+ if (err)
+ fprintf(stderr, "faulting process failed\n"), exit(1);
+
+ if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
+ perror("pipe write"), exit(1);
+ if (pthread_join(uffd_mon, (void **)&userfaults))
+ return 1;
+
+ close(uffd);
+ printf("userfaults: %ld\n", userfaults);
+
+ return userfaults != nr_pages;
+}
+
+static int userfaultfd_sig_test(void)
+{
+ struct uffdio_register uffdio_register;
+ unsigned long expected_ioctls;
+ unsigned long userfaults;
+ pthread_t uffd_mon;
+ int err, features;
+ pid_t pid;
+ char c;
+
+ printf("testing signal delivery: ");
+ fflush(stdout);
+
+ if (uffd_test_ops->release_pages(area_dst))
+ return 1;
+
+ features = UFFD_FEATURE_EVENT_FORK|UFFD_FEATURE_SIGBUS;
+ if (userfaultfd_open(features) < 0)
+ return 1;
+ fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
+
+ uffdio_register.range.start = (unsigned long) area_dst;
+ uffdio_register.range.len = nr_pages * page_size;
+ uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
+ if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
+ fprintf(stderr, "register failure\n"), exit(1);
+
+ expected_ioctls = uffd_test_ops->expected_ioctls;
+ if ((uffdio_register.ioctls & expected_ioctls) !=
+ expected_ioctls)
+ fprintf(stderr,
+ "unexpected missing ioctl for anon memory\n"),
+ exit(1);
+
+ if (faulting_process(1))
+ fprintf(stderr, "faulting process failed\n"), exit(1);
+
+ if (uffd_test_ops->release_pages(area_dst))
+ return 1;
+
+ if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, NULL))
+ perror("uffd_poll_thread create"), exit(1);
+
+ pid = fork();
+ if (pid < 0)
+ perror("fork"), exit(1);
+
+ if (!pid)
+ exit(faulting_process(2));
+
+ waitpid(pid, &err, 0);
+ if (err)
+ fprintf(stderr, "faulting process failed\n"), exit(1);
+
+ if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
+ perror("pipe write"), exit(1);
+ if (pthread_join(uffd_mon, (void **)&userfaults))
+ return 1;
+
+ printf("done.\n");
+ if (userfaults)
+ fprintf(stderr, "Signal test failed, userfaults: %ld\n",
+ userfaults);
+ close(uffd);
+ return userfaults != 0;
+}
+static int userfaultfd_stress(void)
+{
+ void *area;
+ char *tmp_area;
+ unsigned long nr;
+ struct uffdio_register uffdio_register;
+ unsigned long cpu;
+ int err;
+ unsigned long userfaults[nr_cpus];
+
+ uffd_test_ops->allocate_area((void **)&area_src);
+ if (!area_src)
+ return 1;
+ uffd_test_ops->allocate_area((void **)&area_dst);
+ if (!area_dst)
+ return 1;
+
+ if (userfaultfd_open(0) < 0)
+ return 1;
+
+ count_verify = malloc(nr_pages * sizeof(unsigned long long));
+ if (!count_verify) {
+ perror("count_verify");
+ return 1;
+ }
+
+ for (nr = 0; nr < nr_pages; nr++) {
+ *area_mutex(area_src, nr) = (pthread_mutex_t)
+ PTHREAD_MUTEX_INITIALIZER;
+ count_verify[nr] = *area_count(area_src, nr) = 1;
+ /*
+ * In the transition between 255 to 256, powerpc will
+ * read out of order in my_bcmp and see both bytes as
+ * zero, so leave a placeholder below always non-zero
+ * after the count, to avoid my_bcmp to trigger false
+ * positives.
+ */
+ *(area_count(area_src, nr) + 1) = 1;
+ }
+
+ pipefd = malloc(sizeof(int) * nr_cpus * 2);
+ if (!pipefd) {
+ perror("pipefd");
+ return 1;
+ }
+ for (cpu = 0; cpu < nr_cpus; cpu++) {
+ if (pipe2(&pipefd[cpu*2], O_CLOEXEC | O_NONBLOCK)) {
+ perror("pipe");
+ return 1;
+ }
+ }
+
+ if (posix_memalign(&area, page_size, page_size)) {
+ fprintf(stderr, "out of memory\n");
+ return 1;
+ }
+ zeropage = area;
+ bzero(zeropage, page_size);
+
+ pthread_mutex_lock(&uffd_read_mutex);
+
+ pthread_attr_init(&attr);
+ pthread_attr_setstacksize(&attr, 16*1024*1024);
+
+ err = 0;
+ while (bounces--) {
+ unsigned long expected_ioctls;
+
+ printf("bounces: %d, mode:", bounces);
+ if (bounces & BOUNCE_RANDOM)
+ printf(" rnd");
+ if (bounces & BOUNCE_RACINGFAULTS)
+ printf(" racing");
+ if (bounces & BOUNCE_VERIFY)
+ printf(" ver");
+ if (bounces & BOUNCE_POLL)
+ printf(" poll");
+ printf(", ");
+ fflush(stdout);
+
+ if (bounces & BOUNCE_POLL)
+ fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
+ else
+ fcntl(uffd, F_SETFL, uffd_flags & ~O_NONBLOCK);
+
+ /* register */
+ uffdio_register.range.start = (unsigned long) area_dst;
+ uffdio_register.range.len = nr_pages * page_size;
+ uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
+ if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) {
+ fprintf(stderr, "register failure\n");
+ return 1;
+ }
+ expected_ioctls = uffd_test_ops->expected_ioctls;
+ if ((uffdio_register.ioctls & expected_ioctls) !=
+ expected_ioctls) {
+ fprintf(stderr,
+ "unexpected missing ioctl for anon memory\n");
+ return 1;
+ }
+
+ if (area_dst_alias) {
+ uffdio_register.range.start = (unsigned long)
+ area_dst_alias;
+ if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) {
+ fprintf(stderr, "register failure alias\n");
+ return 1;
+ }
+ }
+
+ /*
+ * The madvise done previously isn't enough: some
+ * uffd_thread could have read userfaults (one of
+ * those already resolved by the background thread)
+ * and it may be in the process of calling
+ * UFFDIO_COPY. UFFDIO_COPY will read the zapped
+ * area_src and it would map a zero page in it (of
+ * course such a UFFDIO_COPY is perfectly safe as it'd
+ * return -EEXIST). The problem comes at the next
+ * bounce though: that racing UFFDIO_COPY would
+ * generate zeropages in the area_src, so invalidating
+ * the previous MADV_DONTNEED. Without this additional
+ * MADV_DONTNEED those zeropages leftovers in the
+ * area_src would lead to -EEXIST failure during the
+ * next bounce, effectively leaving a zeropage in the
+ * area_dst.
+ *
+ * Try to comment this out madvise to see the memory
+ * corruption being caught pretty quick.
+ *
+ * khugepaged is also inhibited to collapse THP after
+ * MADV_DONTNEED only after the UFFDIO_REGISTER, so it's
+ * required to MADV_DONTNEED here.
+ */
+ if (uffd_test_ops->release_pages(area_dst))
+ return 1;
+
+ /* bounce pass */
+ if (stress(userfaults))
+ return 1;
+
+ /* unregister */
+ if (ioctl(uffd, UFFDIO_UNREGISTER, &uffdio_register.range)) {
+ fprintf(stderr, "unregister failure\n");
+ return 1;
+ }
+ if (area_dst_alias) {
+ uffdio_register.range.start = (unsigned long) area_dst;
+ if (ioctl(uffd, UFFDIO_UNREGISTER,
+ &uffdio_register.range)) {
+ fprintf(stderr, "unregister failure alias\n");
+ return 1;
+ }
+ }
+
+ /* verification */
+ if (bounces & BOUNCE_VERIFY) {
+ for (nr = 0; nr < nr_pages; nr++) {
+ if (*area_count(area_dst, nr) != count_verify[nr]) {
+ fprintf(stderr,
+ "error area_count %Lu %Lu %lu\n",
+ *area_count(area_src, nr),
+ count_verify[nr],
+ nr);
+ err = 1;
+ bounces = 0;
+ }
+ }
+ }
+
+ /* prepare next bounce */
+ tmp_area = area_src;
+ area_src = area_dst;
+ area_dst = tmp_area;
+
+ tmp_area = area_src_alias;
+ area_src_alias = area_dst_alias;
+ area_dst_alias = tmp_area;
+
+ printf("userfaults:");
+ for (cpu = 0; cpu < nr_cpus; cpu++)
+ printf(" %lu", userfaults[cpu]);
+ printf("\n");
+ }
+
+ if (err)
+ return err;
+
+ close(uffd);
+ return userfaultfd_zeropage_test() || userfaultfd_sig_test()
+ || userfaultfd_events_test();
+}
+
+/*
+ * Copied from mlock2-tests.c
+ */
+unsigned long default_huge_page_size(void)
+{
+ unsigned long hps = 0;
+ char *line = NULL;
+ size_t linelen = 0;
+ FILE *f = fopen("/proc/meminfo", "r");
+
+ if (!f)
+ return 0;
+ while (getline(&line, &linelen, f) > 0) {
+ if (sscanf(line, "Hugepagesize: %lu kB", &hps) == 1) {
+ hps <<= 10;
+ break;
+ }
+ }
+
+ free(line);
+ fclose(f);
+ return hps;
+}
+
+static void set_test_type(const char *type)
+{
+ if (!strcmp(type, "anon")) {
+ test_type = TEST_ANON;
+ uffd_test_ops = &anon_uffd_test_ops;
+ } else if (!strcmp(type, "hugetlb")) {
+ test_type = TEST_HUGETLB;
+ uffd_test_ops = &hugetlb_uffd_test_ops;
+ } else if (!strcmp(type, "hugetlb_shared")) {
+ map_shared = true;
+ test_type = TEST_HUGETLB;
+ uffd_test_ops = &hugetlb_uffd_test_ops;
+ } else if (!strcmp(type, "shmem")) {
+ map_shared = true;
+ test_type = TEST_SHMEM;
+ uffd_test_ops = &shmem_uffd_test_ops;
+ } else {
+ fprintf(stderr, "Unknown test type: %s\n", type), exit(1);
+ }
+
+ if (test_type == TEST_HUGETLB)
+ page_size = default_huge_page_size();
+ else
+ page_size = sysconf(_SC_PAGE_SIZE);
+
+ if (!page_size)
+ fprintf(stderr, "Unable to determine page size\n"),
+ exit(2);
+ if ((unsigned long) area_count(NULL, 0) + sizeof(unsigned long long) * 2
+ > page_size)
+ fprintf(stderr, "Impossible to run this test\n"), exit(2);
+}
+
+static void sigalrm(int sig)
+{
+ if (sig != SIGALRM)
+ abort();
+ test_uffdio_copy_eexist = true;
+ test_uffdio_zeropage_eexist = true;
+ alarm(ALARM_INTERVAL_SECS);
+}
+
+int main(int argc, char **argv)
+{
+ if (argc < 4)
+ fprintf(stderr, "Usage: <test type> <MiB> <bounces> [hugetlbfs_file]\n"),
+ exit(1);
+
+ if (signal(SIGALRM, sigalrm) == SIG_ERR)
+ fprintf(stderr, "failed to arm SIGALRM"), exit(1);
+ alarm(ALARM_INTERVAL_SECS);
+
+ set_test_type(argv[1]);
+
+ nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+ nr_pages_per_cpu = atol(argv[2]) * 1024*1024 / page_size /
+ nr_cpus;
+ if (!nr_pages_per_cpu) {
+ fprintf(stderr, "invalid MiB\n");
+ fprintf(stderr, "Usage: <MiB> <bounces>\n"), exit(1);
+ }
+
+ bounces = atoi(argv[3]);
+ if (bounces <= 0) {
+ fprintf(stderr, "invalid bounces\n");
+ fprintf(stderr, "Usage: <MiB> <bounces>\n"), exit(1);
+ }
+ nr_pages = nr_pages_per_cpu * nr_cpus;
+
+ if (test_type == TEST_HUGETLB) {
+ if (argc < 5)
+ fprintf(stderr, "Usage: hugetlb <MiB> <bounces> <hugetlbfs_file>\n"),
+ exit(1);
+ huge_fd = open(argv[4], O_CREAT | O_RDWR, 0755);
+ if (huge_fd < 0) {
+ fprintf(stderr, "Open of %s failed", argv[3]);
+ perror("open");
+ exit(1);
+ }
+ if (ftruncate(huge_fd, 0)) {
+ fprintf(stderr, "ftruncate %s to size 0 failed", argv[3]);
+ perror("ftruncate");
+ exit(1);
+ }
+ }
+ printf("nr_pages: %lu, nr_pages_per_cpu: %lu\n",
+ nr_pages, nr_pages_per_cpu);
+ return userfaultfd_stress();
+}
+
+#else /* __NR_userfaultfd */
+
+#warning "missing __NR_userfaultfd definition"
+
+int main(void)
+{
+ printf("skip: Skipping userfaultfd test (missing __NR_userfaultfd)\n");
+ return KSFT_SKIP;
+}
+
+#endif /* __NR_userfaultfd */
diff --git a/tools/testing/selftests/vm/va_128TBswitch.c b/tools/testing/selftests/vm/va_128TBswitch.c
new file mode 100644
index 000000000..e7fe734c3
--- /dev/null
+++ b/tools/testing/selftests/vm/va_128TBswitch.c
@@ -0,0 +1,297 @@
+/*
+ *
+ * Authors: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+ * Authors: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ */
+
+#include <stdio.h>
+#include <sys/mman.h>
+#include <string.h>
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+#ifdef __powerpc64__
+#define PAGE_SIZE (64 << 10)
+/*
+ * This will work with 16M and 2M hugepage size
+ */
+#define HUGETLB_SIZE (16 << 20)
+#else
+#define PAGE_SIZE (4 << 10)
+#define HUGETLB_SIZE (2 << 20)
+#endif
+
+/*
+ * >= 128TB is the hint addr value we used to select
+ * large address space.
+ */
+#define ADDR_SWITCH_HINT (1UL << 47)
+#define LOW_ADDR ((void *) (1UL << 30))
+#define HIGH_ADDR ((void *) (1UL << 48))
+
+struct testcase {
+ void *addr;
+ unsigned long size;
+ unsigned long flags;
+ const char *msg;
+ unsigned int low_addr_required:1;
+ unsigned int keep_mapped:1;
+};
+
+static struct testcase testcases[] = {
+ {
+ /*
+ * If stack is moved, we could possibly allocate
+ * this at the requested address.
+ */
+ .addr = ((void *)(ADDR_SWITCH_HINT - PAGE_SIZE)),
+ .size = PAGE_SIZE,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, PAGE_SIZE)",
+ .low_addr_required = 1,
+ },
+ {
+ /*
+ * We should never allocate at the requested address or above it
+ * The len cross the 128TB boundary. Without MAP_FIXED
+ * we will always search in the lower address space.
+ */
+ .addr = ((void *)(ADDR_SWITCH_HINT - PAGE_SIZE)),
+ .size = 2 * PAGE_SIZE,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, (2 * PAGE_SIZE))",
+ .low_addr_required = 1,
+ },
+ {
+ /*
+ * Exact mapping at 128TB, the area is free we should get that
+ * even without MAP_FIXED.
+ */
+ .addr = ((void *)(ADDR_SWITCH_HINT)),
+ .size = PAGE_SIZE,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(ADDR_SWITCH_HINT, PAGE_SIZE)",
+ .keep_mapped = 1,
+ },
+ {
+ .addr = (void *)(ADDR_SWITCH_HINT),
+ .size = 2 * PAGE_SIZE,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
+ .msg = "mmap(ADDR_SWITCH_HINT, 2 * PAGE_SIZE, MAP_FIXED)",
+ },
+ {
+ .addr = NULL,
+ .size = 2 * PAGE_SIZE,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(NULL)",
+ .low_addr_required = 1,
+ },
+ {
+ .addr = LOW_ADDR,
+ .size = 2 * PAGE_SIZE,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(LOW_ADDR)",
+ .low_addr_required = 1,
+ },
+ {
+ .addr = HIGH_ADDR,
+ .size = 2 * PAGE_SIZE,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(HIGH_ADDR)",
+ .keep_mapped = 1,
+ },
+ {
+ .addr = HIGH_ADDR,
+ .size = 2 * PAGE_SIZE,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(HIGH_ADDR) again",
+ .keep_mapped = 1,
+ },
+ {
+ .addr = HIGH_ADDR,
+ .size = 2 * PAGE_SIZE,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
+ .msg = "mmap(HIGH_ADDR, MAP_FIXED)",
+ },
+ {
+ .addr = (void *) -1,
+ .size = 2 * PAGE_SIZE,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(-1)",
+ .keep_mapped = 1,
+ },
+ {
+ .addr = (void *) -1,
+ .size = 2 * PAGE_SIZE,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(-1) again",
+ },
+ {
+ .addr = ((void *)(ADDR_SWITCH_HINT - PAGE_SIZE)),
+ .size = PAGE_SIZE,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, PAGE_SIZE)",
+ .low_addr_required = 1,
+ },
+ {
+ .addr = (void *)(ADDR_SWITCH_HINT - PAGE_SIZE),
+ .size = 2 * PAGE_SIZE,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, 2 * PAGE_SIZE)",
+ .low_addr_required = 1,
+ .keep_mapped = 1,
+ },
+ {
+ .addr = (void *)(ADDR_SWITCH_HINT - PAGE_SIZE / 2),
+ .size = 2 * PAGE_SIZE,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE/2 , 2 * PAGE_SIZE)",
+ .low_addr_required = 1,
+ .keep_mapped = 1,
+ },
+ {
+ .addr = ((void *)(ADDR_SWITCH_HINT)),
+ .size = PAGE_SIZE,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(ADDR_SWITCH_HINT, PAGE_SIZE)",
+ },
+ {
+ .addr = (void *)(ADDR_SWITCH_HINT),
+ .size = 2 * PAGE_SIZE,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
+ .msg = "mmap(ADDR_SWITCH_HINT, 2 * PAGE_SIZE, MAP_FIXED)",
+ },
+};
+
+static struct testcase hugetlb_testcases[] = {
+ {
+ .addr = NULL,
+ .size = HUGETLB_SIZE,
+ .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(NULL, MAP_HUGETLB)",
+ .low_addr_required = 1,
+ },
+ {
+ .addr = LOW_ADDR,
+ .size = HUGETLB_SIZE,
+ .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(LOW_ADDR, MAP_HUGETLB)",
+ .low_addr_required = 1,
+ },
+ {
+ .addr = HIGH_ADDR,
+ .size = HUGETLB_SIZE,
+ .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(HIGH_ADDR, MAP_HUGETLB)",
+ .keep_mapped = 1,
+ },
+ {
+ .addr = HIGH_ADDR,
+ .size = HUGETLB_SIZE,
+ .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(HIGH_ADDR, MAP_HUGETLB) again",
+ .keep_mapped = 1,
+ },
+ {
+ .addr = HIGH_ADDR,
+ .size = HUGETLB_SIZE,
+ .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
+ .msg = "mmap(HIGH_ADDR, MAP_FIXED | MAP_HUGETLB)",
+ },
+ {
+ .addr = (void *) -1,
+ .size = HUGETLB_SIZE,
+ .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(-1, MAP_HUGETLB)",
+ .keep_mapped = 1,
+ },
+ {
+ .addr = (void *) -1,
+ .size = HUGETLB_SIZE,
+ .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(-1, MAP_HUGETLB) again",
+ },
+ {
+ .addr = (void *)(ADDR_SWITCH_HINT - PAGE_SIZE),
+ .size = 2 * HUGETLB_SIZE,
+ .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, 2*HUGETLB_SIZE, MAP_HUGETLB)",
+ .low_addr_required = 1,
+ .keep_mapped = 1,
+ },
+ {
+ .addr = (void *)(ADDR_SWITCH_HINT),
+ .size = 2 * HUGETLB_SIZE,
+ .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
+ .msg = "mmap(ADDR_SWITCH_HINT , 2*HUGETLB_SIZE, MAP_FIXED | MAP_HUGETLB)",
+ },
+};
+
+static int run_test(struct testcase *test, int count)
+{
+ void *p;
+ int i, ret = 0;
+
+ for (i = 0; i < count; i++) {
+ struct testcase *t = test + i;
+
+ p = mmap(t->addr, t->size, PROT_READ | PROT_WRITE, t->flags, -1, 0);
+
+ printf("%s: %p - ", t->msg, p);
+
+ if (p == MAP_FAILED) {
+ printf("FAILED\n");
+ ret = 1;
+ continue;
+ }
+
+ if (t->low_addr_required && p >= (void *)(ADDR_SWITCH_HINT)) {
+ printf("FAILED\n");
+ ret = 1;
+ } else {
+ /*
+ * Do a dereference of the address returned so that we catch
+ * bugs in page fault handling
+ */
+ memset(p, 0, t->size);
+ printf("OK\n");
+ }
+ if (!t->keep_mapped)
+ munmap(p, t->size);
+ }
+
+ return ret;
+}
+
+static int supported_arch(void)
+{
+#if defined(__powerpc64__)
+ return 1;
+#elif defined(__x86_64__)
+ return 1;
+#else
+ return 0;
+#endif
+}
+
+int main(int argc, char **argv)
+{
+ int ret;
+
+ if (!supported_arch())
+ return 0;
+
+ ret = run_test(testcases, ARRAY_SIZE(testcases));
+ if (argc == 2 && !strcmp(argv[1], "--run-hugetlb"))
+ ret = run_test(hugetlb_testcases, ARRAY_SIZE(hugetlb_testcases));
+ return ret;
+}
diff --git a/tools/testing/selftests/vm/virtual_address_range.c b/tools/testing/selftests/vm/virtual_address_range.c
new file mode 100644
index 000000000..1830d66a6
--- /dev/null
+++ b/tools/testing/selftests/vm/virtual_address_range.c
@@ -0,0 +1,139 @@
+/*
+ * Copyright 2017, Anshuman Khandual, IBM Corp.
+ * Licensed under GPLv2.
+ *
+ * Works on architectures which support 128TB virtual
+ * address range and beyond.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/mman.h>
+#include <sys/time.h>
+
+/*
+ * Maximum address range mapped with a single mmap()
+ * call is little bit more than 16GB. Hence 16GB is
+ * chosen as the single chunk size for address space
+ * mapping.
+ */
+#define MAP_CHUNK_SIZE 17179869184UL /* 16GB */
+
+/*
+ * Address space till 128TB is mapped without any hint
+ * and is enabled by default. Address space beyond 128TB
+ * till 512TB is obtained by passing hint address as the
+ * first argument into mmap() system call.
+ *
+ * The process heap address space is divided into two
+ * different areas one below 128TB and one above 128TB
+ * till it reaches 512TB. One with size 128TB and the
+ * other being 384TB.
+ *
+ * On Arm64 the address space is 256TB and no high mappings
+ * are supported so far.
+ */
+
+#define NR_CHUNKS_128TB 8192UL /* Number of 16GB chunks for 128TB */
+#define NR_CHUNKS_256TB (NR_CHUNKS_128TB * 2UL)
+#define NR_CHUNKS_384TB (NR_CHUNKS_128TB * 3UL)
+
+#define ADDR_MARK_128TB (1UL << 47) /* First address beyond 128TB */
+#define ADDR_MARK_256TB (1UL << 48) /* First address beyond 256TB */
+
+#ifdef __aarch64__
+#define HIGH_ADDR_MARK ADDR_MARK_256TB
+#define HIGH_ADDR_SHIFT 49
+#define NR_CHUNKS_LOW NR_CHUNKS_256TB
+#define NR_CHUNKS_HIGH 0
+#else
+#define HIGH_ADDR_MARK ADDR_MARK_128TB
+#define HIGH_ADDR_SHIFT 48
+#define NR_CHUNKS_LOW NR_CHUNKS_128TB
+#define NR_CHUNKS_HIGH NR_CHUNKS_384TB
+#endif
+
+static char *hind_addr(void)
+{
+ int bits = HIGH_ADDR_SHIFT + rand() % (63 - HIGH_ADDR_SHIFT);
+
+ return (char *) (1UL << bits);
+}
+
+static int validate_addr(char *ptr, int high_addr)
+{
+ unsigned long addr = (unsigned long) ptr;
+
+ if (high_addr) {
+ if (addr < HIGH_ADDR_MARK) {
+ printf("Bad address %lx\n", addr);
+ return 1;
+ }
+ return 0;
+ }
+
+ if (addr > HIGH_ADDR_MARK) {
+ printf("Bad address %lx\n", addr);
+ return 1;
+ }
+ return 0;
+}
+
+static int validate_lower_address_hint(void)
+{
+ char *ptr;
+
+ ptr = mmap((void *) (1UL << 45), MAP_CHUNK_SIZE, PROT_READ |
+ PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+ if (ptr == MAP_FAILED)
+ return 0;
+
+ return 1;
+}
+
+int main(int argc, char *argv[])
+{
+ char *ptr[NR_CHUNKS_LOW];
+ char *hptr[NR_CHUNKS_HIGH];
+ char *hint;
+ unsigned long i, lchunks, hchunks;
+
+ for (i = 0; i < NR_CHUNKS_LOW; i++) {
+ ptr[i] = mmap(NULL, MAP_CHUNK_SIZE, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+ if (ptr[i] == MAP_FAILED) {
+ if (validate_lower_address_hint())
+ return 1;
+ break;
+ }
+
+ if (validate_addr(ptr[i], 0))
+ return 1;
+ }
+ lchunks = i;
+
+ for (i = 0; i < NR_CHUNKS_HIGH; i++) {
+ hint = hind_addr();
+ hptr[i] = mmap(hint, MAP_CHUNK_SIZE, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+ if (hptr[i] == MAP_FAILED)
+ break;
+
+ if (validate_addr(hptr[i], 1))
+ return 1;
+ }
+ hchunks = i;
+
+ for (i = 0; i < lchunks; i++)
+ munmap(ptr[i], MAP_CHUNK_SIZE);
+
+ for (i = 0; i < hchunks; i++)
+ munmap(hptr[i], MAP_CHUNK_SIZE);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/watchdog/.gitignore b/tools/testing/selftests/watchdog/.gitignore
new file mode 100644
index 000000000..5aac51575
--- /dev/null
+++ b/tools/testing/selftests/watchdog/.gitignore
@@ -0,0 +1 @@
+watchdog-test
diff --git a/tools/testing/selftests/watchdog/Makefile b/tools/testing/selftests/watchdog/Makefile
new file mode 100644
index 000000000..6b5598b55
--- /dev/null
+++ b/tools/testing/selftests/watchdog/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
+TEST_GEN_PROGS := watchdog-test
+
+include ../lib.mk
diff --git a/tools/testing/selftests/watchdog/watchdog-test.c b/tools/testing/selftests/watchdog/watchdog-test.c
new file mode 100644
index 000000000..f1c6e025c
--- /dev/null
+++ b/tools/testing/selftests/watchdog/watchdog-test.c
@@ -0,0 +1,167 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Watchdog Driver Test Program
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <getopt.h>
+#include <sys/ioctl.h>
+#include <linux/types.h>
+#include <linux/watchdog.h>
+
+#define DEFAULT_PING_RATE 1
+
+int fd;
+const char v = 'V';
+static const char sopts[] = "bdehp:t:";
+static const struct option lopts[] = {
+ {"bootstatus", no_argument, NULL, 'b'},
+ {"disable", no_argument, NULL, 'd'},
+ {"enable", no_argument, NULL, 'e'},
+ {"help", no_argument, NULL, 'h'},
+ {"pingrate", required_argument, NULL, 'p'},
+ {"timeout", required_argument, NULL, 't'},
+ {NULL, no_argument, NULL, 0x0}
+};
+
+/*
+ * This function simply sends an IOCTL to the driver, which in turn ticks
+ * the PC Watchdog card to reset its internal timer so it doesn't trigger
+ * a computer reset.
+ */
+static void keep_alive(void)
+{
+ int dummy;
+ int ret;
+
+ ret = ioctl(fd, WDIOC_KEEPALIVE, &dummy);
+ if (!ret)
+ printf(".");
+}
+
+/*
+ * The main program. Run the program with "-d" to disable the card,
+ * or "-e" to enable the card.
+ */
+
+static void term(int sig)
+{
+ int ret = write(fd, &v, 1);
+
+ close(fd);
+ if (ret < 0)
+ printf("\nStopping watchdog ticks failed (%d)...\n", errno);
+ else
+ printf("\nStopping watchdog ticks...\n");
+ exit(0);
+}
+
+static void usage(char *progname)
+{
+ printf("Usage: %s [options]\n", progname);
+ printf(" -b, --bootstatus Get last boot status (Watchdog/POR)\n");
+ printf(" -d, --disable Turn off the watchdog timer\n");
+ printf(" -e, --enable Turn on the watchdog timer\n");
+ printf(" -h, --help Print the help message\n");
+ printf(" -p, --pingrate=P Set ping rate to P seconds (default %d)\n", DEFAULT_PING_RATE);
+ printf(" -t, --timeout=T Set timeout to T seconds\n");
+ printf("\n");
+ printf("Parameters are parsed left-to-right in real-time.\n");
+ printf("Example: %s -d -t 10 -p 5 -e\n", progname);
+}
+
+int main(int argc, char *argv[])
+{
+ int flags;
+ unsigned int ping_rate = DEFAULT_PING_RATE;
+ int ret;
+ int c;
+ int oneshot = 0;
+
+ setbuf(stdout, NULL);
+
+ fd = open("/dev/watchdog", O_WRONLY);
+
+ if (fd == -1) {
+ if (errno == ENOENT)
+ printf("Watchdog device not enabled.\n");
+ else if (errno == EACCES)
+ printf("Run watchdog as root.\n");
+ else
+ printf("Watchdog device open failed %s\n",
+ strerror(errno));
+ exit(-1);
+ }
+
+ while ((c = getopt_long(argc, argv, sopts, lopts, NULL)) != -1) {
+ switch (c) {
+ case 'b':
+ flags = 0;
+ oneshot = 1;
+ ret = ioctl(fd, WDIOC_GETBOOTSTATUS, &flags);
+ if (!ret)
+ printf("Last boot is caused by: %s.\n", (flags != 0) ?
+ "Watchdog" : "Power-On-Reset");
+ else
+ printf("WDIOC_GETBOOTSTATUS error '%s'\n", strerror(errno));
+ break;
+ case 'd':
+ flags = WDIOS_DISABLECARD;
+ ret = ioctl(fd, WDIOC_SETOPTIONS, &flags);
+ if (!ret)
+ printf("Watchdog card disabled.\n");
+ else
+ printf("WDIOS_DISABLECARD error '%s'\n", strerror(errno));
+ break;
+ case 'e':
+ flags = WDIOS_ENABLECARD;
+ ret = ioctl(fd, WDIOC_SETOPTIONS, &flags);
+ if (!ret)
+ printf("Watchdog card enabled.\n");
+ else
+ printf("WDIOS_ENABLECARD error '%s'\n", strerror(errno));
+ break;
+ case 'p':
+ ping_rate = strtoul(optarg, NULL, 0);
+ if (!ping_rate)
+ ping_rate = DEFAULT_PING_RATE;
+ printf("Watchdog ping rate set to %u seconds.\n", ping_rate);
+ break;
+ case 't':
+ flags = strtoul(optarg, NULL, 0);
+ ret = ioctl(fd, WDIOC_SETTIMEOUT, &flags);
+ if (!ret)
+ printf("Watchdog timeout set to %u seconds.\n", flags);
+ else
+ printf("WDIOC_SETTIMEOUT error '%s'\n", strerror(errno));
+ break;
+ default:
+ usage(argv[0]);
+ goto end;
+ }
+ }
+
+ if (oneshot)
+ goto end;
+
+ printf("Watchdog Ticking Away!\n");
+
+ signal(SIGINT, term);
+
+ while (1) {
+ keep_alive();
+ sleep(ping_rate);
+ }
+end:
+ ret = write(fd, &v, 1);
+ if (ret < 0)
+ printf("Stopping watchdog ticks failed (%d)...\n", errno);
+ close(fd);
+ return 0;
+}
diff --git a/tools/testing/selftests/x86/.gitignore b/tools/testing/selftests/x86/.gitignore
new file mode 100644
index 000000000..7757f73ff
--- /dev/null
+++ b/tools/testing/selftests/x86/.gitignore
@@ -0,0 +1,15 @@
+*_32
+*_64
+single_step_syscall
+sysret_ss_attrs
+syscall_nt
+ptrace_syscall
+test_mremap_vdso
+check_initial_reg_state
+sigreturn
+ldt_gdt
+iopl
+mpx-mini-test
+ioperm
+protection_keys
+test_vdso
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
new file mode 100644
index 000000000..186520198
--- /dev/null
+++ b/tools/testing/selftests/x86/Makefile
@@ -0,0 +1,105 @@
+# SPDX-License-Identifier: GPL-2.0
+all:
+
+include ../lib.mk
+
+.PHONY: all all_32 all_64 warn_32bit_failure clean
+
+UNAME_M := $(shell uname -m)
+CAN_BUILD_I386 := $(shell ./check_cc.sh $(CC) trivial_32bit_program.c -m32)
+CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c)
+CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh $(CC) trivial_program.c -no-pie)
+
+TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \
+ check_initial_reg_state sigreturn iopl mpx-mini-test ioperm \
+ protection_keys test_vdso test_vsyscall mov_ss_trap
+TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
+ test_FCMOV test_FCOMI test_FISTTP \
+ vdso_restorer
+TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip
+# Some selftests require 32bit support enabled also on 64bit systems
+TARGETS_C_32BIT_NEEDED := ldt_gdt ptrace_syscall
+
+TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY) $(TARGETS_C_32BIT_NEEDED)
+TARGETS_C_64BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_64BIT_ONLY)
+ifeq ($(CAN_BUILD_I386)$(CAN_BUILD_X86_64),11)
+TARGETS_C_64BIT_ALL += $(TARGETS_C_32BIT_NEEDED)
+endif
+
+BINARIES_32 := $(TARGETS_C_32BIT_ALL:%=%_32)
+BINARIES_64 := $(TARGETS_C_64BIT_ALL:%=%_64)
+
+BINARIES_32 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_32))
+BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64))
+
+CFLAGS := -O2 -g -std=gnu99 -pthread -Wall
+
+# call32_from_64 in thunks.S uses absolute addresses.
+ifeq ($(CAN_BUILD_WITH_NOPIE),1)
+CFLAGS += -no-pie
+endif
+
+define gen-target-rule-32
+$(1) $(1)_32: $(OUTPUT)/$(1)_32
+.PHONY: $(1) $(1)_32
+endef
+
+define gen-target-rule-64
+$(1) $(1)_64: $(OUTPUT)/$(1)_64
+.PHONY: $(1) $(1)_64
+endef
+
+ifeq ($(CAN_BUILD_I386),1)
+all: all_32
+TEST_PROGS += $(BINARIES_32)
+EXTRA_CFLAGS += -DCAN_BUILD_32
+$(foreach t,$(TARGETS_C_32BIT_ALL),$(eval $(call gen-target-rule-32,$(t))))
+endif
+
+ifeq ($(CAN_BUILD_X86_64),1)
+all: all_64
+TEST_PROGS += $(BINARIES_64)
+EXTRA_CFLAGS += -DCAN_BUILD_64
+$(foreach t,$(TARGETS_C_64BIT_ALL),$(eval $(call gen-target-rule-64,$(t))))
+endif
+
+all_32: $(BINARIES_32)
+
+all_64: $(BINARIES_64)
+
+EXTRA_CLEAN := $(BINARIES_32) $(BINARIES_64)
+
+$(BINARIES_32): $(OUTPUT)/%_32: %.c
+ $(CC) -m32 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl -lm
+
+$(BINARIES_64): $(OUTPUT)/%_64: %.c
+ $(CC) -m64 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl
+
+# x86_64 users should be encouraged to install 32-bit libraries
+ifeq ($(CAN_BUILD_I386)$(CAN_BUILD_X86_64),01)
+all: warn_32bit_failure
+
+warn_32bit_failure:
+ @echo "Warning: you seem to have a broken 32-bit build" 2>&1; \
+ echo "environment. This will reduce test coverage of 64-bit" 2>&1; \
+ echo "kernels. If you are using a Debian-like distribution," 2>&1; \
+ echo "try:"; 2>&1; \
+ echo ""; \
+ echo " apt-get install gcc-multilib libc6-i386 libc6-dev-i386"; \
+ echo ""; \
+ echo "If you are using a Fedora-like distribution, try:"; \
+ echo ""; \
+ echo " yum install glibc-devel.*i686"; \
+ exit 0;
+endif
+
+# Some tests have additional dependencies.
+$(OUTPUT)/sysret_ss_attrs_64: thunks.S
+$(OUTPUT)/ptrace_syscall_32: raw_syscall_helper_32.S
+$(OUTPUT)/test_syscall_vdso_32: thunks_32.S
+
+# check_initial_reg_state is special: it needs a custom entry, and it
+# needs to be static so that its interpreter doesn't destroy its initial
+# state.
+$(OUTPUT)/check_initial_reg_state_32: CFLAGS += -Wl,-ereal_start -static
+$(OUTPUT)/check_initial_reg_state_64: CFLAGS += -Wl,-ereal_start -static
diff --git a/tools/testing/selftests/x86/check_cc.sh b/tools/testing/selftests/x86/check_cc.sh
new file mode 100755
index 000000000..356689c56
--- /dev/null
+++ b/tools/testing/selftests/x86/check_cc.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+# check_cc.sh - Helper to test userspace compilation support
+# Copyright (c) 2015 Andrew Lutomirski
+# GPL v2
+
+CC="$1"
+TESTPROG="$2"
+shift 2
+
+if [ -n "$CC" ] && $CC -o /dev/null "$TESTPROG" -O0 "$@" 2>/dev/null; then
+ echo 1
+else
+ echo 0
+fi
+
+exit 0
diff --git a/tools/testing/selftests/x86/check_initial_reg_state.c b/tools/testing/selftests/x86/check_initial_reg_state.c
new file mode 100644
index 000000000..6aaed9b85
--- /dev/null
+++ b/tools/testing/selftests/x86/check_initial_reg_state.c
@@ -0,0 +1,109 @@
+/*
+ * check_initial_reg_state.c - check that execve sets the correct state
+ * Copyright (c) 2014-2016 Andrew Lutomirski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+
+unsigned long ax, bx, cx, dx, si, di, bp, sp, flags;
+unsigned long r8, r9, r10, r11, r12, r13, r14, r15;
+
+asm (
+ ".pushsection .text\n\t"
+ ".type real_start, @function\n\t"
+ ".global real_start\n\t"
+ "real_start:\n\t"
+#ifdef __x86_64__
+ "mov %rax, ax\n\t"
+ "mov %rbx, bx\n\t"
+ "mov %rcx, cx\n\t"
+ "mov %rdx, dx\n\t"
+ "mov %rsi, si\n\t"
+ "mov %rdi, di\n\t"
+ "mov %rbp, bp\n\t"
+ "mov %rsp, sp\n\t"
+ "mov %r8, r8\n\t"
+ "mov %r9, r9\n\t"
+ "mov %r10, r10\n\t"
+ "mov %r11, r11\n\t"
+ "mov %r12, r12\n\t"
+ "mov %r13, r13\n\t"
+ "mov %r14, r14\n\t"
+ "mov %r15, r15\n\t"
+ "pushfq\n\t"
+ "popq flags\n\t"
+#else
+ "mov %eax, ax\n\t"
+ "mov %ebx, bx\n\t"
+ "mov %ecx, cx\n\t"
+ "mov %edx, dx\n\t"
+ "mov %esi, si\n\t"
+ "mov %edi, di\n\t"
+ "mov %ebp, bp\n\t"
+ "mov %esp, sp\n\t"
+ "pushfl\n\t"
+ "popl flags\n\t"
+#endif
+ "jmp _start\n\t"
+ ".size real_start, . - real_start\n\t"
+ ".popsection");
+
+int main()
+{
+ int nerrs = 0;
+
+ if (sp == 0) {
+ printf("[FAIL]\tTest was built incorrectly\n");
+ return 1;
+ }
+
+ if (ax || bx || cx || dx || si || di || bp
+#ifdef __x86_64__
+ || r8 || r9 || r10 || r11 || r12 || r13 || r14 || r15
+#endif
+ ) {
+ printf("[FAIL]\tAll GPRs except SP should be 0\n");
+#define SHOW(x) printf("\t" #x " = 0x%lx\n", x);
+ SHOW(ax);
+ SHOW(bx);
+ SHOW(cx);
+ SHOW(dx);
+ SHOW(si);
+ SHOW(di);
+ SHOW(bp);
+ SHOW(sp);
+#ifdef __x86_64__
+ SHOW(r8);
+ SHOW(r9);
+ SHOW(r10);
+ SHOW(r11);
+ SHOW(r12);
+ SHOW(r13);
+ SHOW(r14);
+ SHOW(r15);
+#endif
+ nerrs++;
+ } else {
+ printf("[OK]\tAll GPRs except SP are 0\n");
+ }
+
+ if (flags != 0x202) {
+ printf("[FAIL]\tFLAGS is 0x%lx, but it should be 0x202\n", flags);
+ nerrs++;
+ } else {
+ printf("[OK]\tFLAGS is 0x202\n");
+ }
+
+ return nerrs ? 1 : 0;
+}
diff --git a/tools/testing/selftests/x86/entry_from_vm86.c b/tools/testing/selftests/x86/entry_from_vm86.c
new file mode 100644
index 000000000..ade443a88
--- /dev/null
+++ b/tools/testing/selftests/x86/entry_from_vm86.c
@@ -0,0 +1,349 @@
+/*
+ * entry_from_vm86.c - tests kernel entries from vm86 mode
+ * Copyright (c) 2014-2015 Andrew Lutomirski
+ *
+ * This exercises a few paths that need to special-case vm86 mode.
+ *
+ * GPL v2.
+ */
+
+#define _GNU_SOURCE
+
+#include <assert.h>
+#include <stdlib.h>
+#include <sys/syscall.h>
+#include <sys/signal.h>
+#include <sys/ucontext.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <sys/mman.h>
+#include <err.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <sys/vm86.h>
+
+static unsigned long load_addr = 0x10000;
+static int nerrs = 0;
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+ int flags)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static void clearhandler(int sig)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_handler = SIG_DFL;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static sig_atomic_t got_signal;
+
+static void sighandler(int sig, siginfo_t *info, void *ctx_void)
+{
+ ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+ if (ctx->uc_mcontext.gregs[REG_EFL] & X86_EFLAGS_VM ||
+ (ctx->uc_mcontext.gregs[REG_CS] & 3) != 3) {
+ printf("[FAIL]\tSignal frame should not reflect vm86 mode\n");
+ nerrs++;
+ }
+
+ const char *signame;
+ if (sig == SIGSEGV)
+ signame = "SIGSEGV";
+ else if (sig == SIGILL)
+ signame = "SIGILL";
+ else
+ signame = "unexpected signal";
+
+ printf("[INFO]\t%s: FLAGS = 0x%lx, CS = 0x%hx\n", signame,
+ (unsigned long)ctx->uc_mcontext.gregs[REG_EFL],
+ (unsigned short)ctx->uc_mcontext.gregs[REG_CS]);
+
+ got_signal = 1;
+}
+
+asm (
+ ".pushsection .rodata\n\t"
+ ".type vmcode_bound, @object\n\t"
+ "vmcode:\n\t"
+ "vmcode_bound:\n\t"
+ ".code16\n\t"
+ "bound %ax, (2048)\n\t"
+ "int3\n\t"
+ "vmcode_sysenter:\n\t"
+ "sysenter\n\t"
+ "vmcode_syscall:\n\t"
+ "syscall\n\t"
+ "vmcode_sti:\n\t"
+ "sti\n\t"
+ "vmcode_int3:\n\t"
+ "int3\n\t"
+ "vmcode_int80:\n\t"
+ "int $0x80\n\t"
+ "vmcode_popf_hlt:\n\t"
+ "push %ax\n\t"
+ "popf\n\t"
+ "hlt\n\t"
+ "vmcode_umip:\n\t"
+ /* addressing via displacements */
+ "smsw (2052)\n\t"
+ "sidt (2054)\n\t"
+ "sgdt (2060)\n\t"
+ /* addressing via registers */
+ "mov $2066, %bx\n\t"
+ "smsw (%bx)\n\t"
+ "mov $2068, %bx\n\t"
+ "sidt (%bx)\n\t"
+ "mov $2074, %bx\n\t"
+ "sgdt (%bx)\n\t"
+ /* register operands, only for smsw */
+ "smsw %ax\n\t"
+ "mov %ax, (2080)\n\t"
+ "int3\n\t"
+ "vmcode_umip_str:\n\t"
+ "str %eax\n\t"
+ "vmcode_umip_sldt:\n\t"
+ "sldt %eax\n\t"
+ "int3\n\t"
+ ".size vmcode, . - vmcode\n\t"
+ "end_vmcode:\n\t"
+ ".code32\n\t"
+ ".popsection"
+ );
+
+extern unsigned char vmcode[], end_vmcode[];
+extern unsigned char vmcode_bound[], vmcode_sysenter[], vmcode_syscall[],
+ vmcode_sti[], vmcode_int3[], vmcode_int80[], vmcode_popf_hlt[],
+ vmcode_umip[], vmcode_umip_str[], vmcode_umip_sldt[];
+
+/* Returns false if the test was skipped. */
+static bool do_test(struct vm86plus_struct *v86, unsigned long eip,
+ unsigned int rettype, unsigned int retarg,
+ const char *text)
+{
+ long ret;
+
+ printf("[RUN]\t%s from vm86 mode\n", text);
+ v86->regs.eip = eip;
+ ret = vm86(VM86_ENTER, v86);
+
+ if (ret == -1 && (errno == ENOSYS || errno == EPERM)) {
+ printf("[SKIP]\tvm86 %s\n",
+ errno == ENOSYS ? "not supported" : "not allowed");
+ return false;
+ }
+
+ if (VM86_TYPE(ret) == VM86_INTx) {
+ char trapname[32];
+ int trapno = VM86_ARG(ret);
+ if (trapno == 13)
+ strcpy(trapname, "GP");
+ else if (trapno == 5)
+ strcpy(trapname, "BR");
+ else if (trapno == 14)
+ strcpy(trapname, "PF");
+ else
+ sprintf(trapname, "%d", trapno);
+
+ printf("[INFO]\tExited vm86 mode due to #%s\n", trapname);
+ } else if (VM86_TYPE(ret) == VM86_UNKNOWN) {
+ printf("[INFO]\tExited vm86 mode due to unhandled GP fault\n");
+ } else if (VM86_TYPE(ret) == VM86_TRAP) {
+ printf("[INFO]\tExited vm86 mode due to a trap (arg=%ld)\n",
+ VM86_ARG(ret));
+ } else if (VM86_TYPE(ret) == VM86_SIGNAL) {
+ printf("[INFO]\tExited vm86 mode due to a signal\n");
+ } else if (VM86_TYPE(ret) == VM86_STI) {
+ printf("[INFO]\tExited vm86 mode due to STI\n");
+ } else {
+ printf("[INFO]\tExited vm86 mode due to type %ld, arg %ld\n",
+ VM86_TYPE(ret), VM86_ARG(ret));
+ }
+
+ if (rettype == -1 ||
+ (VM86_TYPE(ret) == rettype && VM86_ARG(ret) == retarg)) {
+ printf("[OK]\tReturned correctly\n");
+ } else {
+ printf("[FAIL]\tIncorrect return reason (started at eip = 0x%lx, ended at eip = 0x%lx)\n", eip, v86->regs.eip);
+ nerrs++;
+ }
+
+ return true;
+}
+
+void do_umip_tests(struct vm86plus_struct *vm86, unsigned char *test_mem)
+{
+ struct table_desc {
+ unsigned short limit;
+ unsigned long base;
+ } __attribute__((packed));
+
+ /* Initialize variables with arbitrary values */
+ struct table_desc gdt1 = { .base = 0x3c3c3c3c, .limit = 0x9999 };
+ struct table_desc gdt2 = { .base = 0x1a1a1a1a, .limit = 0xaeae };
+ struct table_desc idt1 = { .base = 0x7b7b7b7b, .limit = 0xf1f1 };
+ struct table_desc idt2 = { .base = 0x89898989, .limit = 0x1313 };
+ unsigned short msw1 = 0x1414, msw2 = 0x2525, msw3 = 3737;
+
+ /* UMIP -- exit with INT3 unless kernel emulation did not trap #GP */
+ do_test(vm86, vmcode_umip - vmcode, VM86_TRAP, 3, "UMIP tests");
+
+ /* Results from displacement-only addressing */
+ msw1 = *(unsigned short *)(test_mem + 2052);
+ memcpy(&idt1, test_mem + 2054, sizeof(idt1));
+ memcpy(&gdt1, test_mem + 2060, sizeof(gdt1));
+
+ /* Results from register-indirect addressing */
+ msw2 = *(unsigned short *)(test_mem + 2066);
+ memcpy(&idt2, test_mem + 2068, sizeof(idt2));
+ memcpy(&gdt2, test_mem + 2074, sizeof(gdt2));
+
+ /* Results when using register operands */
+ msw3 = *(unsigned short *)(test_mem + 2080);
+
+ printf("[INFO]\tResult from SMSW:[0x%04x]\n", msw1);
+ printf("[INFO]\tResult from SIDT: limit[0x%04x]base[0x%08lx]\n",
+ idt1.limit, idt1.base);
+ printf("[INFO]\tResult from SGDT: limit[0x%04x]base[0x%08lx]\n",
+ gdt1.limit, gdt1.base);
+
+ if (msw1 != msw2 || msw1 != msw3)
+ printf("[FAIL]\tAll the results of SMSW should be the same.\n");
+ else
+ printf("[PASS]\tAll the results from SMSW are identical.\n");
+
+ if (memcmp(&gdt1, &gdt2, sizeof(gdt1)))
+ printf("[FAIL]\tAll the results of SGDT should be the same.\n");
+ else
+ printf("[PASS]\tAll the results from SGDT are identical.\n");
+
+ if (memcmp(&idt1, &idt2, sizeof(idt1)))
+ printf("[FAIL]\tAll the results of SIDT should be the same.\n");
+ else
+ printf("[PASS]\tAll the results from SIDT are identical.\n");
+
+ sethandler(SIGILL, sighandler, 0);
+ do_test(vm86, vmcode_umip_str - vmcode, VM86_SIGNAL, 0,
+ "STR instruction");
+ clearhandler(SIGILL);
+
+ sethandler(SIGILL, sighandler, 0);
+ do_test(vm86, vmcode_umip_sldt - vmcode, VM86_SIGNAL, 0,
+ "SLDT instruction");
+ clearhandler(SIGILL);
+}
+
+int main(void)
+{
+ struct vm86plus_struct v86;
+ unsigned char *addr = mmap((void *)load_addr, 4096,
+ PROT_READ | PROT_WRITE | PROT_EXEC,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1,0);
+ if (addr != (unsigned char *)load_addr)
+ err(1, "mmap");
+
+ memcpy(addr, vmcode, end_vmcode - vmcode);
+ addr[2048] = 2;
+ addr[2050] = 3;
+
+ memset(&v86, 0, sizeof(v86));
+
+ v86.regs.cs = load_addr / 16;
+ v86.regs.ss = load_addr / 16;
+ v86.regs.ds = load_addr / 16;
+ v86.regs.es = load_addr / 16;
+
+ /* Use the end of the page as our stack. */
+ v86.regs.esp = 4096;
+
+ assert((v86.regs.cs & 3) == 0); /* Looks like RPL = 0 */
+
+ /* #BR -- should deliver SIG??? */
+ do_test(&v86, vmcode_bound - vmcode, VM86_INTx, 5, "#BR");
+
+ /*
+ * SYSENTER -- should cause #GP or #UD depending on CPU.
+ * Expected return type -1 means that we shouldn't validate
+ * the vm86 return value. This will avoid problems on non-SEP
+ * CPUs.
+ */
+ sethandler(SIGILL, sighandler, 0);
+ do_test(&v86, vmcode_sysenter - vmcode, -1, 0, "SYSENTER");
+ clearhandler(SIGILL);
+
+ /*
+ * SYSCALL would be a disaster in VM86 mode. Fortunately,
+ * there is no kernel that both enables SYSCALL and sets
+ * EFER.SCE, so it's #UD on all systems. But vm86 is
+ * buggy (or has a "feature"), so the SIGILL will actually
+ * be delivered.
+ */
+ sethandler(SIGILL, sighandler, 0);
+ do_test(&v86, vmcode_syscall - vmcode, VM86_SIGNAL, 0, "SYSCALL");
+ clearhandler(SIGILL);
+
+ /* STI with VIP set */
+ v86.regs.eflags |= X86_EFLAGS_VIP;
+ v86.regs.eflags &= ~X86_EFLAGS_IF;
+ do_test(&v86, vmcode_sti - vmcode, VM86_STI, 0, "STI with VIP set");
+
+ /* POPF with VIP set but IF clear: should not trap */
+ v86.regs.eflags = X86_EFLAGS_VIP;
+ v86.regs.eax = 0;
+ do_test(&v86, vmcode_popf_hlt - vmcode, VM86_UNKNOWN, 0, "POPF with VIP set and IF clear");
+
+ /* POPF with VIP set and IF set: should trap */
+ v86.regs.eflags = X86_EFLAGS_VIP;
+ v86.regs.eax = X86_EFLAGS_IF;
+ do_test(&v86, vmcode_popf_hlt - vmcode, VM86_STI, 0, "POPF with VIP and IF set");
+
+ /* POPF with VIP clear and IF set: should not trap */
+ v86.regs.eflags = 0;
+ v86.regs.eax = X86_EFLAGS_IF;
+ do_test(&v86, vmcode_popf_hlt - vmcode, VM86_UNKNOWN, 0, "POPF with VIP clear and IF set");
+
+ v86.regs.eflags = 0;
+
+ /* INT3 -- should cause #BP */
+ do_test(&v86, vmcode_int3 - vmcode, VM86_TRAP, 3, "INT3");
+
+ /* INT80 -- should exit with "INTx 0x80" */
+ v86.regs.eax = (unsigned int)-1;
+ do_test(&v86, vmcode_int80 - vmcode, VM86_INTx, 0x80, "int80");
+
+ /* UMIP -- should exit with INTx 0x80 unless UMIP was not disabled */
+ do_umip_tests(&v86, addr);
+
+ /* Execute a null pointer */
+ v86.regs.cs = 0;
+ v86.regs.ss = 0;
+ sethandler(SIGSEGV, sighandler, 0);
+ got_signal = 0;
+ if (do_test(&v86, 0, VM86_SIGNAL, 0, "Execute null pointer") &&
+ !got_signal) {
+ printf("[FAIL]\tDid not receive SIGSEGV\n");
+ nerrs++;
+ }
+ clearhandler(SIGSEGV);
+
+ /* Make sure nothing explodes if we fork. */
+ if (fork() == 0)
+ return 0;
+
+ return (nerrs == 0 ? 0 : 1);
+}
diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c
new file mode 100644
index 000000000..f249e042b
--- /dev/null
+++ b/tools/testing/selftests/x86/fsgsbase.c
@@ -0,0 +1,427 @@
+/*
+ * fsgsbase.c, an fsgsbase test
+ * Copyright (c) 2014-2016 Andy Lutomirski
+ * GPL v2
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <err.h>
+#include <sys/user.h>
+#include <asm/prctl.h>
+#include <sys/prctl.h>
+#include <signal.h>
+#include <limits.h>
+#include <sys/ucontext.h>
+#include <sched.h>
+#include <linux/futex.h>
+#include <pthread.h>
+#include <asm/ldt.h>
+#include <sys/mman.h>
+
+#ifndef __x86_64__
+# error This test is 64-bit only
+#endif
+
+static volatile sig_atomic_t want_segv;
+static volatile unsigned long segv_addr;
+
+static int nerrs;
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+ int flags)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static void clearhandler(int sig)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_handler = SIG_DFL;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static void sigsegv(int sig, siginfo_t *si, void *ctx_void)
+{
+ ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+ if (!want_segv) {
+ clearhandler(SIGSEGV);
+ return; /* Crash cleanly. */
+ }
+
+ want_segv = false;
+ segv_addr = (unsigned long)si->si_addr;
+
+ ctx->uc_mcontext.gregs[REG_RIP] += 4; /* Skip the faulting mov */
+
+}
+
+enum which_base { FS, GS };
+
+static unsigned long read_base(enum which_base which)
+{
+ unsigned long offset;
+ /*
+ * Unless we have FSGSBASE, there's no direct way to do this from
+ * user mode. We can get at it indirectly using signals, though.
+ */
+
+ want_segv = true;
+
+ offset = 0;
+ if (which == FS) {
+ /* Use a constant-length instruction here. */
+ asm volatile ("mov %%fs:(%%rcx), %%rax" : : "c" (offset) : "rax");
+ } else {
+ asm volatile ("mov %%gs:(%%rcx), %%rax" : : "c" (offset) : "rax");
+ }
+ if (!want_segv)
+ return segv_addr + offset;
+
+ /*
+ * If that didn't segfault, try the other end of the address space.
+ * Unless we get really unlucky and run into the vsyscall page, this
+ * is guaranteed to segfault.
+ */
+
+ offset = (ULONG_MAX >> 1) + 1;
+ if (which == FS) {
+ asm volatile ("mov %%fs:(%%rcx), %%rax"
+ : : "c" (offset) : "rax");
+ } else {
+ asm volatile ("mov %%gs:(%%rcx), %%rax"
+ : : "c" (offset) : "rax");
+ }
+ if (!want_segv)
+ return segv_addr + offset;
+
+ abort();
+}
+
+static void check_gs_value(unsigned long value)
+{
+ unsigned long base;
+ unsigned short sel;
+
+ printf("[RUN]\tARCH_SET_GS to 0x%lx\n", value);
+ if (syscall(SYS_arch_prctl, ARCH_SET_GS, value) != 0)
+ err(1, "ARCH_SET_GS");
+
+ asm volatile ("mov %%gs, %0" : "=rm" (sel));
+ base = read_base(GS);
+ if (base == value) {
+ printf("[OK]\tGSBASE was set as expected (selector 0x%hx)\n",
+ sel);
+ } else {
+ nerrs++;
+ printf("[FAIL]\tGSBASE was not as expected: got 0x%lx (selector 0x%hx)\n",
+ base, sel);
+ }
+
+ if (syscall(SYS_arch_prctl, ARCH_GET_GS, &base) != 0)
+ err(1, "ARCH_GET_GS");
+ if (base == value) {
+ printf("[OK]\tARCH_GET_GS worked as expected (selector 0x%hx)\n",
+ sel);
+ } else {
+ nerrs++;
+ printf("[FAIL]\tARCH_GET_GS was not as expected: got 0x%lx (selector 0x%hx)\n",
+ base, sel);
+ }
+}
+
+static void mov_0_gs(unsigned long initial_base, bool schedule)
+{
+ unsigned long base, arch_base;
+
+ printf("[RUN]\tARCH_SET_GS to 0x%lx then mov 0 to %%gs%s\n", initial_base, schedule ? " and schedule " : "");
+ if (syscall(SYS_arch_prctl, ARCH_SET_GS, initial_base) != 0)
+ err(1, "ARCH_SET_GS");
+
+ if (schedule)
+ usleep(10);
+
+ asm volatile ("mov %0, %%gs" : : "rm" (0));
+ base = read_base(GS);
+ if (syscall(SYS_arch_prctl, ARCH_GET_GS, &arch_base) != 0)
+ err(1, "ARCH_GET_GS");
+ if (base == arch_base) {
+ printf("[OK]\tGSBASE is 0x%lx\n", base);
+ } else {
+ nerrs++;
+ printf("[FAIL]\tGSBASE changed to 0x%lx but kernel reports 0x%lx\n", base, arch_base);
+ }
+}
+
+static volatile unsigned long remote_base;
+static volatile bool remote_hard_zero;
+static volatile unsigned int ftx;
+
+/*
+ * ARCH_SET_FS/GS(0) may or may not program a selector of zero. HARD_ZERO
+ * means to force the selector to zero to improve test coverage.
+ */
+#define HARD_ZERO 0xa1fa5f343cb85fa4
+
+static void do_remote_base()
+{
+ unsigned long to_set = remote_base;
+ bool hard_zero = false;
+ if (to_set == HARD_ZERO) {
+ to_set = 0;
+ hard_zero = true;
+ }
+
+ if (syscall(SYS_arch_prctl, ARCH_SET_GS, to_set) != 0)
+ err(1, "ARCH_SET_GS");
+
+ if (hard_zero)
+ asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0));
+
+ unsigned short sel;
+ asm volatile ("mov %%gs, %0" : "=rm" (sel));
+ printf("\tother thread: ARCH_SET_GS(0x%lx)%s -- sel is 0x%hx\n",
+ to_set, hard_zero ? " and clear gs" : "", sel);
+}
+
+void do_unexpected_base(void)
+{
+ /*
+ * The goal here is to try to arrange for GS == 0, GSBASE !=
+ * 0, and for the the kernel the think that GSBASE == 0.
+ *
+ * To make the test as reliable as possible, this uses
+ * explicit descriptorss. (This is not the only way. This
+ * could use ARCH_SET_GS with a low, nonzero base, but the
+ * relevant side effect of ARCH_SET_GS could change.)
+ */
+
+ /* Step 1: tell the kernel that we have GSBASE == 0. */
+ if (syscall(SYS_arch_prctl, ARCH_SET_GS, 0) != 0)
+ err(1, "ARCH_SET_GS");
+
+ /* Step 2: change GSBASE without telling the kernel. */
+ struct user_desc desc = {
+ .entry_number = 0,
+ .base_addr = 0xBAADF00D,
+ .limit = 0xfffff,
+ .seg_32bit = 1,
+ .contents = 0, /* Data, grow-up */
+ .read_exec_only = 0,
+ .limit_in_pages = 1,
+ .seg_not_present = 0,
+ .useable = 0
+ };
+ if (syscall(SYS_modify_ldt, 1, &desc, sizeof(desc)) == 0) {
+ printf("\tother thread: using LDT slot 0\n");
+ asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0x7));
+ } else {
+ /* No modify_ldt for us (configured out, perhaps) */
+
+ struct user_desc *low_desc = mmap(
+ NULL, sizeof(desc),
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0);
+ memcpy(low_desc, &desc, sizeof(desc));
+
+ low_desc->entry_number = -1;
+
+ /* 32-bit set_thread_area */
+ long ret;
+ asm volatile ("int $0x80"
+ : "=a" (ret) : "a" (243), "b" (low_desc)
+ : "r8", "r9", "r10", "r11");
+ memcpy(&desc, low_desc, sizeof(desc));
+ munmap(low_desc, sizeof(desc));
+
+ if (ret != 0) {
+ printf("[NOTE]\tcould not create a segment -- test won't do anything\n");
+ return;
+ }
+ printf("\tother thread: using GDT slot %d\n", desc.entry_number);
+ asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)((desc.entry_number << 3) | 0x3)));
+ }
+
+ /*
+ * Step 3: set the selector back to zero. On AMD chips, this will
+ * preserve GSBASE.
+ */
+
+ asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0));
+}
+
+static void *threadproc(void *ctx)
+{
+ while (1) {
+ while (ftx == 0)
+ syscall(SYS_futex, &ftx, FUTEX_WAIT, 0, NULL, NULL, 0);
+ if (ftx == 3)
+ return NULL;
+
+ if (ftx == 1)
+ do_remote_base();
+ else if (ftx == 2)
+ do_unexpected_base();
+ else
+ errx(1, "helper thread got bad command");
+
+ ftx = 0;
+ syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
+ }
+}
+
+static void set_gs_and_switch_to(unsigned long local,
+ unsigned short force_sel,
+ unsigned long remote)
+{
+ unsigned long base;
+ unsigned short sel_pre_sched, sel_post_sched;
+
+ bool hard_zero = false;
+ if (local == HARD_ZERO) {
+ hard_zero = true;
+ local = 0;
+ }
+
+ printf("[RUN]\tARCH_SET_GS(0x%lx)%s, then schedule to 0x%lx\n",
+ local, hard_zero ? " and clear gs" : "", remote);
+ if (force_sel)
+ printf("\tBefore schedule, set selector to 0x%hx\n", force_sel);
+ if (syscall(SYS_arch_prctl, ARCH_SET_GS, local) != 0)
+ err(1, "ARCH_SET_GS");
+ if (hard_zero)
+ asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0));
+
+ if (read_base(GS) != local) {
+ nerrs++;
+ printf("[FAIL]\tGSBASE wasn't set as expected\n");
+ }
+
+ if (force_sel) {
+ asm volatile ("mov %0, %%gs" : : "rm" (force_sel));
+ sel_pre_sched = force_sel;
+ local = read_base(GS);
+
+ /*
+ * Signal delivery seems to mess up weird selectors. Put it
+ * back.
+ */
+ asm volatile ("mov %0, %%gs" : : "rm" (force_sel));
+ } else {
+ asm volatile ("mov %%gs, %0" : "=rm" (sel_pre_sched));
+ }
+
+ remote_base = remote;
+ ftx = 1;
+ syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
+ while (ftx != 0)
+ syscall(SYS_futex, &ftx, FUTEX_WAIT, 1, NULL, NULL, 0);
+
+ asm volatile ("mov %%gs, %0" : "=rm" (sel_post_sched));
+ base = read_base(GS);
+ if (base == local && sel_pre_sched == sel_post_sched) {
+ printf("[OK]\tGS/BASE remained 0x%hx/0x%lx\n",
+ sel_pre_sched, local);
+ } else {
+ nerrs++;
+ printf("[FAIL]\tGS/BASE changed from 0x%hx/0x%lx to 0x%hx/0x%lx\n",
+ sel_pre_sched, local, sel_post_sched, base);
+ }
+}
+
+static void test_unexpected_base(void)
+{
+ unsigned long base;
+
+ printf("[RUN]\tARCH_SET_GS(0), clear gs, then manipulate GSBASE in a different thread\n");
+ if (syscall(SYS_arch_prctl, ARCH_SET_GS, 0) != 0)
+ err(1, "ARCH_SET_GS");
+ asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0));
+
+ ftx = 2;
+ syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
+ while (ftx != 0)
+ syscall(SYS_futex, &ftx, FUTEX_WAIT, 1, NULL, NULL, 0);
+
+ base = read_base(GS);
+ if (base == 0) {
+ printf("[OK]\tGSBASE remained 0\n");
+ } else {
+ nerrs++;
+ printf("[FAIL]\tGSBASE changed to 0x%lx\n", base);
+ }
+}
+
+int main()
+{
+ pthread_t thread;
+
+ sethandler(SIGSEGV, sigsegv, 0);
+
+ check_gs_value(0);
+ check_gs_value(1);
+ check_gs_value(0x200000000);
+ check_gs_value(0);
+ check_gs_value(0x200000000);
+ check_gs_value(1);
+
+ for (int sched = 0; sched < 2; sched++) {
+ mov_0_gs(0, !!sched);
+ mov_0_gs(1, !!sched);
+ mov_0_gs(0x200000000, !!sched);
+ }
+
+ /* Set up for multithreading. */
+
+ cpu_set_t cpuset;
+ CPU_ZERO(&cpuset);
+ CPU_SET(0, &cpuset);
+ if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
+ err(1, "sched_setaffinity to CPU 0"); /* should never fail */
+
+ if (pthread_create(&thread, 0, threadproc, 0) != 0)
+ err(1, "pthread_create");
+
+ static unsigned long bases_with_hard_zero[] = {
+ 0, HARD_ZERO, 1, 0x200000000,
+ };
+
+ for (int local = 0; local < 4; local++) {
+ for (int remote = 0; remote < 4; remote++) {
+ for (unsigned short s = 0; s < 5; s++) {
+ unsigned short sel = s;
+ if (s == 4)
+ asm ("mov %%ss, %0" : "=rm" (sel));
+ set_gs_and_switch_to(
+ bases_with_hard_zero[local],
+ sel,
+ bases_with_hard_zero[remote]);
+ }
+ }
+ }
+
+ test_unexpected_base();
+
+ ftx = 3; /* Kill the thread. */
+ syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
+
+ if (pthread_join(thread, NULL) != 0)
+ err(1, "pthread_join");
+
+ return nerrs == 0 ? 0 : 1;
+}
diff --git a/tools/testing/selftests/x86/ioperm.c b/tools/testing/selftests/x86/ioperm.c
new file mode 100644
index 000000000..01de41c1b
--- /dev/null
+++ b/tools/testing/selftests/x86/ioperm.c
@@ -0,0 +1,171 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ioperm.c - Test case for ioperm(2)
+ * Copyright (c) 2015 Andrew Lutomirski
+ */
+
+#define _GNU_SOURCE
+#include <err.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <signal.h>
+#include <setjmp.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdbool.h>
+#include <sched.h>
+#include <sys/io.h>
+
+static int nerrs = 0;
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+ int flags)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+
+}
+
+static void clearhandler(int sig)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_handler = SIG_DFL;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static jmp_buf jmpbuf;
+
+static void sigsegv(int sig, siginfo_t *si, void *ctx_void)
+{
+ siglongjmp(jmpbuf, 1);
+}
+
+static bool try_outb(unsigned short port)
+{
+ sethandler(SIGSEGV, sigsegv, SA_RESETHAND);
+ if (sigsetjmp(jmpbuf, 1) != 0) {
+ return false;
+ } else {
+ asm volatile ("outb %%al, %w[port]"
+ : : [port] "Nd" (port), "a" (0));
+ return true;
+ }
+ clearhandler(SIGSEGV);
+}
+
+static void expect_ok(unsigned short port)
+{
+ if (!try_outb(port)) {
+ printf("[FAIL]\toutb to 0x%02hx failed\n", port);
+ exit(1);
+ }
+
+ printf("[OK]\toutb to 0x%02hx worked\n", port);
+}
+
+static void expect_gp(unsigned short port)
+{
+ if (try_outb(port)) {
+ printf("[FAIL]\toutb to 0x%02hx worked\n", port);
+ exit(1);
+ }
+
+ printf("[OK]\toutb to 0x%02hx failed\n", port);
+}
+
+int main(void)
+{
+ cpu_set_t cpuset;
+ CPU_ZERO(&cpuset);
+ CPU_SET(0, &cpuset);
+ if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
+ err(1, "sched_setaffinity to CPU 0");
+
+ expect_gp(0x80);
+ expect_gp(0xed);
+
+ /*
+ * Probe for ioperm support. Note that clearing ioperm bits
+ * works even as nonroot.
+ */
+ printf("[RUN]\tenable 0x80\n");
+ if (ioperm(0x80, 1, 1) != 0) {
+ printf("[OK]\tioperm(0x80, 1, 1) failed (%d) -- try running as root\n",
+ errno);
+ return 0;
+ }
+ expect_ok(0x80);
+ expect_gp(0xed);
+
+ printf("[RUN]\tdisable 0x80\n");
+ if (ioperm(0x80, 1, 0) != 0) {
+ printf("[FAIL]\tioperm(0x80, 1, 0) failed (%d)", errno);
+ return 1;
+ }
+ expect_gp(0x80);
+ expect_gp(0xed);
+
+ /* Make sure that fork() preserves ioperm. */
+ if (ioperm(0x80, 1, 1) != 0) {
+ printf("[FAIL]\tioperm(0x80, 1, 0) failed (%d)", errno);
+ return 1;
+ }
+
+ pid_t child = fork();
+ if (child == -1)
+ err(1, "fork");
+
+ if (child == 0) {
+ printf("[RUN]\tchild: check that we inherited permissions\n");
+ expect_ok(0x80);
+ expect_gp(0xed);
+ return 0;
+ } else {
+ int status;
+ if (waitpid(child, &status, 0) != child ||
+ !WIFEXITED(status)) {
+ printf("[FAIL]\tChild died\n");
+ nerrs++;
+ } else if (WEXITSTATUS(status) != 0) {
+ printf("[FAIL]\tChild failed\n");
+ nerrs++;
+ } else {
+ printf("[OK]\tChild succeeded\n");
+ }
+ }
+
+ /* Test the capability checks. */
+
+ printf("\tDrop privileges\n");
+ if (setresuid(1, 1, 1) != 0) {
+ printf("[WARN]\tDropping privileges failed\n");
+ return 0;
+ }
+
+ printf("[RUN]\tdisable 0x80\n");
+ if (ioperm(0x80, 1, 0) != 0) {
+ printf("[FAIL]\tioperm(0x80, 1, 0) failed (%d)", errno);
+ return 1;
+ }
+ printf("[OK]\tit worked\n");
+
+ printf("[RUN]\tenable 0x80 again\n");
+ if (ioperm(0x80, 1, 1) == 0) {
+ printf("[FAIL]\tit succeeded but should have failed.\n");
+ return 1;
+ }
+ printf("[OK]\tit failed\n");
+ return 0;
+}
diff --git a/tools/testing/selftests/x86/iopl.c b/tools/testing/selftests/x86/iopl.c
new file mode 100644
index 000000000..6aa27f346
--- /dev/null
+++ b/tools/testing/selftests/x86/iopl.c
@@ -0,0 +1,136 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * iopl.c - Test case for a Linux on Xen 64-bit bug
+ * Copyright (c) 2015 Andrew Lutomirski
+ */
+
+#define _GNU_SOURCE
+#include <err.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <signal.h>
+#include <setjmp.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdbool.h>
+#include <sched.h>
+#include <sys/io.h>
+
+static int nerrs = 0;
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+ int flags)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+
+}
+
+static jmp_buf jmpbuf;
+
+static void sigsegv(int sig, siginfo_t *si, void *ctx_void)
+{
+ siglongjmp(jmpbuf, 1);
+}
+
+int main(void)
+{
+ cpu_set_t cpuset;
+ CPU_ZERO(&cpuset);
+ CPU_SET(0, &cpuset);
+ if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
+ err(1, "sched_setaffinity to CPU 0");
+
+ /* Probe for iopl support. Note that iopl(0) works even as nonroot. */
+ if (iopl(3) != 0) {
+ printf("[OK]\tiopl(3) failed (%d) -- try running as root\n",
+ errno);
+ return 0;
+ }
+
+ /* Restore our original state prior to starting the test. */
+ if (iopl(0) != 0)
+ err(1, "iopl(0)");
+
+ pid_t child = fork();
+ if (child == -1)
+ err(1, "fork");
+
+ if (child == 0) {
+ printf("\tchild: set IOPL to 3\n");
+ if (iopl(3) != 0)
+ err(1, "iopl");
+
+ printf("[RUN]\tchild: write to 0x80\n");
+ asm volatile ("outb %%al, $0x80" : : "a" (0));
+
+ return 0;
+ } else {
+ int status;
+ if (waitpid(child, &status, 0) != child ||
+ !WIFEXITED(status)) {
+ printf("[FAIL]\tChild died\n");
+ nerrs++;
+ } else if (WEXITSTATUS(status) != 0) {
+ printf("[FAIL]\tChild failed\n");
+ nerrs++;
+ } else {
+ printf("[OK]\tChild succeeded\n");
+ }
+ }
+
+ printf("[RUN]\tparent: write to 0x80 (should fail)\n");
+
+ sethandler(SIGSEGV, sigsegv, 0);
+ if (sigsetjmp(jmpbuf, 1) != 0) {
+ printf("[OK]\twrite was denied\n");
+ } else {
+ asm volatile ("outb %%al, $0x80" : : "a" (0));
+ printf("[FAIL]\twrite was allowed\n");
+ nerrs++;
+ }
+
+ /* Test the capability checks. */
+ printf("\tiopl(3)\n");
+ if (iopl(3) != 0)
+ err(1, "iopl(3)");
+
+ printf("\tDrop privileges\n");
+ if (setresuid(1, 1, 1) != 0) {
+ printf("[WARN]\tDropping privileges failed\n");
+ goto done;
+ }
+
+ printf("[RUN]\tiopl(3) unprivileged but with IOPL==3\n");
+ if (iopl(3) != 0) {
+ printf("[FAIL]\tiopl(3) should work if iopl is already 3 even if unprivileged\n");
+ nerrs++;
+ }
+
+ printf("[RUN]\tiopl(0) unprivileged\n");
+ if (iopl(0) != 0) {
+ printf("[FAIL]\tiopl(0) should work if iopl is already 3 even if unprivileged\n");
+ nerrs++;
+ }
+
+ printf("[RUN]\tiopl(3) unprivileged\n");
+ if (iopl(3) == 0) {
+ printf("[FAIL]\tiopl(3) should fail if when unprivileged if iopl==0\n");
+ nerrs++;
+ } else {
+ printf("[OK]\tFailed as expected\n");
+ }
+
+done:
+ return nerrs ? 1 : 0;
+}
+
diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c
new file mode 100644
index 000000000..1aef72df2
--- /dev/null
+++ b/tools/testing/selftests/x86/ldt_gdt.c
@@ -0,0 +1,927 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ldt_gdt.c - Test cases for LDT and GDT access
+ * Copyright (c) 2015 Andrew Lutomirski
+ */
+
+#define _GNU_SOURCE
+#include <err.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <signal.h>
+#include <setjmp.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <asm/ldt.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdbool.h>
+#include <pthread.h>
+#include <sched.h>
+#include <linux/futex.h>
+#include <sys/mman.h>
+#include <asm/prctl.h>
+#include <sys/prctl.h>
+
+#define AR_ACCESSED (1<<8)
+
+#define AR_TYPE_RODATA (0 * (1<<9))
+#define AR_TYPE_RWDATA (1 * (1<<9))
+#define AR_TYPE_RODATA_EXPDOWN (2 * (1<<9))
+#define AR_TYPE_RWDATA_EXPDOWN (3 * (1<<9))
+#define AR_TYPE_XOCODE (4 * (1<<9))
+#define AR_TYPE_XRCODE (5 * (1<<9))
+#define AR_TYPE_XOCODE_CONF (6 * (1<<9))
+#define AR_TYPE_XRCODE_CONF (7 * (1<<9))
+
+#define AR_DPL3 (3 * (1<<13))
+
+#define AR_S (1 << 12)
+#define AR_P (1 << 15)
+#define AR_AVL (1 << 20)
+#define AR_L (1 << 21)
+#define AR_DB (1 << 22)
+#define AR_G (1 << 23)
+
+#ifdef __x86_64__
+# define INT80_CLOBBERS "r8", "r9", "r10", "r11"
+#else
+# define INT80_CLOBBERS
+#endif
+
+static int nerrs;
+
+/* Points to an array of 1024 ints, each holding its own index. */
+static const unsigned int *counter_page;
+static struct user_desc *low_user_desc;
+static struct user_desc *low_user_desc_clear; /* Use to delete GDT entry */
+static int gdt_entry_num;
+
+static void check_invalid_segment(uint16_t index, int ldt)
+{
+ uint32_t has_limit = 0, has_ar = 0, limit, ar;
+ uint32_t selector = (index << 3) | (ldt << 2) | 3;
+
+ asm ("lsl %[selector], %[limit]\n\t"
+ "jnz 1f\n\t"
+ "movl $1, %[has_limit]\n\t"
+ "1:"
+ : [limit] "=r" (limit), [has_limit] "+rm" (has_limit)
+ : [selector] "r" (selector));
+ asm ("larl %[selector], %[ar]\n\t"
+ "jnz 1f\n\t"
+ "movl $1, %[has_ar]\n\t"
+ "1:"
+ : [ar] "=r" (ar), [has_ar] "+rm" (has_ar)
+ : [selector] "r" (selector));
+
+ if (has_limit || has_ar) {
+ printf("[FAIL]\t%s entry %hu is valid but should be invalid\n",
+ (ldt ? "LDT" : "GDT"), index);
+ nerrs++;
+ } else {
+ printf("[OK]\t%s entry %hu is invalid\n",
+ (ldt ? "LDT" : "GDT"), index);
+ }
+}
+
+static void check_valid_segment(uint16_t index, int ldt,
+ uint32_t expected_ar, uint32_t expected_limit,
+ bool verbose)
+{
+ uint32_t has_limit = 0, has_ar = 0, limit, ar;
+ uint32_t selector = (index << 3) | (ldt << 2) | 3;
+
+ asm ("lsl %[selector], %[limit]\n\t"
+ "jnz 1f\n\t"
+ "movl $1, %[has_limit]\n\t"
+ "1:"
+ : [limit] "=r" (limit), [has_limit] "+rm" (has_limit)
+ : [selector] "r" (selector));
+ asm ("larl %[selector], %[ar]\n\t"
+ "jnz 1f\n\t"
+ "movl $1, %[has_ar]\n\t"
+ "1:"
+ : [ar] "=r" (ar), [has_ar] "+rm" (has_ar)
+ : [selector] "r" (selector));
+
+ if (!has_limit || !has_ar) {
+ printf("[FAIL]\t%s entry %hu is invalid but should be valid\n",
+ (ldt ? "LDT" : "GDT"), index);
+ nerrs++;
+ return;
+ }
+
+ /* The SDM says "bits 19:16 are undefined". Thanks. */
+ ar &= ~0xF0000;
+
+ /*
+ * NB: Different Linux versions do different things with the
+ * accessed bit in set_thread_area().
+ */
+ if (ar != expected_ar && ar != (expected_ar | AR_ACCESSED)) {
+ printf("[FAIL]\t%s entry %hu has AR 0x%08X but expected 0x%08X\n",
+ (ldt ? "LDT" : "GDT"), index, ar, expected_ar);
+ nerrs++;
+ } else if (limit != expected_limit) {
+ printf("[FAIL]\t%s entry %hu has limit 0x%08X but expected 0x%08X\n",
+ (ldt ? "LDT" : "GDT"), index, limit, expected_limit);
+ nerrs++;
+ } else if (verbose) {
+ printf("[OK]\t%s entry %hu has AR 0x%08X and limit 0x%08X\n",
+ (ldt ? "LDT" : "GDT"), index, ar, limit);
+ }
+}
+
+static bool install_valid_mode(const struct user_desc *d, uint32_t ar,
+ bool oldmode, bool ldt)
+{
+ struct user_desc desc = *d;
+ int ret;
+
+ if (!ldt) {
+#ifndef __i386__
+ /* No point testing set_thread_area in a 64-bit build */
+ return false;
+#endif
+ if (!gdt_entry_num)
+ return false;
+ desc.entry_number = gdt_entry_num;
+
+ ret = syscall(SYS_set_thread_area, &desc);
+ } else {
+ ret = syscall(SYS_modify_ldt, oldmode ? 1 : 0x11,
+ &desc, sizeof(desc));
+
+ if (ret < -1)
+ errno = -ret;
+
+ if (ret != 0 && errno == ENOSYS) {
+ printf("[OK]\tmodify_ldt returned -ENOSYS\n");
+ return false;
+ }
+ }
+
+ if (ret == 0) {
+ uint32_t limit = desc.limit;
+ if (desc.limit_in_pages)
+ limit = (limit << 12) + 4095;
+ check_valid_segment(desc.entry_number, ldt, ar, limit, true);
+ return true;
+ } else {
+ if (desc.seg_32bit) {
+ printf("[FAIL]\tUnexpected %s failure %d\n",
+ ldt ? "modify_ldt" : "set_thread_area",
+ errno);
+ nerrs++;
+ return false;
+ } else {
+ printf("[OK]\t%s rejected 16 bit segment\n",
+ ldt ? "modify_ldt" : "set_thread_area");
+ return false;
+ }
+ }
+}
+
+static bool install_valid(const struct user_desc *desc, uint32_t ar)
+{
+ bool ret = install_valid_mode(desc, ar, false, true);
+
+ if (desc->contents <= 1 && desc->seg_32bit &&
+ !desc->seg_not_present) {
+ /* Should work in the GDT, too. */
+ install_valid_mode(desc, ar, false, false);
+ }
+
+ return ret;
+}
+
+static void install_invalid(const struct user_desc *desc, bool oldmode)
+{
+ int ret = syscall(SYS_modify_ldt, oldmode ? 1 : 0x11,
+ desc, sizeof(*desc));
+ if (ret < -1)
+ errno = -ret;
+ if (ret == 0) {
+ check_invalid_segment(desc->entry_number, 1);
+ } else if (errno == ENOSYS) {
+ printf("[OK]\tmodify_ldt returned -ENOSYS\n");
+ } else {
+ if (desc->seg_32bit) {
+ printf("[FAIL]\tUnexpected modify_ldt failure %d\n",
+ errno);
+ nerrs++;
+ } else {
+ printf("[OK]\tmodify_ldt rejected 16 bit segment\n");
+ }
+ }
+}
+
+static int safe_modify_ldt(int func, struct user_desc *ptr,
+ unsigned long bytecount)
+{
+ int ret = syscall(SYS_modify_ldt, 0x11, ptr, bytecount);
+ if (ret < -1)
+ errno = -ret;
+ return ret;
+}
+
+static void fail_install(struct user_desc *desc)
+{
+ if (safe_modify_ldt(0x11, desc, sizeof(*desc)) == 0) {
+ printf("[FAIL]\tmodify_ldt accepted a bad descriptor\n");
+ nerrs++;
+ } else if (errno == ENOSYS) {
+ printf("[OK]\tmodify_ldt returned -ENOSYS\n");
+ } else {
+ printf("[OK]\tmodify_ldt failure %d\n", errno);
+ }
+}
+
+static void do_simple_tests(void)
+{
+ struct user_desc desc = {
+ .entry_number = 0,
+ .base_addr = 0,
+ .limit = 10,
+ .seg_32bit = 1,
+ .contents = 2, /* Code, not conforming */
+ .read_exec_only = 0,
+ .limit_in_pages = 0,
+ .seg_not_present = 0,
+ .useable = 0
+ };
+ install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE | AR_S | AR_P | AR_DB);
+
+ desc.limit_in_pages = 1;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE |
+ AR_S | AR_P | AR_DB | AR_G);
+
+ check_invalid_segment(1, 1);
+
+ desc.entry_number = 2;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE |
+ AR_S | AR_P | AR_DB | AR_G);
+
+ check_invalid_segment(1, 1);
+
+ desc.base_addr = 0xf0000000;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE |
+ AR_S | AR_P | AR_DB | AR_G);
+
+ desc.useable = 1;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE |
+ AR_S | AR_P | AR_DB | AR_G | AR_AVL);
+
+ desc.seg_not_present = 1;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE |
+ AR_S | AR_DB | AR_G | AR_AVL);
+
+ desc.seg_32bit = 0;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE |
+ AR_S | AR_G | AR_AVL);
+
+ desc.seg_32bit = 1;
+ desc.contents = 0;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA |
+ AR_S | AR_DB | AR_G | AR_AVL);
+
+ desc.read_exec_only = 1;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA |
+ AR_S | AR_DB | AR_G | AR_AVL);
+
+ desc.contents = 1;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA_EXPDOWN |
+ AR_S | AR_DB | AR_G | AR_AVL);
+
+ desc.read_exec_only = 0;
+ desc.limit_in_pages = 0;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA_EXPDOWN |
+ AR_S | AR_DB | AR_AVL);
+
+ desc.contents = 3;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE_CONF |
+ AR_S | AR_DB | AR_AVL);
+
+ desc.read_exec_only = 1;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_XOCODE_CONF |
+ AR_S | AR_DB | AR_AVL);
+
+ desc.read_exec_only = 0;
+ desc.contents = 2;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE |
+ AR_S | AR_DB | AR_AVL);
+
+ desc.read_exec_only = 1;
+
+#ifdef __x86_64__
+ desc.lm = 1;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_XOCODE |
+ AR_S | AR_DB | AR_AVL);
+ desc.lm = 0;
+#endif
+
+ bool entry1_okay = install_valid(&desc, AR_DPL3 | AR_TYPE_XOCODE |
+ AR_S | AR_DB | AR_AVL);
+
+ if (entry1_okay) {
+ printf("[RUN]\tTest fork\n");
+ pid_t child = fork();
+ if (child == 0) {
+ nerrs = 0;
+ check_valid_segment(desc.entry_number, 1,
+ AR_DPL3 | AR_TYPE_XOCODE |
+ AR_S | AR_DB | AR_AVL, desc.limit,
+ true);
+ check_invalid_segment(1, 1);
+ exit(nerrs ? 1 : 0);
+ } else {
+ int status;
+ if (waitpid(child, &status, 0) != child ||
+ !WIFEXITED(status)) {
+ printf("[FAIL]\tChild died\n");
+ nerrs++;
+ } else if (WEXITSTATUS(status) != 0) {
+ printf("[FAIL]\tChild failed\n");
+ nerrs++;
+ } else {
+ printf("[OK]\tChild succeeded\n");
+ }
+ }
+
+ printf("[RUN]\tTest size\n");
+ int i;
+ for (i = 0; i < 8192; i++) {
+ desc.entry_number = i;
+ desc.limit = i;
+ if (safe_modify_ldt(0x11, &desc, sizeof(desc)) != 0) {
+ printf("[FAIL]\tFailed to install entry %d\n", i);
+ nerrs++;
+ break;
+ }
+ }
+ for (int j = 0; j < i; j++) {
+ check_valid_segment(j, 1, AR_DPL3 | AR_TYPE_XOCODE |
+ AR_S | AR_DB | AR_AVL, j, false);
+ }
+ printf("[DONE]\tSize test\n");
+ } else {
+ printf("[SKIP]\tSkipping fork and size tests because we have no LDT\n");
+ }
+
+ /* Test entry_number too high. */
+ desc.entry_number = 8192;
+ fail_install(&desc);
+
+ /* Test deletion and actions mistakeable for deletion. */
+ memset(&desc, 0, sizeof(desc));
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S | AR_P);
+
+ desc.seg_not_present = 1;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S);
+
+ desc.seg_not_present = 0;
+ desc.read_exec_only = 1;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA | AR_S | AR_P);
+
+ desc.read_exec_only = 0;
+ desc.seg_not_present = 1;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S);
+
+ desc.read_exec_only = 1;
+ desc.limit = 1;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA | AR_S);
+
+ desc.limit = 0;
+ desc.base_addr = 1;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA | AR_S);
+
+ desc.base_addr = 0;
+ install_invalid(&desc, false);
+
+ desc.seg_not_present = 0;
+ desc.seg_32bit = 1;
+ desc.read_exec_only = 0;
+ desc.limit = 0xfffff;
+
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S | AR_P | AR_DB);
+
+ desc.limit_in_pages = 1;
+
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S | AR_P | AR_DB | AR_G);
+ desc.read_exec_only = 1;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA | AR_S | AR_P | AR_DB | AR_G);
+ desc.contents = 1;
+ desc.read_exec_only = 0;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA_EXPDOWN | AR_S | AR_P | AR_DB | AR_G);
+ desc.read_exec_only = 1;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA_EXPDOWN | AR_S | AR_P | AR_DB | AR_G);
+
+ desc.limit = 0;
+ install_invalid(&desc, true);
+}
+
+/*
+ * 0: thread is idle
+ * 1: thread armed
+ * 2: thread should clear LDT entry 0
+ * 3: thread should exit
+ */
+static volatile unsigned int ftx;
+
+static void *threadproc(void *ctx)
+{
+ cpu_set_t cpuset;
+ CPU_ZERO(&cpuset);
+ CPU_SET(1, &cpuset);
+ if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
+ err(1, "sched_setaffinity to CPU 1"); /* should never fail */
+
+ while (1) {
+ syscall(SYS_futex, &ftx, FUTEX_WAIT, 0, NULL, NULL, 0);
+ while (ftx != 2) {
+ if (ftx >= 3)
+ return NULL;
+ }
+
+ /* clear LDT entry 0 */
+ const struct user_desc desc = {};
+ if (syscall(SYS_modify_ldt, 1, &desc, sizeof(desc)) != 0)
+ err(1, "modify_ldt");
+
+ /* If ftx == 2, set it to zero. If ftx == 100, quit. */
+ unsigned int x = -2;
+ asm volatile ("lock xaddl %[x], %[ftx]" :
+ [x] "+r" (x), [ftx] "+m" (ftx));
+ if (x != 2)
+ return NULL;
+ }
+}
+
+#ifdef __i386__
+
+#ifndef SA_RESTORE
+#define SA_RESTORER 0x04000000
+#endif
+
+/*
+ * The UAPI header calls this 'struct sigaction', which conflicts with
+ * glibc. Sigh.
+ */
+struct fake_ksigaction {
+ void *handler; /* the real type is nasty */
+ unsigned long sa_flags;
+ void (*sa_restorer)(void);
+ unsigned char sigset[8];
+};
+
+static void fix_sa_restorer(int sig)
+{
+ struct fake_ksigaction ksa;
+
+ if (syscall(SYS_rt_sigaction, sig, NULL, &ksa, 8) == 0) {
+ /*
+ * glibc has a nasty bug: it sometimes writes garbage to
+ * sa_restorer. This interacts quite badly with anything
+ * that fiddles with SS because it can trigger legacy
+ * stack switching. Patch it up. See:
+ *
+ * https://sourceware.org/bugzilla/show_bug.cgi?id=21269
+ */
+ if (!(ksa.sa_flags & SA_RESTORER) && ksa.sa_restorer) {
+ ksa.sa_restorer = NULL;
+ if (syscall(SYS_rt_sigaction, sig, &ksa, NULL,
+ sizeof(ksa.sigset)) != 0)
+ err(1, "rt_sigaction");
+ }
+ }
+}
+#else
+static void fix_sa_restorer(int sig)
+{
+ /* 64-bit glibc works fine. */
+}
+#endif
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+ int flags)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+
+ fix_sa_restorer(sig);
+}
+
+static jmp_buf jmpbuf;
+
+static void sigsegv(int sig, siginfo_t *info, void *ctx_void)
+{
+ siglongjmp(jmpbuf, 1);
+}
+
+static void do_multicpu_tests(void)
+{
+ cpu_set_t cpuset;
+ pthread_t thread;
+ int failures = 0, iters = 5, i;
+ unsigned short orig_ss;
+
+ CPU_ZERO(&cpuset);
+ CPU_SET(1, &cpuset);
+ if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) {
+ printf("[SKIP]\tCannot set affinity to CPU 1\n");
+ return;
+ }
+
+ CPU_ZERO(&cpuset);
+ CPU_SET(0, &cpuset);
+ if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) {
+ printf("[SKIP]\tCannot set affinity to CPU 0\n");
+ return;
+ }
+
+ sethandler(SIGSEGV, sigsegv, 0);
+#ifdef __i386__
+ /* True 32-bit kernels send SIGILL instead of SIGSEGV on IRET faults. */
+ sethandler(SIGILL, sigsegv, 0);
+#endif
+
+ printf("[RUN]\tCross-CPU LDT invalidation\n");
+
+ if (pthread_create(&thread, 0, threadproc, 0) != 0)
+ err(1, "pthread_create");
+
+ asm volatile ("mov %%ss, %0" : "=rm" (orig_ss));
+
+ for (i = 0; i < 5; i++) {
+ if (sigsetjmp(jmpbuf, 1) != 0)
+ continue;
+
+ /* Make sure the thread is ready after the last test. */
+ while (ftx != 0)
+ ;
+
+ struct user_desc desc = {
+ .entry_number = 0,
+ .base_addr = 0,
+ .limit = 0xfffff,
+ .seg_32bit = 1,
+ .contents = 0, /* Data */
+ .read_exec_only = 0,
+ .limit_in_pages = 1,
+ .seg_not_present = 0,
+ .useable = 0
+ };
+
+ if (safe_modify_ldt(0x11, &desc, sizeof(desc)) != 0) {
+ if (errno != ENOSYS)
+ err(1, "modify_ldt");
+ printf("[SKIP]\tmodify_ldt unavailable\n");
+ break;
+ }
+
+ /* Arm the thread. */
+ ftx = 1;
+ syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
+
+ asm volatile ("mov %0, %%ss" : : "r" (0x7));
+
+ /* Go! */
+ ftx = 2;
+
+ while (ftx != 0)
+ ;
+
+ /*
+ * On success, modify_ldt will segfault us synchronously,
+ * and we'll escape via siglongjmp.
+ */
+
+ failures++;
+ asm volatile ("mov %0, %%ss" : : "rm" (orig_ss));
+ };
+
+ ftx = 100; /* Kill the thread. */
+ syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
+
+ if (pthread_join(thread, NULL) != 0)
+ err(1, "pthread_join");
+
+ if (failures) {
+ printf("[FAIL]\t%d of %d iterations failed\n", failures, iters);
+ nerrs++;
+ } else {
+ printf("[OK]\tAll %d iterations succeeded\n", iters);
+ }
+}
+
+static int finish_exec_test(void)
+{
+ /*
+ * Older kernel versions did inherit the LDT on exec() which is
+ * wrong because exec() starts from a clean state.
+ */
+ check_invalid_segment(0, 1);
+
+ return nerrs ? 1 : 0;
+}
+
+static void do_exec_test(void)
+{
+ printf("[RUN]\tTest exec\n");
+
+ struct user_desc desc = {
+ .entry_number = 0,
+ .base_addr = 0,
+ .limit = 42,
+ .seg_32bit = 1,
+ .contents = 2, /* Code, not conforming */
+ .read_exec_only = 0,
+ .limit_in_pages = 0,
+ .seg_not_present = 0,
+ .useable = 0
+ };
+ install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE | AR_S | AR_P | AR_DB);
+
+ pid_t child = fork();
+ if (child == 0) {
+ execl("/proc/self/exe", "ldt_gdt_test_exec", NULL);
+ printf("[FAIL]\tCould not exec self\n");
+ exit(1); /* exec failed */
+ } else {
+ int status;
+ if (waitpid(child, &status, 0) != child ||
+ !WIFEXITED(status)) {
+ printf("[FAIL]\tChild died\n");
+ nerrs++;
+ } else if (WEXITSTATUS(status) != 0) {
+ printf("[FAIL]\tChild failed\n");
+ nerrs++;
+ } else {
+ printf("[OK]\tChild succeeded\n");
+ }
+ }
+}
+
+static void setup_counter_page(void)
+{
+ unsigned int *page = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE | MAP_32BIT, -1, 0);
+ if (page == MAP_FAILED)
+ err(1, "mmap");
+
+ for (int i = 0; i < 1024; i++)
+ page[i] = i;
+ counter_page = page;
+}
+
+static int invoke_set_thread_area(void)
+{
+ int ret;
+ asm volatile ("int $0x80"
+ : "=a" (ret), "+m" (low_user_desc) :
+ "a" (243), "b" (low_user_desc)
+ : INT80_CLOBBERS);
+ return ret;
+}
+
+static void setup_low_user_desc(void)
+{
+ low_user_desc = mmap(NULL, 2 * sizeof(struct user_desc),
+ PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE | MAP_32BIT, -1, 0);
+ if (low_user_desc == MAP_FAILED)
+ err(1, "mmap");
+
+ low_user_desc->entry_number = -1;
+ low_user_desc->base_addr = (unsigned long)&counter_page[1];
+ low_user_desc->limit = 0xfffff;
+ low_user_desc->seg_32bit = 1;
+ low_user_desc->contents = 0; /* Data, grow-up*/
+ low_user_desc->read_exec_only = 0;
+ low_user_desc->limit_in_pages = 1;
+ low_user_desc->seg_not_present = 0;
+ low_user_desc->useable = 0;
+
+ if (invoke_set_thread_area() == 0) {
+ gdt_entry_num = low_user_desc->entry_number;
+ printf("[NOTE]\tset_thread_area is available; will use GDT index %d\n", gdt_entry_num);
+ } else {
+ printf("[NOTE]\tset_thread_area is unavailable\n");
+ }
+
+ low_user_desc_clear = low_user_desc + 1;
+ low_user_desc_clear->entry_number = gdt_entry_num;
+ low_user_desc_clear->read_exec_only = 1;
+ low_user_desc_clear->seg_not_present = 1;
+}
+
+static void test_gdt_invalidation(void)
+{
+ if (!gdt_entry_num)
+ return; /* 64-bit only system -- we can't use set_thread_area */
+
+ unsigned short prev_sel;
+ unsigned short sel;
+ unsigned int eax;
+ const char *result;
+#ifdef __x86_64__
+ unsigned long saved_base;
+ unsigned long new_base;
+#endif
+
+ /* Test DS */
+ invoke_set_thread_area();
+ eax = 243;
+ sel = (gdt_entry_num << 3) | 3;
+ asm volatile ("movw %%ds, %[prev_sel]\n\t"
+ "movw %[sel], %%ds\n\t"
+#ifdef __i386__
+ "pushl %%ebx\n\t"
+#endif
+ "movl %[arg1], %%ebx\n\t"
+ "int $0x80\n\t" /* Should invalidate ds */
+#ifdef __i386__
+ "popl %%ebx\n\t"
+#endif
+ "movw %%ds, %[sel]\n\t"
+ "movw %[prev_sel], %%ds"
+ : [prev_sel] "=&r" (prev_sel), [sel] "+r" (sel),
+ "+a" (eax)
+ : "m" (low_user_desc_clear),
+ [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear)
+ : INT80_CLOBBERS);
+
+ if (sel != 0) {
+ result = "FAIL";
+ nerrs++;
+ } else {
+ result = "OK";
+ }
+ printf("[%s]\tInvalidate DS with set_thread_area: new DS = 0x%hx\n",
+ result, sel);
+
+ /* Test ES */
+ invoke_set_thread_area();
+ eax = 243;
+ sel = (gdt_entry_num << 3) | 3;
+ asm volatile ("movw %%es, %[prev_sel]\n\t"
+ "movw %[sel], %%es\n\t"
+#ifdef __i386__
+ "pushl %%ebx\n\t"
+#endif
+ "movl %[arg1], %%ebx\n\t"
+ "int $0x80\n\t" /* Should invalidate es */
+#ifdef __i386__
+ "popl %%ebx\n\t"
+#endif
+ "movw %%es, %[sel]\n\t"
+ "movw %[prev_sel], %%es"
+ : [prev_sel] "=&r" (prev_sel), [sel] "+r" (sel),
+ "+a" (eax)
+ : "m" (low_user_desc_clear),
+ [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear)
+ : INT80_CLOBBERS);
+
+ if (sel != 0) {
+ result = "FAIL";
+ nerrs++;
+ } else {
+ result = "OK";
+ }
+ printf("[%s]\tInvalidate ES with set_thread_area: new ES = 0x%hx\n",
+ result, sel);
+
+ /* Test FS */
+ invoke_set_thread_area();
+ eax = 243;
+ sel = (gdt_entry_num << 3) | 3;
+#ifdef __x86_64__
+ syscall(SYS_arch_prctl, ARCH_GET_FS, &saved_base);
+#endif
+ asm volatile ("movw %%fs, %[prev_sel]\n\t"
+ "movw %[sel], %%fs\n\t"
+#ifdef __i386__
+ "pushl %%ebx\n\t"
+#endif
+ "movl %[arg1], %%ebx\n\t"
+ "int $0x80\n\t" /* Should invalidate fs */
+#ifdef __i386__
+ "popl %%ebx\n\t"
+#endif
+ "movw %%fs, %[sel]\n\t"
+ : [prev_sel] "=&r" (prev_sel), [sel] "+r" (sel),
+ "+a" (eax)
+ : "m" (low_user_desc_clear),
+ [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear)
+ : INT80_CLOBBERS);
+
+#ifdef __x86_64__
+ syscall(SYS_arch_prctl, ARCH_GET_FS, &new_base);
+#endif
+
+ /* Restore FS/BASE for glibc */
+ asm volatile ("movw %[prev_sel], %%fs" : : [prev_sel] "rm" (prev_sel));
+#ifdef __x86_64__
+ if (saved_base)
+ syscall(SYS_arch_prctl, ARCH_SET_FS, saved_base);
+#endif
+
+ if (sel != 0) {
+ result = "FAIL";
+ nerrs++;
+ } else {
+ result = "OK";
+ }
+ printf("[%s]\tInvalidate FS with set_thread_area: new FS = 0x%hx\n",
+ result, sel);
+
+#ifdef __x86_64__
+ if (sel == 0 && new_base != 0) {
+ nerrs++;
+ printf("[FAIL]\tNew FSBASE was 0x%lx\n", new_base);
+ } else {
+ printf("[OK]\tNew FSBASE was zero\n");
+ }
+#endif
+
+ /* Test GS */
+ invoke_set_thread_area();
+ eax = 243;
+ sel = (gdt_entry_num << 3) | 3;
+#ifdef __x86_64__
+ syscall(SYS_arch_prctl, ARCH_GET_GS, &saved_base);
+#endif
+ asm volatile ("movw %%gs, %[prev_sel]\n\t"
+ "movw %[sel], %%gs\n\t"
+#ifdef __i386__
+ "pushl %%ebx\n\t"
+#endif
+ "movl %[arg1], %%ebx\n\t"
+ "int $0x80\n\t" /* Should invalidate gs */
+#ifdef __i386__
+ "popl %%ebx\n\t"
+#endif
+ "movw %%gs, %[sel]\n\t"
+ : [prev_sel] "=&r" (prev_sel), [sel] "+r" (sel),
+ "+a" (eax)
+ : "m" (low_user_desc_clear),
+ [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear)
+ : INT80_CLOBBERS);
+
+#ifdef __x86_64__
+ syscall(SYS_arch_prctl, ARCH_GET_GS, &new_base);
+#endif
+
+ /* Restore GS/BASE for glibc */
+ asm volatile ("movw %[prev_sel], %%gs" : : [prev_sel] "rm" (prev_sel));
+#ifdef __x86_64__
+ if (saved_base)
+ syscall(SYS_arch_prctl, ARCH_SET_GS, saved_base);
+#endif
+
+ if (sel != 0) {
+ result = "FAIL";
+ nerrs++;
+ } else {
+ result = "OK";
+ }
+ printf("[%s]\tInvalidate GS with set_thread_area: new GS = 0x%hx\n",
+ result, sel);
+
+#ifdef __x86_64__
+ if (sel == 0 && new_base != 0) {
+ nerrs++;
+ printf("[FAIL]\tNew GSBASE was 0x%lx\n", new_base);
+ } else {
+ printf("[OK]\tNew GSBASE was zero\n");
+ }
+#endif
+}
+
+int main(int argc, char **argv)
+{
+ if (argc == 1 && !strcmp(argv[0], "ldt_gdt_test_exec"))
+ return finish_exec_test();
+
+ setup_counter_page();
+ setup_low_user_desc();
+
+ do_simple_tests();
+
+ do_multicpu_tests();
+
+ do_exec_test();
+
+ test_gdt_invalidation();
+
+ return nerrs ? 1 : 0;
+}
diff --git a/tools/testing/selftests/x86/mov_ss_trap.c b/tools/testing/selftests/x86/mov_ss_trap.c
new file mode 100644
index 000000000..3c3a02265
--- /dev/null
+++ b/tools/testing/selftests/x86/mov_ss_trap.c
@@ -0,0 +1,285 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * mov_ss_trap.c: Exercise the bizarre side effects of a watchpoint on MOV SS
+ *
+ * This does MOV SS from a watchpointed address followed by various
+ * types of kernel entries. A MOV SS that hits a watchpoint will queue
+ * up a #DB trap but will not actually deliver that trap. The trap
+ * will be delivered after the next instruction instead. The CPU's logic
+ * seems to be:
+ *
+ * - Any fault: drop the pending #DB trap.
+ * - INT $N, INT3, INTO, SYSCALL, SYSENTER: enter the kernel and then
+ * deliver #DB.
+ * - ICEBP: enter the kernel but do not deliver the watchpoint trap
+ * - breakpoint: only one #DB is delivered (phew!)
+ *
+ * There are plenty of ways for a kernel to handle this incorrectly. This
+ * test tries to exercise all the cases.
+ *
+ * This should mostly cover CVE-2018-1087 and CVE-2018-8897.
+ */
+#define _GNU_SOURCE
+
+#include <stdlib.h>
+#include <sys/ptrace.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/user.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <errno.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <err.h>
+#include <string.h>
+#include <setjmp.h>
+#include <sys/prctl.h>
+
+#define X86_EFLAGS_RF (1UL << 16)
+
+#if __x86_64__
+# define REG_IP REG_RIP
+#else
+# define REG_IP REG_EIP
+#endif
+
+unsigned short ss;
+extern unsigned char breakpoint_insn[];
+sigjmp_buf jmpbuf;
+static unsigned char altstack_data[SIGSTKSZ];
+
+static void enable_watchpoint(void)
+{
+ pid_t parent = getpid();
+ int status;
+
+ pid_t child = fork();
+ if (child < 0)
+ err(1, "fork");
+
+ if (child) {
+ if (waitpid(child, &status, 0) != child)
+ err(1, "waitpid for child");
+ } else {
+ unsigned long dr0, dr1, dr7;
+
+ dr0 = (unsigned long)&ss;
+ dr1 = (unsigned long)breakpoint_insn;
+ dr7 = ((1UL << 1) | /* G0 */
+ (3UL << 16) | /* RW0 = read or write */
+ (1UL << 18) | /* LEN0 = 2 bytes */
+ (1UL << 3)); /* G1, RW1 = insn */
+
+ if (ptrace(PTRACE_ATTACH, parent, NULL, NULL) != 0)
+ err(1, "PTRACE_ATTACH");
+
+ if (waitpid(parent, &status, 0) != parent)
+ err(1, "waitpid for child");
+
+ if (ptrace(PTRACE_POKEUSER, parent, (void *)offsetof(struct user, u_debugreg[0]), dr0) != 0)
+ err(1, "PTRACE_POKEUSER DR0");
+
+ if (ptrace(PTRACE_POKEUSER, parent, (void *)offsetof(struct user, u_debugreg[1]), dr1) != 0)
+ err(1, "PTRACE_POKEUSER DR1");
+
+ if (ptrace(PTRACE_POKEUSER, parent, (void *)offsetof(struct user, u_debugreg[7]), dr7) != 0)
+ err(1, "PTRACE_POKEUSER DR7");
+
+ printf("\tDR0 = %lx, DR1 = %lx, DR7 = %lx\n", dr0, dr1, dr7);
+
+ if (ptrace(PTRACE_DETACH, parent, NULL, NULL) != 0)
+ err(1, "PTRACE_DETACH");
+
+ exit(0);
+ }
+}
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+ int flags)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static char const * const signames[] = {
+ [SIGSEGV] = "SIGSEGV",
+ [SIGBUS] = "SIBGUS",
+ [SIGTRAP] = "SIGTRAP",
+ [SIGILL] = "SIGILL",
+};
+
+static void sigtrap(int sig, siginfo_t *si, void *ctx_void)
+{
+ ucontext_t *ctx = ctx_void;
+
+ printf("\tGot SIGTRAP with RIP=%lx, EFLAGS.RF=%d\n",
+ (unsigned long)ctx->uc_mcontext.gregs[REG_IP],
+ !!(ctx->uc_mcontext.gregs[REG_EFL] & X86_EFLAGS_RF));
+}
+
+static void handle_and_return(int sig, siginfo_t *si, void *ctx_void)
+{
+ ucontext_t *ctx = ctx_void;
+
+ printf("\tGot %s with RIP=%lx\n", signames[sig],
+ (unsigned long)ctx->uc_mcontext.gregs[REG_IP]);
+}
+
+static void handle_and_longjmp(int sig, siginfo_t *si, void *ctx_void)
+{
+ ucontext_t *ctx = ctx_void;
+
+ printf("\tGot %s with RIP=%lx\n", signames[sig],
+ (unsigned long)ctx->uc_mcontext.gregs[REG_IP]);
+
+ siglongjmp(jmpbuf, 1);
+}
+
+int main()
+{
+ unsigned long nr;
+
+ asm volatile ("mov %%ss, %[ss]" : [ss] "=m" (ss));
+ printf("\tSS = 0x%hx, &SS = 0x%p\n", ss, &ss);
+
+ if (prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY, 0, 0, 0) == 0)
+ printf("\tPR_SET_PTRACER_ANY succeeded\n");
+
+ printf("\tSet up a watchpoint\n");
+ sethandler(SIGTRAP, sigtrap, 0);
+ enable_watchpoint();
+
+ printf("[RUN]\tRead from watched memory (should get SIGTRAP)\n");
+ asm volatile ("mov %[ss], %[tmp]" : [tmp] "=r" (nr) : [ss] "m" (ss));
+
+ printf("[RUN]\tMOV SS; INT3\n");
+ asm volatile ("mov %[ss], %%ss; int3" :: [ss] "m" (ss));
+
+ printf("[RUN]\tMOV SS; INT 3\n");
+ asm volatile ("mov %[ss], %%ss; .byte 0xcd, 0x3" :: [ss] "m" (ss));
+
+ printf("[RUN]\tMOV SS; CS CS INT3\n");
+ asm volatile ("mov %[ss], %%ss; .byte 0x2e, 0x2e; int3" :: [ss] "m" (ss));
+
+ printf("[RUN]\tMOV SS; CSx14 INT3\n");
+ asm volatile ("mov %[ss], %%ss; .fill 14,1,0x2e; int3" :: [ss] "m" (ss));
+
+ printf("[RUN]\tMOV SS; INT 4\n");
+ sethandler(SIGSEGV, handle_and_return, SA_RESETHAND);
+ asm volatile ("mov %[ss], %%ss; int $4" :: [ss] "m" (ss));
+
+#ifdef __i386__
+ printf("[RUN]\tMOV SS; INTO\n");
+ sethandler(SIGSEGV, handle_and_return, SA_RESETHAND);
+ nr = -1;
+ asm volatile ("add $1, %[tmp]; mov %[ss], %%ss; into"
+ : [tmp] "+r" (nr) : [ss] "m" (ss));
+#endif
+
+ if (sigsetjmp(jmpbuf, 1) == 0) {
+ printf("[RUN]\tMOV SS; ICEBP\n");
+
+ /* Some emulators (e.g. QEMU TCG) don't emulate ICEBP. */
+ sethandler(SIGILL, handle_and_longjmp, SA_RESETHAND);
+
+ asm volatile ("mov %[ss], %%ss; .byte 0xf1" :: [ss] "m" (ss));
+ }
+
+ if (sigsetjmp(jmpbuf, 1) == 0) {
+ printf("[RUN]\tMOV SS; CLI\n");
+ sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND);
+ asm volatile ("mov %[ss], %%ss; cli" :: [ss] "m" (ss));
+ }
+
+ if (sigsetjmp(jmpbuf, 1) == 0) {
+ printf("[RUN]\tMOV SS; #PF\n");
+ sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND);
+ asm volatile ("mov %[ss], %%ss; mov (-1), %[tmp]"
+ : [tmp] "=r" (nr) : [ss] "m" (ss));
+ }
+
+ /*
+ * INT $1: if #DB has DPL=3 and there isn't special handling,
+ * then the kernel will die.
+ */
+ if (sigsetjmp(jmpbuf, 1) == 0) {
+ printf("[RUN]\tMOV SS; INT 1\n");
+ sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND);
+ asm volatile ("mov %[ss], %%ss; int $1" :: [ss] "m" (ss));
+ }
+
+#ifdef __x86_64__
+ /*
+ * In principle, we should test 32-bit SYSCALL as well, but
+ * the calling convention is so unpredictable that it's
+ * not obviously worth the effort.
+ */
+ if (sigsetjmp(jmpbuf, 1) == 0) {
+ printf("[RUN]\tMOV SS; SYSCALL\n");
+ sethandler(SIGILL, handle_and_longjmp, SA_RESETHAND);
+ nr = SYS_getpid;
+ /*
+ * Toggle the high bit of RSP to make it noncanonical to
+ * strengthen this test on non-SMAP systems.
+ */
+ asm volatile ("btc $63, %%rsp\n\t"
+ "mov %[ss], %%ss; syscall\n\t"
+ "btc $63, %%rsp"
+ : "+a" (nr) : [ss] "m" (ss)
+ : "rcx"
+#ifdef __x86_64__
+ , "r11"
+#endif
+ );
+ }
+#endif
+
+ printf("[RUN]\tMOV SS; breakpointed NOP\n");
+ asm volatile ("mov %[ss], %%ss; breakpoint_insn: nop" :: [ss] "m" (ss));
+
+ /*
+ * Invoking SYSENTER directly breaks all the rules. Just handle
+ * the SIGSEGV.
+ */
+ if (sigsetjmp(jmpbuf, 1) == 0) {
+ printf("[RUN]\tMOV SS; SYSENTER\n");
+ stack_t stack = {
+ .ss_sp = altstack_data,
+ .ss_size = SIGSTKSZ,
+ };
+ if (sigaltstack(&stack, NULL) != 0)
+ err(1, "sigaltstack");
+ sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND | SA_ONSTACK);
+ nr = SYS_getpid;
+ asm volatile ("mov %[ss], %%ss; SYSENTER" : "+a" (nr)
+ : [ss] "m" (ss) : "flags", "rcx"
+#ifdef __x86_64__
+ , "r11"
+#endif
+ );
+
+ /* We're unreachable here. SYSENTER forgets RIP. */
+ }
+
+ if (sigsetjmp(jmpbuf, 1) == 0) {
+ printf("[RUN]\tMOV SS; INT $0x80\n");
+ sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND);
+ nr = 20; /* compat getpid */
+ asm volatile ("mov %[ss], %%ss; int $0x80"
+ : "+a" (nr) : [ss] "m" (ss)
+ : "flags"
+#ifdef __x86_64__
+ , "r8", "r9", "r10", "r11"
+#endif
+ );
+ }
+
+ printf("[OK]\tI aten't dead\n");
+ return 0;
+}
diff --git a/tools/testing/selftests/x86/mpx-debug.h b/tools/testing/selftests/x86/mpx-debug.h
new file mode 100644
index 000000000..7546eba7f
--- /dev/null
+++ b/tools/testing/selftests/x86/mpx-debug.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _MPX_DEBUG_H
+#define _MPX_DEBUG_H
+
+#ifndef DEBUG_LEVEL
+#define DEBUG_LEVEL 0
+#endif
+#define dprintf_level(level, args...) do { if(level <= DEBUG_LEVEL) printf(args); } while(0)
+#define dprintf1(args...) dprintf_level(1, args)
+#define dprintf2(args...) dprintf_level(2, args)
+#define dprintf3(args...) dprintf_level(3, args)
+#define dprintf4(args...) dprintf_level(4, args)
+#define dprintf5(args...) dprintf_level(5, args)
+
+#endif /* _MPX_DEBUG_H */
diff --git a/tools/testing/selftests/x86/mpx-dig.c b/tools/testing/selftests/x86/mpx-dig.c
new file mode 100644
index 000000000..c13607ef5
--- /dev/null
+++ b/tools/testing/selftests/x86/mpx-dig.c
@@ -0,0 +1,499 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Written by Dave Hansen <dave.hansen@intel.com>
+ */
+
+#include <stdlib.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <string.h>
+#include <fcntl.h>
+#include "mpx-debug.h"
+#include "mpx-mm.h"
+#include "mpx-hw.h"
+
+unsigned long bounds_dir_global;
+
+#define mpx_dig_abort() __mpx_dig_abort(__FILE__, __func__, __LINE__)
+static void inline __mpx_dig_abort(const char *file, const char *func, int line)
+{
+ fprintf(stderr, "MPX dig abort @ %s::%d in %s()\n", file, line, func);
+ printf("MPX dig abort @ %s::%d in %s()\n", file, line, func);
+ abort();
+}
+
+/*
+ * run like this (BDIR finds the probably bounds directory):
+ *
+ * BDIR="$(cat /proc/$pid/smaps | grep -B1 2097152 \
+ * | head -1 | awk -F- '{print $1}')";
+ * ./mpx-dig $pid 0x$BDIR
+ *
+ * NOTE:
+ * assumes that the only 2097152-kb VMA is the bounds dir
+ */
+
+long nr_incore(void *ptr, unsigned long size_bytes)
+{
+ int i;
+ long ret = 0;
+ long vec_len = size_bytes / PAGE_SIZE;
+ unsigned char *vec = malloc(vec_len);
+ int incore_ret;
+
+ if (!vec)
+ mpx_dig_abort();
+
+ incore_ret = mincore(ptr, size_bytes, vec);
+ if (incore_ret) {
+ printf("mincore ret: %d\n", incore_ret);
+ perror("mincore");
+ mpx_dig_abort();
+ }
+ for (i = 0; i < vec_len; i++)
+ ret += vec[i];
+ free(vec);
+ return ret;
+}
+
+int open_proc(int pid, char *file)
+{
+ static char buf[100];
+ int fd;
+
+ snprintf(&buf[0], sizeof(buf), "/proc/%d/%s", pid, file);
+ fd = open(&buf[0], O_RDONLY);
+ if (fd < 0)
+ perror(buf);
+
+ return fd;
+}
+
+struct vaddr_range {
+ unsigned long start;
+ unsigned long end;
+};
+struct vaddr_range *ranges;
+int nr_ranges_allocated;
+int nr_ranges_populated;
+int last_range = -1;
+
+int __pid_load_vaddrs(int pid)
+{
+ int ret = 0;
+ int proc_maps_fd = open_proc(pid, "maps");
+ char linebuf[10000];
+ unsigned long start;
+ unsigned long end;
+ char rest[1000];
+ FILE *f = fdopen(proc_maps_fd, "r");
+
+ if (!f)
+ mpx_dig_abort();
+ nr_ranges_populated = 0;
+ while (!feof(f)) {
+ char *readret = fgets(linebuf, sizeof(linebuf), f);
+ int parsed;
+
+ if (readret == NULL) {
+ if (feof(f))
+ break;
+ mpx_dig_abort();
+ }
+
+ parsed = sscanf(linebuf, "%lx-%lx%s", &start, &end, rest);
+ if (parsed != 3)
+ mpx_dig_abort();
+
+ dprintf4("result[%d]: %lx-%lx<->%s\n", parsed, start, end, rest);
+ if (nr_ranges_populated >= nr_ranges_allocated) {
+ ret = -E2BIG;
+ break;
+ }
+ ranges[nr_ranges_populated].start = start;
+ ranges[nr_ranges_populated].end = end;
+ nr_ranges_populated++;
+ }
+ last_range = -1;
+ fclose(f);
+ close(proc_maps_fd);
+ return ret;
+}
+
+int pid_load_vaddrs(int pid)
+{
+ int ret;
+
+ dprintf2("%s(%d)\n", __func__, pid);
+ if (!ranges) {
+ nr_ranges_allocated = 4;
+ ranges = malloc(nr_ranges_allocated * sizeof(ranges[0]));
+ dprintf2("%s(%d) allocated %d ranges @ %p\n", __func__, pid,
+ nr_ranges_allocated, ranges);
+ assert(ranges != NULL);
+ }
+ do {
+ ret = __pid_load_vaddrs(pid);
+ if (!ret)
+ break;
+ if (ret == -E2BIG) {
+ dprintf2("%s(%d) need to realloc\n", __func__, pid);
+ nr_ranges_allocated *= 2;
+ ranges = realloc(ranges,
+ nr_ranges_allocated * sizeof(ranges[0]));
+ dprintf2("%s(%d) allocated %d ranges @ %p\n", __func__,
+ pid, nr_ranges_allocated, ranges);
+ assert(ranges != NULL);
+ dprintf1("reallocating to hold %d ranges\n", nr_ranges_allocated);
+ }
+ } while (1);
+
+ dprintf2("%s(%d) done\n", __func__, pid);
+
+ return ret;
+}
+
+static inline int vaddr_in_range(unsigned long vaddr, struct vaddr_range *r)
+{
+ if (vaddr < r->start)
+ return 0;
+ if (vaddr >= r->end)
+ return 0;
+ return 1;
+}
+
+static inline int vaddr_mapped_by_range(unsigned long vaddr)
+{
+ int i;
+
+ if (last_range > 0 && vaddr_in_range(vaddr, &ranges[last_range]))
+ return 1;
+
+ for (i = 0; i < nr_ranges_populated; i++) {
+ struct vaddr_range *r = &ranges[i];
+
+ if (vaddr_in_range(vaddr, r))
+ continue;
+ last_range = i;
+ return 1;
+ }
+ return 0;
+}
+
+const int bt_entry_size_bytes = sizeof(unsigned long) * 4;
+
+void *read_bounds_table_into_buf(unsigned long table_vaddr)
+{
+#ifdef MPX_DIG_STANDALONE
+ static char bt_buf[MPX_BOUNDS_TABLE_SIZE_BYTES];
+ off_t seek_ret = lseek(fd, table_vaddr, SEEK_SET);
+ if (seek_ret != table_vaddr)
+ mpx_dig_abort();
+
+ int read_ret = read(fd, &bt_buf, sizeof(bt_buf));
+ if (read_ret != sizeof(bt_buf))
+ mpx_dig_abort();
+ return &bt_buf;
+#else
+ return (void *)table_vaddr;
+#endif
+}
+
+int dump_table(unsigned long table_vaddr, unsigned long base_controlled_vaddr,
+ unsigned long bde_vaddr)
+{
+ unsigned long offset_inside_bt;
+ int nr_entries = 0;
+ int do_abort = 0;
+ char *bt_buf;
+
+ dprintf3("%s() base_controlled_vaddr: 0x%012lx bde_vaddr: 0x%012lx\n",
+ __func__, base_controlled_vaddr, bde_vaddr);
+
+ bt_buf = read_bounds_table_into_buf(table_vaddr);
+
+ dprintf4("%s() read done\n", __func__);
+
+ for (offset_inside_bt = 0;
+ offset_inside_bt < MPX_BOUNDS_TABLE_SIZE_BYTES;
+ offset_inside_bt += bt_entry_size_bytes) {
+ unsigned long bt_entry_index;
+ unsigned long bt_entry_controls;
+ unsigned long this_bt_entry_for_vaddr;
+ unsigned long *bt_entry_buf;
+ int i;
+
+ dprintf4("%s() offset_inside_bt: 0x%lx of 0x%llx\n", __func__,
+ offset_inside_bt, MPX_BOUNDS_TABLE_SIZE_BYTES);
+ bt_entry_buf = (void *)&bt_buf[offset_inside_bt];
+ if (!bt_buf) {
+ printf("null bt_buf\n");
+ mpx_dig_abort();
+ }
+ if (!bt_entry_buf) {
+ printf("null bt_entry_buf\n");
+ mpx_dig_abort();
+ }
+ dprintf4("%s() reading *bt_entry_buf @ %p\n", __func__,
+ bt_entry_buf);
+ if (!bt_entry_buf[0] &&
+ !bt_entry_buf[1] &&
+ !bt_entry_buf[2] &&
+ !bt_entry_buf[3])
+ continue;
+
+ nr_entries++;
+
+ bt_entry_index = offset_inside_bt/bt_entry_size_bytes;
+ bt_entry_controls = sizeof(void *);
+ this_bt_entry_for_vaddr =
+ base_controlled_vaddr + bt_entry_index*bt_entry_controls;
+ /*
+ * We sign extend vaddr bits 48->63 which effectively
+ * creates a hole in the virtual address space.
+ * This calculation corrects for the hole.
+ */
+ if (this_bt_entry_for_vaddr > 0x00007fffffffffffUL)
+ this_bt_entry_for_vaddr |= 0xffff800000000000;
+
+ if (!vaddr_mapped_by_range(this_bt_entry_for_vaddr)) {
+ printf("bt_entry_buf: %p\n", bt_entry_buf);
+ printf("there is a bte for %lx but no mapping\n",
+ this_bt_entry_for_vaddr);
+ printf(" bde vaddr: %016lx\n", bde_vaddr);
+ printf("base_controlled_vaddr: %016lx\n", base_controlled_vaddr);
+ printf(" table_vaddr: %016lx\n", table_vaddr);
+ printf(" entry vaddr: %016lx @ offset %lx\n",
+ table_vaddr + offset_inside_bt, offset_inside_bt);
+ do_abort = 1;
+ mpx_dig_abort();
+ }
+ if (DEBUG_LEVEL < 4)
+ continue;
+
+ printf("table entry[%lx]: ", offset_inside_bt);
+ for (i = 0; i < bt_entry_size_bytes; i += sizeof(unsigned long))
+ printf("0x%016lx ", bt_entry_buf[i]);
+ printf("\n");
+ }
+ if (do_abort)
+ mpx_dig_abort();
+ dprintf4("%s() done\n", __func__);
+ return nr_entries;
+}
+
+int search_bd_buf(char *buf, int len_bytes, unsigned long bd_offset_bytes,
+ int *nr_populated_bdes)
+{
+ unsigned long i;
+ int total_entries = 0;
+
+ dprintf3("%s(%p, %x, %lx, ...) buf end: %p\n", __func__, buf,
+ len_bytes, bd_offset_bytes, buf + len_bytes);
+
+ for (i = 0; i < len_bytes; i += sizeof(unsigned long)) {
+ unsigned long bd_index = (bd_offset_bytes + i) / sizeof(unsigned long);
+ unsigned long *bounds_dir_entry_ptr = (unsigned long *)&buf[i];
+ unsigned long bounds_dir_entry;
+ unsigned long bd_for_vaddr;
+ unsigned long bt_start;
+ unsigned long bt_tail;
+ int nr_entries;
+
+ dprintf4("%s() loop i: %ld bounds_dir_entry_ptr: %p\n", __func__, i,
+ bounds_dir_entry_ptr);
+
+ bounds_dir_entry = *bounds_dir_entry_ptr;
+ if (!bounds_dir_entry) {
+ dprintf4("no bounds dir at index 0x%lx / 0x%lx "
+ "start at offset:%lx %lx\n", bd_index, bd_index,
+ bd_offset_bytes, i);
+ continue;
+ }
+ dprintf3("found bounds_dir_entry: 0x%lx @ "
+ "index 0x%lx buf ptr: %p\n", bounds_dir_entry, i,
+ &buf[i]);
+ /* mask off the enable bit: */
+ bounds_dir_entry &= ~0x1;
+ (*nr_populated_bdes)++;
+ dprintf4("nr_populated_bdes: %p\n", nr_populated_bdes);
+ dprintf4("*nr_populated_bdes: %d\n", *nr_populated_bdes);
+
+ bt_start = bounds_dir_entry;
+ bt_tail = bounds_dir_entry + MPX_BOUNDS_TABLE_SIZE_BYTES - 1;
+ if (!vaddr_mapped_by_range(bt_start)) {
+ printf("bounds directory 0x%lx points to nowhere\n",
+ bounds_dir_entry);
+ mpx_dig_abort();
+ }
+ if (!vaddr_mapped_by_range(bt_tail)) {
+ printf("bounds directory end 0x%lx points to nowhere\n",
+ bt_tail);
+ mpx_dig_abort();
+ }
+ /*
+ * Each bounds directory entry controls 1MB of virtual address
+ * space. This variable is the virtual address in the process
+ * of the beginning of the area controlled by this bounds_dir.
+ */
+ bd_for_vaddr = bd_index * (1UL<<20);
+
+ nr_entries = dump_table(bounds_dir_entry, bd_for_vaddr,
+ bounds_dir_global+bd_offset_bytes+i);
+ total_entries += nr_entries;
+ dprintf5("dir entry[%4ld @ %p]: 0x%lx %6d entries "
+ "total this buf: %7d bd_for_vaddrs: 0x%lx -> 0x%lx\n",
+ bd_index, buf+i,
+ bounds_dir_entry, nr_entries, total_entries,
+ bd_for_vaddr, bd_for_vaddr + (1UL<<20));
+ }
+ dprintf3("%s(%p, %x, %lx, ...) done\n", __func__, buf, len_bytes,
+ bd_offset_bytes);
+ return total_entries;
+}
+
+int proc_pid_mem_fd = -1;
+
+void *fill_bounds_dir_buf_other(long byte_offset_inside_bounds_dir,
+ long buffer_size_bytes, void *buffer)
+{
+ unsigned long seekto = bounds_dir_global + byte_offset_inside_bounds_dir;
+ int read_ret;
+ off_t seek_ret = lseek(proc_pid_mem_fd, seekto, SEEK_SET);
+
+ if (seek_ret != seekto)
+ mpx_dig_abort();
+
+ read_ret = read(proc_pid_mem_fd, buffer, buffer_size_bytes);
+ /* there shouldn't practically be short reads of /proc/$pid/mem */
+ if (read_ret != buffer_size_bytes)
+ mpx_dig_abort();
+
+ return buffer;
+}
+void *fill_bounds_dir_buf_self(long byte_offset_inside_bounds_dir,
+ long buffer_size_bytes, void *buffer)
+
+{
+ unsigned char vec[buffer_size_bytes / PAGE_SIZE];
+ char *dig_bounds_dir_ptr =
+ (void *)(bounds_dir_global + byte_offset_inside_bounds_dir);
+ /*
+ * use mincore() to quickly find the areas of the bounds directory
+ * that have memory and thus will be worth scanning.
+ */
+ int incore_ret;
+
+ int incore = 0;
+ int i;
+
+ dprintf4("%s() dig_bounds_dir_ptr: %p\n", __func__, dig_bounds_dir_ptr);
+
+ incore_ret = mincore(dig_bounds_dir_ptr, buffer_size_bytes, &vec[0]);
+ if (incore_ret) {
+ printf("mincore ret: %d\n", incore_ret);
+ perror("mincore");
+ mpx_dig_abort();
+ }
+ for (i = 0; i < sizeof(vec); i++)
+ incore += vec[i];
+ dprintf4("%s() total incore: %d\n", __func__, incore);
+ if (!incore)
+ return NULL;
+ dprintf3("%s() total incore: %d\n", __func__, incore);
+ return dig_bounds_dir_ptr;
+}
+
+int inspect_pid(int pid)
+{
+ static int dig_nr;
+ long offset_inside_bounds_dir;
+ char bounds_dir_buf[sizeof(unsigned long) * (1UL << 15)];
+ char *dig_bounds_dir_ptr;
+ int total_entries = 0;
+ int nr_populated_bdes = 0;
+ int inspect_self;
+
+ if (getpid() == pid) {
+ dprintf4("inspecting self\n");
+ inspect_self = 1;
+ } else {
+ dprintf4("inspecting pid %d\n", pid);
+ mpx_dig_abort();
+ }
+
+ for (offset_inside_bounds_dir = 0;
+ offset_inside_bounds_dir < MPX_BOUNDS_TABLE_SIZE_BYTES;
+ offset_inside_bounds_dir += sizeof(bounds_dir_buf)) {
+ static int bufs_skipped;
+ int this_entries;
+
+ if (inspect_self) {
+ dig_bounds_dir_ptr =
+ fill_bounds_dir_buf_self(offset_inside_bounds_dir,
+ sizeof(bounds_dir_buf),
+ &bounds_dir_buf[0]);
+ } else {
+ dig_bounds_dir_ptr =
+ fill_bounds_dir_buf_other(offset_inside_bounds_dir,
+ sizeof(bounds_dir_buf),
+ &bounds_dir_buf[0]);
+ }
+ if (!dig_bounds_dir_ptr) {
+ bufs_skipped++;
+ continue;
+ }
+ this_entries = search_bd_buf(dig_bounds_dir_ptr,
+ sizeof(bounds_dir_buf),
+ offset_inside_bounds_dir,
+ &nr_populated_bdes);
+ total_entries += this_entries;
+ }
+ printf("mpx dig (%3d) complete, SUCCESS (%8d / %4d)\n", ++dig_nr,
+ total_entries, nr_populated_bdes);
+ return total_entries + nr_populated_bdes;
+}
+
+#ifdef MPX_DIG_REMOTE
+int main(int argc, char **argv)
+{
+ int err;
+ char *c;
+ unsigned long bounds_dir_entry;
+ int pid;
+
+ printf("mpx-dig starting...\n");
+ err = sscanf(argv[1], "%d", &pid);
+ printf("parsing: '%s', err: %d\n", argv[1], err);
+ if (err != 1)
+ mpx_dig_abort();
+
+ err = sscanf(argv[2], "%lx", &bounds_dir_global);
+ printf("parsing: '%s': %d\n", argv[2], err);
+ if (err != 1)
+ mpx_dig_abort();
+
+ proc_pid_mem_fd = open_proc(pid, "mem");
+ if (proc_pid_mem_fd < 0)
+ mpx_dig_abort();
+
+ inspect_pid(pid);
+ return 0;
+}
+#endif
+
+long inspect_me(struct mpx_bounds_dir *bounds_dir)
+{
+ int pid = getpid();
+
+ pid_load_vaddrs(pid);
+ bounds_dir_global = (unsigned long)bounds_dir;
+ dprintf4("enter %s() bounds dir: %p\n", __func__, bounds_dir);
+ return inspect_pid(pid);
+}
diff --git a/tools/testing/selftests/x86/mpx-hw.h b/tools/testing/selftests/x86/mpx-hw.h
new file mode 100644
index 000000000..d1b61ab87
--- /dev/null
+++ b/tools/testing/selftests/x86/mpx-hw.h
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _MPX_HW_H
+#define _MPX_HW_H
+
+#include <assert.h>
+
+/* Describe the MPX Hardware Layout in here */
+
+#define NR_MPX_BOUNDS_REGISTERS 4
+
+#ifdef __i386__
+
+#define MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES 16 /* 4 * 32-bits */
+#define MPX_BOUNDS_TABLE_SIZE_BYTES (1ULL << 14) /* 16k */
+#define MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES 4
+#define MPX_BOUNDS_DIR_SIZE_BYTES (1ULL << 22) /* 4MB */
+
+#define MPX_BOUNDS_TABLE_BOTTOM_BIT 2
+#define MPX_BOUNDS_TABLE_TOP_BIT 11
+#define MPX_BOUNDS_DIR_BOTTOM_BIT 12
+#define MPX_BOUNDS_DIR_TOP_BIT 31
+
+#else
+
+/*
+ * Linear Address of "pointer" (LAp)
+ * 0 -> 2: ignored
+ * 3 -> 19: index in to bounds table
+ * 20 -> 47: index in to bounds directory
+ * 48 -> 63: ignored
+ */
+
+#define MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES 32
+#define MPX_BOUNDS_TABLE_SIZE_BYTES (1ULL << 22) /* 4MB */
+#define MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES 8
+#define MPX_BOUNDS_DIR_SIZE_BYTES (1ULL << 31) /* 2GB */
+
+#define MPX_BOUNDS_TABLE_BOTTOM_BIT 3
+#define MPX_BOUNDS_TABLE_TOP_BIT 19
+#define MPX_BOUNDS_DIR_BOTTOM_BIT 20
+#define MPX_BOUNDS_DIR_TOP_BIT 47
+
+#endif
+
+#define MPX_BOUNDS_DIR_NR_ENTRIES \
+ (MPX_BOUNDS_DIR_SIZE_BYTES/MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES)
+#define MPX_BOUNDS_TABLE_NR_ENTRIES \
+ (MPX_BOUNDS_TABLE_SIZE_BYTES/MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES)
+
+#define MPX_BOUNDS_TABLE_ENTRY_VALID_BIT 0x1
+
+struct mpx_bd_entry {
+ union {
+ char x[MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES];
+ void *contents[0];
+ };
+} __attribute__((packed));
+
+struct mpx_bt_entry {
+ union {
+ char x[MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES];
+ unsigned long contents[0];
+ };
+} __attribute__((packed));
+
+struct mpx_bounds_dir {
+ struct mpx_bd_entry entries[MPX_BOUNDS_DIR_NR_ENTRIES];
+} __attribute__((packed));
+
+struct mpx_bounds_table {
+ struct mpx_bt_entry entries[MPX_BOUNDS_TABLE_NR_ENTRIES];
+} __attribute__((packed));
+
+static inline unsigned long GET_BITS(unsigned long val, int bottombit, int topbit)
+{
+ int total_nr_bits = topbit - bottombit;
+ unsigned long mask = (1UL << total_nr_bits)-1;
+ return (val >> bottombit) & mask;
+}
+
+static inline unsigned long __vaddr_bounds_table_index(void *vaddr)
+{
+ return GET_BITS((unsigned long)vaddr, MPX_BOUNDS_TABLE_BOTTOM_BIT,
+ MPX_BOUNDS_TABLE_TOP_BIT);
+}
+
+static inline unsigned long __vaddr_bounds_directory_index(void *vaddr)
+{
+ return GET_BITS((unsigned long)vaddr, MPX_BOUNDS_DIR_BOTTOM_BIT,
+ MPX_BOUNDS_DIR_TOP_BIT);
+}
+
+static inline struct mpx_bd_entry *mpx_vaddr_to_bd_entry(void *vaddr,
+ struct mpx_bounds_dir *bounds_dir)
+{
+ unsigned long index = __vaddr_bounds_directory_index(vaddr);
+ return &bounds_dir->entries[index];
+}
+
+static inline int bd_entry_valid(struct mpx_bd_entry *bounds_dir_entry)
+{
+ unsigned long __bd_entry = (unsigned long)bounds_dir_entry->contents;
+ return (__bd_entry & MPX_BOUNDS_TABLE_ENTRY_VALID_BIT);
+}
+
+static inline struct mpx_bounds_table *
+__bd_entry_to_bounds_table(struct mpx_bd_entry *bounds_dir_entry)
+{
+ unsigned long __bd_entry = (unsigned long)bounds_dir_entry->contents;
+ assert(__bd_entry & MPX_BOUNDS_TABLE_ENTRY_VALID_BIT);
+ __bd_entry &= ~MPX_BOUNDS_TABLE_ENTRY_VALID_BIT;
+ return (struct mpx_bounds_table *)__bd_entry;
+}
+
+static inline struct mpx_bt_entry *
+mpx_vaddr_to_bt_entry(void *vaddr, struct mpx_bounds_dir *bounds_dir)
+{
+ struct mpx_bd_entry *bde = mpx_vaddr_to_bd_entry(vaddr, bounds_dir);
+ struct mpx_bounds_table *bt = __bd_entry_to_bounds_table(bde);
+ unsigned long index = __vaddr_bounds_table_index(vaddr);
+ return &bt->entries[index];
+}
+
+#endif /* _MPX_HW_H */
diff --git a/tools/testing/selftests/x86/mpx-mini-test.c b/tools/testing/selftests/x86/mpx-mini-test.c
new file mode 100644
index 000000000..50f7e9272
--- /dev/null
+++ b/tools/testing/selftests/x86/mpx-mini-test.c
@@ -0,0 +1,1616 @@
+/*
+ * mpx-mini-test.c: routines to test Intel MPX (Memory Protection eXtentions)
+ *
+ * Written by:
+ * "Ren, Qiaowei" <qiaowei.ren@intel.com>
+ * "Wei, Gang" <gang.wei@intel.com>
+ * "Hansen, Dave" <dave.hansen@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2.
+ */
+
+/*
+ * 2014-12-05: Dave Hansen: fixed all of the compiler warnings, and made sure
+ * it works on 32-bit.
+ */
+
+int inspect_every_this_many_mallocs = 100;
+int zap_all_every_this_many_mallocs = 1000;
+
+#define _GNU_SOURCE
+#define _LARGEFILE64_SOURCE
+
+#include <string.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <signal.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <ucontext.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "mpx-hw.h"
+#include "mpx-debug.h"
+#include "mpx-mm.h"
+
+#ifndef __always_inline
+#define __always_inline inline __attribute__((always_inline)
+#endif
+
+#ifndef TEST_DURATION_SECS
+#define TEST_DURATION_SECS 3
+#endif
+
+void write_int_to(char *prefix, char *file, int int_to_write)
+{
+ char buf[100];
+ int fd = open(file, O_RDWR);
+ int len;
+ int ret;
+
+ assert(fd >= 0);
+ len = snprintf(buf, sizeof(buf), "%s%d", prefix, int_to_write);
+ assert(len >= 0);
+ assert(len < sizeof(buf));
+ ret = write(fd, buf, len);
+ assert(ret == len);
+ ret = close(fd);
+ assert(!ret);
+}
+
+void write_pid_to(char *prefix, char *file)
+{
+ write_int_to(prefix, file, getpid());
+}
+
+void trace_me(void)
+{
+/* tracing events dir */
+#define TED "/sys/kernel/debug/tracing/events/"
+/*
+ write_pid_to("common_pid=", TED "signal/filter");
+ write_pid_to("common_pid=", TED "exceptions/filter");
+ write_int_to("", TED "signal/enable", 1);
+ write_int_to("", TED "exceptions/enable", 1);
+*/
+ write_pid_to("", "/sys/kernel/debug/tracing/set_ftrace_pid");
+ write_int_to("", "/sys/kernel/debug/tracing/trace", 0);
+}
+
+#define test_failed() __test_failed(__FILE__, __LINE__)
+static void __test_failed(char *f, int l)
+{
+ fprintf(stderr, "abort @ %s::%d\n", f, l);
+ abort();
+}
+
+/* Error Printf */
+#define eprintf(args...) fprintf(stderr, args)
+
+#ifdef __i386__
+
+/* i386 directory size is 4MB */
+#define REG_IP_IDX REG_EIP
+#define REX_PREFIX
+
+#define XSAVE_OFFSET_IN_FPMEM sizeof(struct _libc_fpstate)
+
+/*
+ * __cpuid() is from the Linux Kernel:
+ */
+static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
+ unsigned int *ecx, unsigned int *edx)
+{
+ /* ecx is often an input as well as an output. */
+ asm volatile(
+ "push %%ebx;"
+ "cpuid;"
+ "mov %%ebx, %1;"
+ "pop %%ebx"
+ : "=a" (*eax),
+ "=g" (*ebx),
+ "=c" (*ecx),
+ "=d" (*edx)
+ : "0" (*eax), "2" (*ecx));
+}
+
+#else /* __i386__ */
+
+#define REG_IP_IDX REG_RIP
+#define REX_PREFIX "0x48, "
+
+#define XSAVE_OFFSET_IN_FPMEM 0
+
+/*
+ * __cpuid() is from the Linux Kernel:
+ */
+static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
+ unsigned int *ecx, unsigned int *edx)
+{
+ /* ecx is often an input as well as an output. */
+ asm volatile(
+ "cpuid;"
+ : "=a" (*eax),
+ "=b" (*ebx),
+ "=c" (*ecx),
+ "=d" (*edx)
+ : "0" (*eax), "2" (*ecx));
+}
+
+#endif /* !__i386__ */
+
+struct xsave_hdr_struct {
+ uint64_t xstate_bv;
+ uint64_t reserved1[2];
+ uint64_t reserved2[5];
+} __attribute__((packed));
+
+struct bndregs_struct {
+ uint64_t bndregs[8];
+} __attribute__((packed));
+
+struct bndcsr_struct {
+ uint64_t cfg_reg_u;
+ uint64_t status_reg;
+} __attribute__((packed));
+
+struct xsave_struct {
+ uint8_t fpu_sse[512];
+ struct xsave_hdr_struct xsave_hdr;
+ uint8_t ymm[256];
+ uint8_t lwp[128];
+ struct bndregs_struct bndregs;
+ struct bndcsr_struct bndcsr;
+} __attribute__((packed));
+
+uint8_t __attribute__((__aligned__(64))) buffer[4096];
+struct xsave_struct *xsave_buf = (struct xsave_struct *)buffer;
+
+uint8_t __attribute__((__aligned__(64))) test_buffer[4096];
+struct xsave_struct *xsave_test_buf = (struct xsave_struct *)test_buffer;
+
+uint64_t num_bnd_chk;
+
+static __always_inline void xrstor_state(struct xsave_struct *fx, uint64_t mask)
+{
+ uint32_t lmask = mask;
+ uint32_t hmask = mask >> 32;
+
+ asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x2f\n\t"
+ : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
+ : "memory");
+}
+
+static __always_inline void xsave_state_1(void *_fx, uint64_t mask)
+{
+ uint32_t lmask = mask;
+ uint32_t hmask = mask >> 32;
+ unsigned char *fx = _fx;
+
+ asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x27\n\t"
+ : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
+ : "memory");
+}
+
+static inline uint64_t xgetbv(uint32_t index)
+{
+ uint32_t eax, edx;
+
+ asm volatile(".byte 0x0f,0x01,0xd0" /* xgetbv */
+ : "=a" (eax), "=d" (edx)
+ : "c" (index));
+ return eax + ((uint64_t)edx << 32);
+}
+
+static uint64_t read_mpx_status_sig(ucontext_t *uctxt)
+{
+ memset(buffer, 0, sizeof(buffer));
+ memcpy(buffer,
+ (uint8_t *)uctxt->uc_mcontext.fpregs + XSAVE_OFFSET_IN_FPMEM,
+ sizeof(struct xsave_struct));
+
+ return xsave_buf->bndcsr.status_reg;
+}
+
+#include <pthread.h>
+
+static uint8_t *get_next_inst_ip(uint8_t *addr)
+{
+ uint8_t *ip = addr;
+ uint8_t sib;
+ uint8_t rm;
+ uint8_t mod;
+ uint8_t base;
+ uint8_t modrm;
+
+ /* determine the prefix. */
+ switch(*ip) {
+ case 0xf2:
+ case 0xf3:
+ case 0x66:
+ ip++;
+ break;
+ }
+
+ /* look for rex prefix */
+ if ((*ip & 0x40) == 0x40)
+ ip++;
+
+ /* Make sure we have a MPX instruction. */
+ if (*ip++ != 0x0f)
+ return addr;
+
+ /* Skip the op code byte. */
+ ip++;
+
+ /* Get the modrm byte. */
+ modrm = *ip++;
+
+ /* Break it down into parts. */
+ rm = modrm & 7;
+ mod = (modrm >> 6);
+
+ /* Init the parts of the address mode. */
+ base = 8;
+
+ /* Is it a mem mode? */
+ if (mod != 3) {
+ /* look for scaled indexed addressing */
+ if (rm == 4) {
+ /* SIB addressing */
+ sib = *ip++;
+ base = sib & 7;
+ switch (mod) {
+ case 0:
+ if (base == 5)
+ ip += 4;
+ break;
+
+ case 1:
+ ip++;
+ break;
+
+ case 2:
+ ip += 4;
+ break;
+ }
+
+ } else {
+ /* MODRM addressing */
+ switch (mod) {
+ case 0:
+ /* DISP32 addressing, no base */
+ if (rm == 5)
+ ip += 4;
+ break;
+
+ case 1:
+ ip++;
+ break;
+
+ case 2:
+ ip += 4;
+ break;
+ }
+ }
+ }
+ return ip;
+}
+
+#ifdef si_lower
+static inline void *__si_bounds_lower(siginfo_t *si)
+{
+ return si->si_lower;
+}
+
+static inline void *__si_bounds_upper(siginfo_t *si)
+{
+ return si->si_upper;
+}
+#else
+
+/*
+ * This deals with old version of _sigfault in some distros:
+ *
+
+old _sigfault:
+ struct {
+ void *si_addr;
+ } _sigfault;
+
+new _sigfault:
+ struct {
+ void __user *_addr;
+ int _trapno;
+ short _addr_lsb;
+ union {
+ struct {
+ void __user *_lower;
+ void __user *_upper;
+ } _addr_bnd;
+ __u32 _pkey;
+ };
+ } _sigfault;
+ *
+ */
+
+static inline void **__si_bounds_hack(siginfo_t *si)
+{
+ void *sigfault = &si->_sifields._sigfault;
+ void *end_sigfault = sigfault + sizeof(si->_sifields._sigfault);
+ int *trapno = (int*)end_sigfault;
+ /* skip _trapno and _addr_lsb */
+ void **__si_lower = (void**)(trapno + 2);
+
+ return __si_lower;
+}
+
+static inline void *__si_bounds_lower(siginfo_t *si)
+{
+ return *__si_bounds_hack(si);
+}
+
+static inline void *__si_bounds_upper(siginfo_t *si)
+{
+ return *(__si_bounds_hack(si) + 1);
+}
+#endif
+
+static int br_count;
+static int expected_bnd_index = -1;
+uint64_t shadow_plb[NR_MPX_BOUNDS_REGISTERS][2]; /* shadow MPX bound registers */
+unsigned long shadow_map[NR_MPX_BOUNDS_REGISTERS];
+
+/* Failed address bound checks: */
+#ifndef SEGV_BNDERR
+# define SEGV_BNDERR 3
+#endif
+
+/*
+ * The kernel is supposed to provide some information about the bounds
+ * exception in the siginfo. It should match what we have in the bounds
+ * registers that we are checking against. Just check against the shadow copy
+ * since it is easily available, and we also check that *it* matches the real
+ * registers.
+ */
+void check_siginfo_vs_shadow(siginfo_t* si)
+{
+ int siginfo_ok = 1;
+ void *shadow_lower = (void *)(unsigned long)shadow_plb[expected_bnd_index][0];
+ void *shadow_upper = (void *)(unsigned long)shadow_plb[expected_bnd_index][1];
+
+ if ((expected_bnd_index < 0) ||
+ (expected_bnd_index >= NR_MPX_BOUNDS_REGISTERS)) {
+ fprintf(stderr, "ERROR: invalid expected_bnd_index: %d\n",
+ expected_bnd_index);
+ exit(6);
+ }
+ if (__si_bounds_lower(si) != shadow_lower)
+ siginfo_ok = 0;
+ if (__si_bounds_upper(si) != shadow_upper)
+ siginfo_ok = 0;
+
+ if (!siginfo_ok) {
+ fprintf(stderr, "ERROR: siginfo bounds do not match "
+ "shadow bounds for register %d\n", expected_bnd_index);
+ exit(7);
+ }
+}
+
+void handler(int signum, siginfo_t *si, void *vucontext)
+{
+ int i;
+ ucontext_t *uctxt = vucontext;
+ int trapno;
+ unsigned long ip;
+
+ dprintf1("entered signal handler\n");
+
+ trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO];
+ ip = uctxt->uc_mcontext.gregs[REG_IP_IDX];
+
+ if (trapno == 5) {
+ typeof(si->si_addr) *si_addr_ptr = &si->si_addr;
+ uint64_t status = read_mpx_status_sig(uctxt);
+ uint64_t br_reason = status & 0x3;
+
+ br_count++;
+ dprintf1("#BR 0x%jx (total seen: %d)\n", status, br_count);
+
+ dprintf2("Saw a #BR! status 0x%jx at %016lx br_reason: %jx\n",
+ status, ip, br_reason);
+ dprintf2("si_signo: %d\n", si->si_signo);
+ dprintf2(" signum: %d\n", signum);
+ dprintf2("info->si_code == SEGV_BNDERR: %d\n",
+ (si->si_code == SEGV_BNDERR));
+ dprintf2("info->si_code: %d\n", si->si_code);
+ dprintf2("info->si_lower: %p\n", __si_bounds_lower(si));
+ dprintf2("info->si_upper: %p\n", __si_bounds_upper(si));
+
+ for (i = 0; i < 8; i++)
+ dprintf3("[%d]: %p\n", i, si_addr_ptr[i]);
+ switch (br_reason) {
+ case 0: /* traditional BR */
+ fprintf(stderr,
+ "Undefined status with bound exception:%jx\n",
+ status);
+ exit(5);
+ case 1: /* #BR MPX bounds exception */
+ /* these are normal and we expect to see them */
+
+ check_siginfo_vs_shadow(si);
+
+ dprintf1("bounds exception (normal): status 0x%jx at %p si_addr: %p\n",
+ status, (void *)ip, si->si_addr);
+ num_bnd_chk++;
+ uctxt->uc_mcontext.gregs[REG_IP_IDX] =
+ (greg_t)get_next_inst_ip((uint8_t *)ip);
+ break;
+ case 2:
+ fprintf(stderr, "#BR status == 2, missing bounds table,"
+ "kernel should have handled!!\n");
+ exit(4);
+ break;
+ default:
+ fprintf(stderr, "bound check error: status 0x%jx at %p\n",
+ status, (void *)ip);
+ num_bnd_chk++;
+ uctxt->uc_mcontext.gregs[REG_IP_IDX] =
+ (greg_t)get_next_inst_ip((uint8_t *)ip);
+ fprintf(stderr, "bound check error: si_addr %p\n", si->si_addr);
+ exit(3);
+ }
+ } else if (trapno == 14) {
+ eprintf("ERROR: In signal handler, page fault, trapno = %d, ip = %016lx\n",
+ trapno, ip);
+ eprintf("si_addr %p\n", si->si_addr);
+ eprintf("REG_ERR: %lx\n", (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]);
+ test_failed();
+ } else {
+ eprintf("unexpected trap %d! at 0x%lx\n", trapno, ip);
+ eprintf("si_addr %p\n", si->si_addr);
+ eprintf("REG_ERR: %lx\n", (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]);
+ test_failed();
+ }
+}
+
+static inline void cpuid_count(unsigned int op, int count,
+ unsigned int *eax, unsigned int *ebx,
+ unsigned int *ecx, unsigned int *edx)
+{
+ *eax = op;
+ *ecx = count;
+ __cpuid(eax, ebx, ecx, edx);
+}
+
+#define XSTATE_CPUID 0x0000000d
+
+/*
+ * List of XSAVE features Linux knows about:
+ */
+enum xfeature_bit {
+ XSTATE_BIT_FP,
+ XSTATE_BIT_SSE,
+ XSTATE_BIT_YMM,
+ XSTATE_BIT_BNDREGS,
+ XSTATE_BIT_BNDCSR,
+ XSTATE_BIT_OPMASK,
+ XSTATE_BIT_ZMM_Hi256,
+ XSTATE_BIT_Hi16_ZMM,
+
+ XFEATURES_NR_MAX,
+};
+
+#define XSTATE_FP (1 << XSTATE_BIT_FP)
+#define XSTATE_SSE (1 << XSTATE_BIT_SSE)
+#define XSTATE_YMM (1 << XSTATE_BIT_YMM)
+#define XSTATE_BNDREGS (1 << XSTATE_BIT_BNDREGS)
+#define XSTATE_BNDCSR (1 << XSTATE_BIT_BNDCSR)
+#define XSTATE_OPMASK (1 << XSTATE_BIT_OPMASK)
+#define XSTATE_ZMM_Hi256 (1 << XSTATE_BIT_ZMM_Hi256)
+#define XSTATE_Hi16_ZMM (1 << XSTATE_BIT_Hi16_ZMM)
+
+#define MPX_XSTATES (XSTATE_BNDREGS | XSTATE_BNDCSR) /* 0x18 */
+
+bool one_bit(unsigned int x, int bit)
+{
+ return !!(x & (1<<bit));
+}
+
+void print_state_component(int state_bit_nr, char *name)
+{
+ unsigned int eax, ebx, ecx, edx;
+ unsigned int state_component_size;
+ unsigned int state_component_supervisor;
+ unsigned int state_component_user;
+ unsigned int state_component_aligned;
+
+ /* See SDM Section 13.2 */
+ cpuid_count(XSTATE_CPUID, state_bit_nr, &eax, &ebx, &ecx, &edx);
+ assert(eax || ebx || ecx);
+ state_component_size = eax;
+ state_component_supervisor = ((!ebx) && one_bit(ecx, 0));
+ state_component_user = !one_bit(ecx, 0);
+ state_component_aligned = one_bit(ecx, 1);
+ printf("%8s: size: %d user: %d supervisor: %d aligned: %d\n",
+ name,
+ state_component_size, state_component_user,
+ state_component_supervisor, state_component_aligned);
+
+}
+
+/* Intel-defined CPU features, CPUID level 0x00000001 (ecx) */
+#define XSAVE_FEATURE_BIT (26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
+#define OSXSAVE_FEATURE_BIT (27) /* XSAVE enabled in the OS */
+
+bool check_mpx_support(void)
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ cpuid_count(1, 0, &eax, &ebx, &ecx, &edx);
+
+ /* We can't do much without XSAVE, so just make these assert()'s */
+ if (!one_bit(ecx, XSAVE_FEATURE_BIT)) {
+ fprintf(stderr, "processor lacks XSAVE, can not run MPX tests\n");
+ exit(0);
+ }
+
+ if (!one_bit(ecx, OSXSAVE_FEATURE_BIT)) {
+ fprintf(stderr, "processor lacks OSXSAVE, can not run MPX tests\n");
+ exit(0);
+ }
+
+ /* CPUs not supporting the XSTATE CPUID leaf do not support MPX */
+ /* Is this redundant with the feature bit checks? */
+ cpuid_count(0, 0, &eax, &ebx, &ecx, &edx);
+ if (eax < XSTATE_CPUID) {
+ fprintf(stderr, "processor lacks XSTATE CPUID leaf,"
+ " can not run MPX tests\n");
+ exit(0);
+ }
+
+ printf("XSAVE is supported by HW & OS\n");
+
+ cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
+
+ printf("XSAVE processor supported state mask: 0x%x\n", eax);
+ printf("XSAVE OS supported state mask: 0x%jx\n", xgetbv(0));
+
+ /* Make sure that the MPX states are enabled in in XCR0 */
+ if ((eax & MPX_XSTATES) != MPX_XSTATES) {
+ fprintf(stderr, "processor lacks MPX XSTATE(s), can not run MPX tests\n");
+ exit(0);
+ }
+
+ /* Make sure the MPX states are supported by XSAVE* */
+ if ((xgetbv(0) & MPX_XSTATES) != MPX_XSTATES) {
+ fprintf(stderr, "MPX XSTATE(s) no enabled in XCR0, "
+ "can not run MPX tests\n");
+ exit(0);
+ }
+
+ print_state_component(XSTATE_BIT_BNDREGS, "BNDREGS");
+ print_state_component(XSTATE_BIT_BNDCSR, "BNDCSR");
+
+ return true;
+}
+
+void enable_mpx(void *l1base)
+{
+ /* enable point lookup */
+ memset(buffer, 0, sizeof(buffer));
+ xrstor_state(xsave_buf, 0x18);
+
+ xsave_buf->xsave_hdr.xstate_bv = 0x10;
+ xsave_buf->bndcsr.cfg_reg_u = (unsigned long)l1base | 1;
+ xsave_buf->bndcsr.status_reg = 0;
+
+ dprintf2("bf xrstor\n");
+ dprintf2("xsave cndcsr: status %jx, configu %jx\n",
+ xsave_buf->bndcsr.status_reg, xsave_buf->bndcsr.cfg_reg_u);
+ xrstor_state(xsave_buf, 0x18);
+ dprintf2("after xrstor\n");
+
+ xsave_state_1(xsave_buf, 0x18);
+
+ dprintf1("xsave bndcsr: status %jx, configu %jx\n",
+ xsave_buf->bndcsr.status_reg, xsave_buf->bndcsr.cfg_reg_u);
+}
+
+#include <sys/prctl.h>
+
+struct mpx_bounds_dir *bounds_dir_ptr;
+
+unsigned long __bd_incore(const char *func, int line)
+{
+ unsigned long ret = nr_incore(bounds_dir_ptr, MPX_BOUNDS_DIR_SIZE_BYTES);
+ return ret;
+}
+#define bd_incore() __bd_incore(__func__, __LINE__)
+
+void check_clear(void *ptr, unsigned long sz)
+{
+ unsigned long *i;
+
+ for (i = ptr; (void *)i < ptr + sz; i++) {
+ if (*i) {
+ dprintf1("%p is NOT clear at %p\n", ptr, i);
+ assert(0);
+ }
+ }
+ dprintf1("%p is clear for %lx\n", ptr, sz);
+}
+
+void check_clear_bd(void)
+{
+ check_clear(bounds_dir_ptr, 2UL << 30);
+}
+
+#define USE_MALLOC_FOR_BOUNDS_DIR 1
+bool process_specific_init(void)
+{
+ unsigned long size;
+ unsigned long *dir;
+ /* Guarantee we have the space to align it, add padding: */
+ unsigned long pad = getpagesize();
+
+ size = 2UL << 30; /* 2GB */
+ if (sizeof(unsigned long) == 4)
+ size = 4UL << 20; /* 4MB */
+ dprintf1("trying to allocate %ld MB bounds directory\n", (size >> 20));
+
+ if (USE_MALLOC_FOR_BOUNDS_DIR) {
+ unsigned long _dir;
+
+ dir = malloc(size + pad);
+ assert(dir);
+ _dir = (unsigned long)dir;
+ _dir += 0xfffUL;
+ _dir &= ~0xfffUL;
+ dir = (void *)_dir;
+ } else {
+ /*
+ * This makes debugging easier because the address
+ * calculations are simpler:
+ */
+ dir = mmap((void *)0x200000000000, size + pad,
+ PROT_READ|PROT_WRITE,
+ MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+ if (dir == (void *)-1) {
+ perror("unable to allocate bounds directory");
+ abort();
+ }
+ check_clear(dir, size);
+ }
+ bounds_dir_ptr = (void *)dir;
+ madvise(bounds_dir_ptr, size, MADV_NOHUGEPAGE);
+ bd_incore();
+ dprintf1("bounds directory: 0x%p -> 0x%p\n", bounds_dir_ptr,
+ (char *)bounds_dir_ptr + size);
+ check_clear(dir, size);
+ enable_mpx(dir);
+ check_clear(dir, size);
+ if (prctl(43, 0, 0, 0, 0)) {
+ printf("no MPX support\n");
+ abort();
+ return false;
+ }
+ return true;
+}
+
+bool process_specific_finish(void)
+{
+ if (prctl(44)) {
+ printf("no MPX support\n");
+ return false;
+ }
+ return true;
+}
+
+void setup_handler()
+{
+ int r, rs;
+ struct sigaction newact;
+ struct sigaction oldact;
+
+ /* #BR is mapped to sigsegv */
+ int signum = SIGSEGV;
+
+ newact.sa_handler = 0; /* void(*)(int)*/
+ newact.sa_sigaction = handler; /* void (*)(int, siginfo_t*, void *) */
+
+ /*sigset_t - signals to block while in the handler */
+ /* get the old signal mask. */
+ rs = sigprocmask(SIG_SETMASK, 0, &newact.sa_mask);
+ assert(rs == 0);
+
+ /* call sa_sigaction, not sa_handler*/
+ newact.sa_flags = SA_SIGINFO;
+
+ newact.sa_restorer = 0; /* void(*)(), obsolete */
+ r = sigaction(signum, &newact, &oldact);
+ assert(r == 0);
+}
+
+void mpx_prepare(void)
+{
+ dprintf2("%s()\n", __func__);
+ setup_handler();
+ process_specific_init();
+}
+
+void mpx_cleanup(void)
+{
+ printf("%s(): %jd BRs. bye...\n", __func__, num_bnd_chk);
+ process_specific_finish();
+}
+
+/*-------------- the following is test case ---------------*/
+#include <stdint.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <time.h>
+
+uint64_t num_lower_brs;
+uint64_t num_upper_brs;
+
+#define MPX_CONFIG_OFFSET 1024
+#define MPX_BOUNDS_OFFSET 960
+#define MPX_HEADER_OFFSET 512
+#define MAX_ADDR_TESTED (1<<28)
+#define TEST_ROUNDS 100
+
+/*
+ 0F 1A /r BNDLDX-Load
+ 0F 1B /r BNDSTX-Store Extended Bounds Using Address Translation
+ 66 0F 1A /r BNDMOV bnd1, bnd2/m128
+ 66 0F 1B /r BNDMOV bnd1/m128, bnd2
+ F2 0F 1A /r BNDCU bnd, r/m64
+ F2 0F 1B /r BNDCN bnd, r/m64
+ F3 0F 1A /r BNDCL bnd, r/m64
+ F3 0F 1B /r BNDMK bnd, m64
+*/
+
+static __always_inline void xsave_state(void *_fx, uint64_t mask)
+{
+ uint32_t lmask = mask;
+ uint32_t hmask = mask >> 32;
+ unsigned char *fx = _fx;
+
+ asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x27\n\t"
+ : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
+ : "memory");
+}
+
+static __always_inline void mpx_clear_bnd0(void)
+{
+ long size = 0;
+ void *ptr = NULL;
+ /* F3 0F 1B /r BNDMK bnd, m64 */
+ /* f3 0f 1b 04 11 bndmk (%rcx,%rdx,1),%bnd0 */
+ asm volatile(".byte 0xf3,0x0f,0x1b,0x04,0x11\n\t"
+ : : "c" (ptr), "d" (size-1)
+ : "memory");
+}
+
+static __always_inline void mpx_make_bound_helper(unsigned long ptr,
+ unsigned long size)
+{
+ /* F3 0F 1B /r BNDMK bnd, m64 */
+ /* f3 0f 1b 04 11 bndmk (%rcx,%rdx,1),%bnd0 */
+ asm volatile(".byte 0xf3,0x0f,0x1b,0x04,0x11\n\t"
+ : : "c" (ptr), "d" (size-1)
+ : "memory");
+}
+
+static __always_inline void mpx_check_lowerbound_helper(unsigned long ptr)
+{
+ /* F3 0F 1A /r NDCL bnd, r/m64 */
+ /* f3 0f 1a 01 bndcl (%rcx),%bnd0 */
+ asm volatile(".byte 0xf3,0x0f,0x1a,0x01\n\t"
+ : : "c" (ptr)
+ : "memory");
+}
+
+static __always_inline void mpx_check_upperbound_helper(unsigned long ptr)
+{
+ /* F2 0F 1A /r BNDCU bnd, r/m64 */
+ /* f2 0f 1a 01 bndcu (%rcx),%bnd0 */
+ asm volatile(".byte 0xf2,0x0f,0x1a,0x01\n\t"
+ : : "c" (ptr)
+ : "memory");
+}
+
+static __always_inline void mpx_movbndreg_helper()
+{
+ /* 66 0F 1B /r BNDMOV bnd1/m128, bnd2 */
+ /* 66 0f 1b c2 bndmov %bnd0,%bnd2 */
+
+ asm volatile(".byte 0x66,0x0f,0x1b,0xc2\n\t");
+}
+
+static __always_inline void mpx_movbnd2mem_helper(uint8_t *mem)
+{
+ /* 66 0F 1B /r BNDMOV bnd1/m128, bnd2 */
+ /* 66 0f 1b 01 bndmov %bnd0,(%rcx) */
+ asm volatile(".byte 0x66,0x0f,0x1b,0x01\n\t"
+ : : "c" (mem)
+ : "memory");
+}
+
+static __always_inline void mpx_movbnd_from_mem_helper(uint8_t *mem)
+{
+ /* 66 0F 1A /r BNDMOV bnd1, bnd2/m128 */
+ /* 66 0f 1a 01 bndmov (%rcx),%bnd0 */
+ asm volatile(".byte 0x66,0x0f,0x1a,0x01\n\t"
+ : : "c" (mem)
+ : "memory");
+}
+
+static __always_inline void mpx_store_dsc_helper(unsigned long ptr_addr,
+ unsigned long ptr_val)
+{
+ /* 0F 1B /r BNDSTX-Store Extended Bounds Using Address Translation */
+ /* 0f 1b 04 11 bndstx %bnd0,(%rcx,%rdx,1) */
+ asm volatile(".byte 0x0f,0x1b,0x04,0x11\n\t"
+ : : "c" (ptr_addr), "d" (ptr_val)
+ : "memory");
+}
+
+static __always_inline void mpx_load_dsc_helper(unsigned long ptr_addr,
+ unsigned long ptr_val)
+{
+ /* 0F 1A /r BNDLDX-Load */
+ /*/ 0f 1a 04 11 bndldx (%rcx,%rdx,1),%bnd0 */
+ asm volatile(".byte 0x0f,0x1a,0x04,0x11\n\t"
+ : : "c" (ptr_addr), "d" (ptr_val)
+ : "memory");
+}
+
+void __print_context(void *__print_xsave_buffer, int line)
+{
+ uint64_t *bounds = (uint64_t *)(__print_xsave_buffer + MPX_BOUNDS_OFFSET);
+ uint64_t *cfg = (uint64_t *)(__print_xsave_buffer + MPX_CONFIG_OFFSET);
+
+ int i;
+ eprintf("%s()::%d\n", "print_context", line);
+ for (i = 0; i < 4; i++) {
+ eprintf("bound[%d]: 0x%016lx 0x%016lx(0x%016lx)\n", i,
+ (unsigned long)bounds[i*2],
+ ~(unsigned long)bounds[i*2+1],
+ (unsigned long)bounds[i*2+1]);
+ }
+
+ eprintf("cpcfg: %jx cpstatus: %jx\n", cfg[0], cfg[1]);
+}
+#define print_context(x) __print_context(x, __LINE__)
+#ifdef DEBUG
+#define dprint_context(x) print_context(x)
+#else
+#define dprint_context(x) do{}while(0)
+#endif
+
+void init()
+{
+ int i;
+
+ srand((unsigned int)time(NULL));
+
+ for (i = 0; i < 4; i++) {
+ shadow_plb[i][0] = 0;
+ shadow_plb[i][1] = ~(unsigned long)0;
+ }
+}
+
+long int __mpx_random(int line)
+{
+#ifdef NOT_SO_RANDOM
+ static long fake = 722122311;
+ fake += 563792075;
+ return fakse;
+#else
+ return random();
+#endif
+}
+#define mpx_random() __mpx_random(__LINE__)
+
+uint8_t *get_random_addr()
+{
+ uint8_t*addr = (uint8_t *)(unsigned long)(rand() % MAX_ADDR_TESTED);
+ return (addr - (unsigned long)addr % sizeof(uint8_t *));
+}
+
+static inline bool compare_context(void *__xsave_buffer)
+{
+ uint64_t *bounds = (uint64_t *)(__xsave_buffer + MPX_BOUNDS_OFFSET);
+
+ int i;
+ for (i = 0; i < 4; i++) {
+ dprintf3("shadow[%d]{%016lx/%016lx}\nbounds[%d]{%016lx/%016lx}\n",
+ i, (unsigned long)shadow_plb[i][0], (unsigned long)shadow_plb[i][1],
+ i, (unsigned long)bounds[i*2], ~(unsigned long)bounds[i*2+1]);
+ if ((shadow_plb[i][0] != bounds[i*2]) ||
+ (shadow_plb[i][1] != ~(unsigned long)bounds[i*2+1])) {
+ eprintf("ERROR comparing shadow to real bound register %d\n", i);
+ eprintf("shadow{0x%016lx/0x%016lx}\nbounds{0x%016lx/0x%016lx}\n",
+ (unsigned long)shadow_plb[i][0], (unsigned long)shadow_plb[i][1],
+ (unsigned long)bounds[i*2], (unsigned long)bounds[i*2+1]);
+ return false;
+ }
+ }
+
+ return true;
+}
+
+void mkbnd_shadow(uint8_t *ptr, int index, long offset)
+{
+ uint64_t *lower = (uint64_t *)&(shadow_plb[index][0]);
+ uint64_t *upper = (uint64_t *)&(shadow_plb[index][1]);
+ *lower = (unsigned long)ptr;
+ *upper = (unsigned long)ptr + offset - 1;
+}
+
+void check_lowerbound_shadow(uint8_t *ptr, int index)
+{
+ uint64_t *lower = (uint64_t *)&(shadow_plb[index][0]);
+ if (*lower > (uint64_t)(unsigned long)ptr)
+ num_lower_brs++;
+ else
+ dprintf1("LowerBoundChk passed:%p\n", ptr);
+}
+
+void check_upperbound_shadow(uint8_t *ptr, int index)
+{
+ uint64_t upper = *(uint64_t *)&(shadow_plb[index][1]);
+ if (upper < (uint64_t)(unsigned long)ptr)
+ num_upper_brs++;
+ else
+ dprintf1("UpperBoundChk passed:%p\n", ptr);
+}
+
+__always_inline void movbndreg_shadow(int src, int dest)
+{
+ shadow_plb[dest][0] = shadow_plb[src][0];
+ shadow_plb[dest][1] = shadow_plb[src][1];
+}
+
+__always_inline void movbnd2mem_shadow(int src, unsigned long *dest)
+{
+ unsigned long *lower = (unsigned long *)&(shadow_plb[src][0]);
+ unsigned long *upper = (unsigned long *)&(shadow_plb[src][1]);
+ *dest = *lower;
+ *(dest+1) = *upper;
+}
+
+__always_inline void movbnd_from_mem_shadow(unsigned long *src, int dest)
+{
+ unsigned long *lower = (unsigned long *)&(shadow_plb[dest][0]);
+ unsigned long *upper = (unsigned long *)&(shadow_plb[dest][1]);
+ *lower = *src;
+ *upper = *(src+1);
+}
+
+__always_inline void stdsc_shadow(int index, uint8_t *ptr, uint8_t *ptr_val)
+{
+ shadow_map[0] = (unsigned long)shadow_plb[index][0];
+ shadow_map[1] = (unsigned long)shadow_plb[index][1];
+ shadow_map[2] = (unsigned long)ptr_val;
+ dprintf3("%s(%d, %p, %p) set shadow map[2]: %p\n", __func__,
+ index, ptr, ptr_val, ptr_val);
+ /*ptr ignored */
+}
+
+void lddsc_shadow(int index, uint8_t *ptr, uint8_t *ptr_val)
+{
+ uint64_t lower = shadow_map[0];
+ uint64_t upper = shadow_map[1];
+ uint8_t *value = (uint8_t *)shadow_map[2];
+
+ if (value != ptr_val) {
+ dprintf2("%s(%d, %p, %p) init shadow bounds[%d] "
+ "because %p != %p\n", __func__, index, ptr,
+ ptr_val, index, value, ptr_val);
+ shadow_plb[index][0] = 0;
+ shadow_plb[index][1] = ~(unsigned long)0;
+ } else {
+ shadow_plb[index][0] = lower;
+ shadow_plb[index][1] = upper;
+ }
+ /* ptr ignored */
+}
+
+static __always_inline void mpx_test_helper0(uint8_t *buf, uint8_t *ptr)
+{
+ mpx_make_bound_helper((unsigned long)ptr, 0x1800);
+}
+
+static __always_inline void mpx_test_helper0_shadow(uint8_t *buf, uint8_t *ptr)
+{
+ mkbnd_shadow(ptr, 0, 0x1800);
+}
+
+static __always_inline void mpx_test_helper1(uint8_t *buf, uint8_t *ptr)
+{
+ /* these are hard-coded to check bnd0 */
+ expected_bnd_index = 0;
+ mpx_check_lowerbound_helper((unsigned long)(ptr-1));
+ mpx_check_upperbound_helper((unsigned long)(ptr+0x1800));
+ /* reset this since we do not expect any more bounds exceptions */
+ expected_bnd_index = -1;
+}
+
+static __always_inline void mpx_test_helper1_shadow(uint8_t *buf, uint8_t *ptr)
+{
+ check_lowerbound_shadow(ptr-1, 0);
+ check_upperbound_shadow(ptr+0x1800, 0);
+}
+
+static __always_inline void mpx_test_helper2(uint8_t *buf, uint8_t *ptr)
+{
+ mpx_make_bound_helper((unsigned long)ptr, 0x1800);
+ mpx_movbndreg_helper();
+ mpx_movbnd2mem_helper(buf);
+ mpx_make_bound_helper((unsigned long)(ptr+0x12), 0x1800);
+}
+
+static __always_inline void mpx_test_helper2_shadow(uint8_t *buf, uint8_t *ptr)
+{
+ mkbnd_shadow(ptr, 0, 0x1800);
+ movbndreg_shadow(0, 2);
+ movbnd2mem_shadow(0, (unsigned long *)buf);
+ mkbnd_shadow(ptr+0x12, 0, 0x1800);
+}
+
+static __always_inline void mpx_test_helper3(uint8_t *buf, uint8_t *ptr)
+{
+ mpx_movbnd_from_mem_helper(buf);
+}
+
+static __always_inline void mpx_test_helper3_shadow(uint8_t *buf, uint8_t *ptr)
+{
+ movbnd_from_mem_shadow((unsigned long *)buf, 0);
+}
+
+static __always_inline void mpx_test_helper4(uint8_t *buf, uint8_t *ptr)
+{
+ mpx_store_dsc_helper((unsigned long)buf, (unsigned long)ptr);
+ mpx_make_bound_helper((unsigned long)(ptr+0x12), 0x1800);
+}
+
+static __always_inline void mpx_test_helper4_shadow(uint8_t *buf, uint8_t *ptr)
+{
+ stdsc_shadow(0, buf, ptr);
+ mkbnd_shadow(ptr+0x12, 0, 0x1800);
+}
+
+static __always_inline void mpx_test_helper5(uint8_t *buf, uint8_t *ptr)
+{
+ mpx_load_dsc_helper((unsigned long)buf, (unsigned long)ptr);
+}
+
+static __always_inline void mpx_test_helper5_shadow(uint8_t *buf, uint8_t *ptr)
+{
+ lddsc_shadow(0, buf, ptr);
+}
+
+#define NR_MPX_TEST_FUNCTIONS 6
+
+/*
+ * For compatibility reasons, MPX will clear the bounds registers
+ * when you make function calls (among other things). We have to
+ * preserve the registers in between calls to the "helpers" since
+ * they build on each other.
+ *
+ * Be very careful not to make any function calls inside the
+ * helpers, or anywhere else beween the xrstor and xsave.
+ */
+#define run_helper(helper_nr, buf, buf_shadow, ptr) do { \
+ xrstor_state(xsave_test_buf, flags); \
+ mpx_test_helper##helper_nr(buf, ptr); \
+ xsave_state(xsave_test_buf, flags); \
+ mpx_test_helper##helper_nr##_shadow(buf_shadow, ptr); \
+} while (0)
+
+static void run_helpers(int nr, uint8_t *buf, uint8_t *buf_shadow, uint8_t *ptr)
+{
+ uint64_t flags = 0x18;
+
+ dprint_context(xsave_test_buf);
+ switch (nr) {
+ case 0:
+ run_helper(0, buf, buf_shadow, ptr);
+ break;
+ case 1:
+ run_helper(1, buf, buf_shadow, ptr);
+ break;
+ case 2:
+ run_helper(2, buf, buf_shadow, ptr);
+ break;
+ case 3:
+ run_helper(3, buf, buf_shadow, ptr);
+ break;
+ case 4:
+ run_helper(4, buf, buf_shadow, ptr);
+ break;
+ case 5:
+ run_helper(5, buf, buf_shadow, ptr);
+ break;
+ default:
+ test_failed();
+ break;
+ }
+ dprint_context(xsave_test_buf);
+}
+
+unsigned long buf_shadow[1024]; /* used to check load / store descriptors */
+extern long inspect_me(struct mpx_bounds_dir *bounds_dir);
+
+long cover_buf_with_bt_entries(void *buf, long buf_len)
+{
+ int i;
+ long nr_to_fill;
+ int ratio = 1000;
+ unsigned long buf_len_in_ptrs;
+
+ /* Fill about 1/100 of the space with bt entries */
+ nr_to_fill = buf_len / (sizeof(unsigned long) * ratio);
+
+ if (!nr_to_fill)
+ dprintf3("%s() nr_to_fill: %ld\n", __func__, nr_to_fill);
+
+ /* Align the buffer to pointer size */
+ while (((unsigned long)buf) % sizeof(void *)) {
+ buf++;
+ buf_len--;
+ }
+ /* We are storing pointers, so make */
+ buf_len_in_ptrs = buf_len / sizeof(void *);
+
+ for (i = 0; i < nr_to_fill; i++) {
+ long index = (mpx_random() % buf_len_in_ptrs);
+ void *ptr = buf + index * sizeof(unsigned long);
+ unsigned long ptr_addr = (unsigned long)ptr;
+
+ /* ptr and size can be anything */
+ mpx_make_bound_helper((unsigned long)ptr, 8);
+
+ /*
+ * take bnd0 and put it in to bounds tables "buf + index" is an
+ * address inside the buffer where we are pretending that we
+ * are going to put a pointer We do not, though because we will
+ * never load entries from the table, so it doesn't matter.
+ */
+ mpx_store_dsc_helper(ptr_addr, (unsigned long)ptr);
+ dprintf4("storing bound table entry for %lx (buf start @ %p)\n",
+ ptr_addr, buf);
+ }
+ return nr_to_fill;
+}
+
+unsigned long align_down(unsigned long alignme, unsigned long align_to)
+{
+ return alignme & ~(align_to-1);
+}
+
+unsigned long align_up(unsigned long alignme, unsigned long align_to)
+{
+ return (alignme + align_to - 1) & ~(align_to-1);
+}
+
+/*
+ * Using 1MB alignment guarantees that each no allocation
+ * will overlap with another's bounds tables.
+ *
+ * We have to cook our own allocator here. malloc() can
+ * mix other allocation with ours which means that even
+ * if we free all of our allocations, there might still
+ * be bounds tables for the *areas* since there is other
+ * valid memory there.
+ *
+ * We also can't use malloc() because a free() of an area
+ * might not free it back to the kernel. We want it
+ * completely unmapped an malloc() does not guarantee
+ * that.
+ */
+#ifdef __i386__
+long alignment = 4096;
+long sz_alignment = 4096;
+#else
+long alignment = 1 * MB;
+long sz_alignment = 1 * MB;
+#endif
+void *mpx_mini_alloc(unsigned long sz)
+{
+ unsigned long long tries = 0;
+ static void *last;
+ void *ptr;
+ void *try_at;
+
+ sz = align_up(sz, sz_alignment);
+
+ try_at = last + alignment;
+ while (1) {
+ ptr = mmap(try_at, sz, PROT_READ|PROT_WRITE,
+ MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+ if (ptr == (void *)-1)
+ return NULL;
+ if (ptr == try_at)
+ break;
+
+ munmap(ptr, sz);
+ try_at += alignment;
+#ifdef __i386__
+ /*
+ * This isn't quite correct for 32-bit binaries
+ * on 64-bit kernels since they can use the
+ * entire 32-bit address space, but it's close
+ * enough.
+ */
+ if (try_at > (void *)0xC0000000)
+#else
+ if (try_at > (void *)0x0000800000000000)
+#endif
+ try_at = (void *)0x0;
+ if (!(++tries % 10000))
+ dprintf1("stuck in %s(), tries: %lld\n", __func__, tries);
+ continue;
+ }
+ last = ptr;
+ dprintf3("mpx_mini_alloc(0x%lx) returning: %p\n", sz, ptr);
+ return ptr;
+}
+void mpx_mini_free(void *ptr, long sz)
+{
+ dprintf2("%s() ptr: %p\n", __func__, ptr);
+ if ((unsigned long)ptr > 0x100000000000) {
+ dprintf1("uh oh !!!!!!!!!!!!!!! pointer too high: %p\n", ptr);
+ test_failed();
+ }
+ sz = align_up(sz, sz_alignment);
+ dprintf3("%s() ptr: %p before munmap\n", __func__, ptr);
+ munmap(ptr, sz);
+ dprintf3("%s() ptr: %p DONE\n", __func__, ptr);
+}
+
+#define NR_MALLOCS 100
+struct one_malloc {
+ char *ptr;
+ int nr_filled_btes;
+ unsigned long size;
+};
+struct one_malloc mallocs[NR_MALLOCS];
+
+void free_one_malloc(int index)
+{
+ unsigned long free_ptr;
+ unsigned long mask;
+
+ if (!mallocs[index].ptr)
+ return;
+
+ mpx_mini_free(mallocs[index].ptr, mallocs[index].size);
+ dprintf4("freed[%d]: %p\n", index, mallocs[index].ptr);
+
+ free_ptr = (unsigned long)mallocs[index].ptr;
+ mask = alignment-1;
+ dprintf4("lowerbits: %lx / %lx mask: %lx\n", free_ptr,
+ (free_ptr & mask), mask);
+ assert((free_ptr & mask) == 0);
+
+ mallocs[index].ptr = NULL;
+}
+
+#ifdef __i386__
+#define MPX_BOUNDS_TABLE_COVERS 4096
+#else
+#define MPX_BOUNDS_TABLE_COVERS (1 * MB)
+#endif
+void zap_everything(void)
+{
+ long after_zap;
+ long before_zap;
+ int i;
+
+ before_zap = inspect_me(bounds_dir_ptr);
+ dprintf1("zapping everything start: %ld\n", before_zap);
+ for (i = 0; i < NR_MALLOCS; i++)
+ free_one_malloc(i);
+
+ after_zap = inspect_me(bounds_dir_ptr);
+ dprintf1("zapping everything done: %ld\n", after_zap);
+ /*
+ * We only guarantee to empty the thing out if our allocations are
+ * exactly aligned on the boundaries of a boudns table.
+ */
+ if ((alignment >= MPX_BOUNDS_TABLE_COVERS) &&
+ (sz_alignment >= MPX_BOUNDS_TABLE_COVERS)) {
+ if (after_zap != 0)
+ test_failed();
+
+ assert(after_zap == 0);
+ }
+}
+
+void do_one_malloc(void)
+{
+ static int malloc_counter;
+ long sz;
+ int rand_index = (mpx_random() % NR_MALLOCS);
+ void *ptr = mallocs[rand_index].ptr;
+
+ dprintf3("%s() enter\n", __func__);
+
+ if (ptr) {
+ dprintf3("freeing one malloc at index: %d\n", rand_index);
+ free_one_malloc(rand_index);
+ if (mpx_random() % (NR_MALLOCS*3) == 3) {
+ int i;
+ dprintf3("zapping some more\n");
+ for (i = rand_index; i < NR_MALLOCS; i++)
+ free_one_malloc(i);
+ }
+ if ((mpx_random() % zap_all_every_this_many_mallocs) == 4)
+ zap_everything();
+ }
+
+ /* 1->~1M */
+ sz = (1 + mpx_random() % 1000) * 1000;
+ ptr = mpx_mini_alloc(sz);
+ if (!ptr) {
+ /*
+ * If we are failing allocations, just assume we
+ * are out of memory and zap everything.
+ */
+ dprintf3("zapping everything because out of memory\n");
+ zap_everything();
+ goto out;
+ }
+
+ dprintf3("malloc: %p size: 0x%lx\n", ptr, sz);
+ mallocs[rand_index].nr_filled_btes = cover_buf_with_bt_entries(ptr, sz);
+ mallocs[rand_index].ptr = ptr;
+ mallocs[rand_index].size = sz;
+out:
+ if ((++malloc_counter) % inspect_every_this_many_mallocs == 0)
+ inspect_me(bounds_dir_ptr);
+}
+
+void run_timed_test(void (*test_func)(void))
+{
+ int done = 0;
+ long iteration = 0;
+ static time_t last_print;
+ time_t now;
+ time_t start;
+
+ time(&start);
+ while (!done) {
+ time(&now);
+ if ((now - start) > TEST_DURATION_SECS)
+ done = 1;
+
+ test_func();
+ iteration++;
+
+ if ((now - last_print > 1) || done) {
+ printf("iteration %ld complete, OK so far\n", iteration);
+ last_print = now;
+ }
+ }
+}
+
+void check_bounds_table_frees(void)
+{
+ printf("executing unmaptest\n");
+ inspect_me(bounds_dir_ptr);
+ run_timed_test(&do_one_malloc);
+ printf("done with malloc() fun\n");
+}
+
+void insn_test_failed(int test_nr, int test_round, void *buf,
+ void *buf_shadow, void *ptr)
+{
+ print_context(xsave_test_buf);
+ eprintf("ERROR: test %d round %d failed\n", test_nr, test_round);
+ while (test_nr == 5) {
+ struct mpx_bt_entry *bte;
+ struct mpx_bounds_dir *bd = (void *)bounds_dir_ptr;
+ struct mpx_bd_entry *bde = mpx_vaddr_to_bd_entry(buf, bd);
+
+ printf(" bd: %p\n", bd);
+ printf("&bde: %p\n", bde);
+ printf("*bde: %lx\n", *(unsigned long *)bde);
+ if (!bd_entry_valid(bde))
+ break;
+
+ bte = mpx_vaddr_to_bt_entry(buf, bd);
+ printf(" te: %p\n", bte);
+ printf("bte[0]: %lx\n", bte->contents[0]);
+ printf("bte[1]: %lx\n", bte->contents[1]);
+ printf("bte[2]: %lx\n", bte->contents[2]);
+ printf("bte[3]: %lx\n", bte->contents[3]);
+ break;
+ }
+ test_failed();
+}
+
+void check_mpx_insns_and_tables(void)
+{
+ int successes = 0;
+ int failures = 0;
+ int buf_size = (1024*1024);
+ unsigned long *buf = malloc(buf_size);
+ const int total_nr_tests = NR_MPX_TEST_FUNCTIONS * TEST_ROUNDS;
+ int i, j;
+
+ memset(buf, 0, buf_size);
+ memset(buf_shadow, 0, sizeof(buf_shadow));
+
+ for (i = 0; i < TEST_ROUNDS; i++) {
+ uint8_t *ptr = get_random_addr() + 8;
+
+ for (j = 0; j < NR_MPX_TEST_FUNCTIONS; j++) {
+ if (0 && j != 5) {
+ successes++;
+ continue;
+ }
+ dprintf2("starting test %d round %d\n", j, i);
+ dprint_context(xsave_test_buf);
+ /*
+ * test5 loads an address from the bounds tables.
+ * The load will only complete if 'ptr' matches
+ * the load and the store, so with random addrs,
+ * the odds of this are very small. Make it
+ * higher by only moving 'ptr' 1/10 times.
+ */
+ if (random() % 10 <= 0)
+ ptr = get_random_addr() + 8;
+ dprintf3("random ptr{%p}\n", ptr);
+ dprint_context(xsave_test_buf);
+ run_helpers(j, (void *)buf, (void *)buf_shadow, ptr);
+ dprint_context(xsave_test_buf);
+ if (!compare_context(xsave_test_buf)) {
+ insn_test_failed(j, i, buf, buf_shadow, ptr);
+ failures++;
+ goto exit;
+ }
+ successes++;
+ dprint_context(xsave_test_buf);
+ dprintf2("finished test %d round %d\n", j, i);
+ dprintf3("\n");
+ dprint_context(xsave_test_buf);
+ }
+ }
+
+exit:
+ dprintf2("\nabout to free:\n");
+ free(buf);
+ dprintf1("successes: %d\n", successes);
+ dprintf1(" failures: %d\n", failures);
+ dprintf1(" tests: %d\n", total_nr_tests);
+ dprintf1(" expected: %jd #BRs\n", num_upper_brs + num_lower_brs);
+ dprintf1(" saw: %d #BRs\n", br_count);
+ if (failures) {
+ eprintf("ERROR: non-zero number of failures\n");
+ exit(20);
+ }
+ if (successes != total_nr_tests) {
+ eprintf("ERROR: succeded fewer than number of tries (%d != %d)\n",
+ successes, total_nr_tests);
+ exit(21);
+ }
+ if (num_upper_brs + num_lower_brs != br_count) {
+ eprintf("ERROR: unexpected number of #BRs: %jd %jd %d\n",
+ num_upper_brs, num_lower_brs, br_count);
+ eprintf("successes: %d\n", successes);
+ eprintf(" failures: %d\n", failures);
+ eprintf(" tests: %d\n", total_nr_tests);
+ eprintf(" expected: %jd #BRs\n", num_upper_brs + num_lower_brs);
+ eprintf(" saw: %d #BRs\n", br_count);
+ exit(22);
+ }
+}
+
+/*
+ * This is supposed to SIGSEGV nicely once the kernel
+ * can no longer allocate vaddr space.
+ */
+void exhaust_vaddr_space(void)
+{
+ unsigned long ptr;
+ /* Try to make sure there is no room for a bounds table anywhere */
+ unsigned long skip = MPX_BOUNDS_TABLE_SIZE_BYTES - PAGE_SIZE;
+#ifdef __i386__
+ unsigned long max_vaddr = 0xf7788000UL;
+#else
+ unsigned long max_vaddr = 0x800000000000UL;
+#endif
+
+ dprintf1("%s() start\n", __func__);
+ /* do not start at 0, we aren't allowed to map there */
+ for (ptr = PAGE_SIZE; ptr < max_vaddr; ptr += skip) {
+ void *ptr_ret;
+ int ret = madvise((void *)ptr, PAGE_SIZE, MADV_NORMAL);
+
+ if (!ret) {
+ dprintf1("madvise() %lx ret: %d\n", ptr, ret);
+ continue;
+ }
+ ptr_ret = mmap((void *)ptr, PAGE_SIZE, PROT_READ|PROT_WRITE,
+ MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+ if (ptr_ret != (void *)ptr) {
+ perror("mmap");
+ dprintf1("mmap(%lx) ret: %p\n", ptr, ptr_ret);
+ break;
+ }
+ if (!(ptr & 0xffffff))
+ dprintf1("mmap(%lx) ret: %p\n", ptr, ptr_ret);
+ }
+ for (ptr = PAGE_SIZE; ptr < max_vaddr; ptr += skip) {
+ dprintf2("covering 0x%lx with bounds table entries\n", ptr);
+ cover_buf_with_bt_entries((void *)ptr, PAGE_SIZE);
+ }
+ dprintf1("%s() end\n", __func__);
+ printf("done with vaddr space fun\n");
+}
+
+void mpx_table_test(void)
+{
+ printf("starting mpx bounds table test\n");
+ run_timed_test(check_mpx_insns_and_tables);
+ printf("done with mpx bounds table test\n");
+}
+
+int main(int argc, char **argv)
+{
+ int unmaptest = 0;
+ int vaddrexhaust = 0;
+ int tabletest = 0;
+ int i;
+
+ check_mpx_support();
+ mpx_prepare();
+ srandom(11179);
+
+ bd_incore();
+ init();
+ bd_incore();
+
+ trace_me();
+
+ xsave_state((void *)xsave_test_buf, 0x1f);
+ if (!compare_context(xsave_test_buf))
+ printf("Init failed\n");
+
+ for (i = 1; i < argc; i++) {
+ if (!strcmp(argv[i], "unmaptest"))
+ unmaptest = 1;
+ if (!strcmp(argv[i], "vaddrexhaust"))
+ vaddrexhaust = 1;
+ if (!strcmp(argv[i], "tabletest"))
+ tabletest = 1;
+ }
+ if (!(unmaptest || vaddrexhaust || tabletest)) {
+ unmaptest = 1;
+ /* vaddrexhaust = 1; */
+ tabletest = 1;
+ }
+ if (unmaptest)
+ check_bounds_table_frees();
+ if (tabletest)
+ mpx_table_test();
+ if (vaddrexhaust)
+ exhaust_vaddr_space();
+ printf("%s completed successfully\n", argv[0]);
+ exit(0);
+}
+
+#include "mpx-dig.c"
diff --git a/tools/testing/selftests/x86/mpx-mm.h b/tools/testing/selftests/x86/mpx-mm.h
new file mode 100644
index 000000000..6dbdd66b8
--- /dev/null
+++ b/tools/testing/selftests/x86/mpx-mm.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _MPX_MM_H
+#define _MPX_MM_H
+
+#define PAGE_SIZE 4096
+#define MB (1UL<<20)
+
+extern long nr_incore(void *ptr, unsigned long size_bytes);
+
+#endif /* _MPX_MM_H */
diff --git a/tools/testing/selftests/x86/pkey-helpers.h b/tools/testing/selftests/x86/pkey-helpers.h
new file mode 100644
index 000000000..254e5436b
--- /dev/null
+++ b/tools/testing/selftests/x86/pkey-helpers.h
@@ -0,0 +1,219 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PKEYS_HELPER_H
+#define _PKEYS_HELPER_H
+#define _GNU_SOURCE
+#include <string.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <signal.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <ucontext.h>
+#include <sys/mman.h>
+
+#define NR_PKEYS 16
+#define PKRU_BITS_PER_PKEY 2
+
+#ifndef DEBUG_LEVEL
+#define DEBUG_LEVEL 0
+#endif
+#define DPRINT_IN_SIGNAL_BUF_SIZE 4096
+extern int dprint_in_signal;
+extern char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE];
+static inline void sigsafe_printf(const char *format, ...)
+{
+ va_list ap;
+
+ if (!dprint_in_signal) {
+ va_start(ap, format);
+ vprintf(format, ap);
+ va_end(ap);
+ } else {
+ int ret;
+ /*
+ * No printf() functions are signal-safe.
+ * They deadlock easily. Write the format
+ * string to get some output, even if
+ * incomplete.
+ */
+ ret = write(1, format, strlen(format));
+ if (ret < 0)
+ exit(1);
+ }
+}
+#define dprintf_level(level, args...) do { \
+ if (level <= DEBUG_LEVEL) \
+ sigsafe_printf(args); \
+} while (0)
+#define dprintf0(args...) dprintf_level(0, args)
+#define dprintf1(args...) dprintf_level(1, args)
+#define dprintf2(args...) dprintf_level(2, args)
+#define dprintf3(args...) dprintf_level(3, args)
+#define dprintf4(args...) dprintf_level(4, args)
+
+extern unsigned int shadow_pkru;
+static inline unsigned int __rdpkru(void)
+{
+ unsigned int eax, edx;
+ unsigned int ecx = 0;
+ unsigned int pkru;
+
+ asm volatile(".byte 0x0f,0x01,0xee\n\t"
+ : "=a" (eax), "=d" (edx)
+ : "c" (ecx));
+ pkru = eax;
+ return pkru;
+}
+
+static inline unsigned int _rdpkru(int line)
+{
+ unsigned int pkru = __rdpkru();
+
+ dprintf4("rdpkru(line=%d) pkru: %x shadow: %x\n",
+ line, pkru, shadow_pkru);
+ assert(pkru == shadow_pkru);
+
+ return pkru;
+}
+
+#define rdpkru() _rdpkru(__LINE__)
+
+static inline void __wrpkru(unsigned int pkru)
+{
+ unsigned int eax = pkru;
+ unsigned int ecx = 0;
+ unsigned int edx = 0;
+
+ dprintf4("%s() changing %08x to %08x\n", __func__, __rdpkru(), pkru);
+ asm volatile(".byte 0x0f,0x01,0xef\n\t"
+ : : "a" (eax), "c" (ecx), "d" (edx));
+ assert(pkru == __rdpkru());
+}
+
+static inline void wrpkru(unsigned int pkru)
+{
+ dprintf4("%s() changing %08x to %08x\n", __func__, __rdpkru(), pkru);
+ /* will do the shadow check for us: */
+ rdpkru();
+ __wrpkru(pkru);
+ shadow_pkru = pkru;
+ dprintf4("%s(%08x) pkru: %08x\n", __func__, pkru, __rdpkru());
+}
+
+/*
+ * These are technically racy. since something could
+ * change PKRU between the read and the write.
+ */
+static inline void __pkey_access_allow(int pkey, int do_allow)
+{
+ unsigned int pkru = rdpkru();
+ int bit = pkey * 2;
+
+ if (do_allow)
+ pkru &= (1<<bit);
+ else
+ pkru |= (1<<bit);
+
+ dprintf4("pkru now: %08x\n", rdpkru());
+ wrpkru(pkru);
+}
+
+static inline void __pkey_write_allow(int pkey, int do_allow_write)
+{
+ long pkru = rdpkru();
+ int bit = pkey * 2 + 1;
+
+ if (do_allow_write)
+ pkru &= (1<<bit);
+ else
+ pkru |= (1<<bit);
+
+ wrpkru(pkru);
+ dprintf4("pkru now: %08x\n", rdpkru());
+}
+
+#define PROT_PKEY0 0x10 /* protection key value (bit 0) */
+#define PROT_PKEY1 0x20 /* protection key value (bit 1) */
+#define PROT_PKEY2 0x40 /* protection key value (bit 2) */
+#define PROT_PKEY3 0x80 /* protection key value (bit 3) */
+
+#define PAGE_SIZE 4096
+#define MB (1<<20)
+
+static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
+ unsigned int *ecx, unsigned int *edx)
+{
+ /* ecx is often an input as well as an output. */
+ asm volatile(
+ "cpuid;"
+ : "=a" (*eax),
+ "=b" (*ebx),
+ "=c" (*ecx),
+ "=d" (*edx)
+ : "0" (*eax), "2" (*ecx));
+}
+
+/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx) */
+#define X86_FEATURE_PKU (1<<3) /* Protection Keys for Userspace */
+#define X86_FEATURE_OSPKE (1<<4) /* OS Protection Keys Enable */
+
+static inline int cpu_has_pku(void)
+{
+ unsigned int eax;
+ unsigned int ebx;
+ unsigned int ecx;
+ unsigned int edx;
+
+ eax = 0x7;
+ ecx = 0x0;
+ __cpuid(&eax, &ebx, &ecx, &edx);
+
+ if (!(ecx & X86_FEATURE_PKU)) {
+ dprintf2("cpu does not have PKU\n");
+ return 0;
+ }
+ if (!(ecx & X86_FEATURE_OSPKE)) {
+ dprintf2("cpu does not have OSPKE\n");
+ return 0;
+ }
+ return 1;
+}
+
+#define XSTATE_PKRU_BIT (9)
+#define XSTATE_PKRU 0x200
+
+int pkru_xstate_offset(void)
+{
+ unsigned int eax;
+ unsigned int ebx;
+ unsigned int ecx;
+ unsigned int edx;
+ int xstate_offset;
+ int xstate_size;
+ unsigned long XSTATE_CPUID = 0xd;
+ int leaf;
+
+ /* assume that XSTATE_PKRU is set in XCR0 */
+ leaf = XSTATE_PKRU_BIT;
+ {
+ eax = XSTATE_CPUID;
+ ecx = leaf;
+ __cpuid(&eax, &ebx, &ecx, &edx);
+
+ if (leaf == XSTATE_PKRU_BIT) {
+ xstate_offset = ebx;
+ xstate_size = eax;
+ }
+ }
+
+ if (xstate_size == 0) {
+ printf("could not find size/offset of PKRU in xsave state\n");
+ return 0;
+ }
+
+ return xstate_offset;
+}
+
+#endif /* _PKEYS_HELPER_H */
diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/x86/protection_keys.c
new file mode 100644
index 000000000..27661302a
--- /dev/null
+++ b/tools/testing/selftests/x86/protection_keys.c
@@ -0,0 +1,1508 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Tests x86 Memory Protection Keys (see Documentation/x86/protection-keys.txt)
+ *
+ * There are examples in here of:
+ * * how to set protection keys on memory
+ * * how to set/clear bits in PKRU (the rights register)
+ * * how to handle SEGV_PKRU signals and extract pkey-relevant
+ * information from the siginfo
+ *
+ * Things to add:
+ * make sure KSM and KSM COW breaking works
+ * prefault pages in at malloc, or not
+ * protect MPX bounds tables with protection keys?
+ * make sure VMA splitting/merging is working correctly
+ * OOMs can destroy mm->mmap (see exit_mmap()), so make sure it is immune to pkeys
+ * look for pkey "leaks" where it is still set on a VMA but "freed" back to the kernel
+ * do a plain mprotect() to a mprotect_pkey() area and make sure the pkey sticks
+ *
+ * Compile like this:
+ * gcc -o protection_keys -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
+ * gcc -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
+ */
+#define _GNU_SOURCE
+#include <errno.h>
+#include <linux/futex.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/syscall.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <signal.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <ucontext.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/ptrace.h>
+#include <setjmp.h>
+
+#include "pkey-helpers.h"
+
+int iteration_nr = 1;
+int test_nr;
+
+unsigned int shadow_pkru;
+
+#define HPAGE_SIZE (1UL<<21)
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
+#define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1))
+#define ALIGN_DOWN(x, align_to) ((x) & ~((align_to)-1))
+#define ALIGN_PTR_UP(p, ptr_align_to) ((typeof(p))ALIGN_UP((unsigned long)(p), ptr_align_to))
+#define ALIGN_PTR_DOWN(p, ptr_align_to) ((typeof(p))ALIGN_DOWN((unsigned long)(p), ptr_align_to))
+#define __stringify_1(x...) #x
+#define __stringify(x...) __stringify_1(x)
+
+#define PTR_ERR_ENOTSUP ((void *)-ENOTSUP)
+
+int dprint_in_signal;
+char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE];
+
+extern void abort_hooks(void);
+#define pkey_assert(condition) do { \
+ if (!(condition)) { \
+ dprintf0("assert() at %s::%d test_nr: %d iteration: %d\n", \
+ __FILE__, __LINE__, \
+ test_nr, iteration_nr); \
+ dprintf0("errno at assert: %d", errno); \
+ abort_hooks(); \
+ exit(__LINE__); \
+ } \
+} while (0)
+
+void cat_into_file(char *str, char *file)
+{
+ int fd = open(file, O_RDWR);
+ int ret;
+
+ dprintf2("%s(): writing '%s' to '%s'\n", __func__, str, file);
+ /*
+ * these need to be raw because they are called under
+ * pkey_assert()
+ */
+ if (fd < 0) {
+ fprintf(stderr, "error opening '%s'\n", str);
+ perror("error: ");
+ exit(__LINE__);
+ }
+
+ ret = write(fd, str, strlen(str));
+ if (ret != strlen(str)) {
+ perror("write to file failed");
+ fprintf(stderr, "filename: '%s' str: '%s'\n", file, str);
+ exit(__LINE__);
+ }
+ close(fd);
+}
+
+#if CONTROL_TRACING > 0
+static int warned_tracing;
+int tracing_root_ok(void)
+{
+ if (geteuid() != 0) {
+ if (!warned_tracing)
+ fprintf(stderr, "WARNING: not run as root, "
+ "can not do tracing control\n");
+ warned_tracing = 1;
+ return 0;
+ }
+ return 1;
+}
+#endif
+
+void tracing_on(void)
+{
+#if CONTROL_TRACING > 0
+#define TRACEDIR "/sys/kernel/debug/tracing"
+ char pidstr[32];
+
+ if (!tracing_root_ok())
+ return;
+
+ sprintf(pidstr, "%d", getpid());
+ cat_into_file("0", TRACEDIR "/tracing_on");
+ cat_into_file("\n", TRACEDIR "/trace");
+ if (1) {
+ cat_into_file("function_graph", TRACEDIR "/current_tracer");
+ cat_into_file("1", TRACEDIR "/options/funcgraph-proc");
+ } else {
+ cat_into_file("nop", TRACEDIR "/current_tracer");
+ }
+ cat_into_file(pidstr, TRACEDIR "/set_ftrace_pid");
+ cat_into_file("1", TRACEDIR "/tracing_on");
+ dprintf1("enabled tracing\n");
+#endif
+}
+
+void tracing_off(void)
+{
+#if CONTROL_TRACING > 0
+ if (!tracing_root_ok())
+ return;
+ cat_into_file("0", "/sys/kernel/debug/tracing/tracing_on");
+#endif
+}
+
+void abort_hooks(void)
+{
+ fprintf(stderr, "running %s()...\n", __func__);
+ tracing_off();
+#ifdef SLEEP_ON_ABORT
+ sleep(SLEEP_ON_ABORT);
+#endif
+}
+
+static inline void __page_o_noops(void)
+{
+ /* 8-bytes of instruction * 512 bytes = 1 page */
+ asm(".rept 512 ; nopl 0x7eeeeeee(%eax) ; .endr");
+}
+
+/*
+ * This attempts to have roughly a page of instructions followed by a few
+ * instructions that do a write, and another page of instructions. That
+ * way, we are pretty sure that the write is in the second page of
+ * instructions and has at least a page of padding behind it.
+ *
+ * *That* lets us be sure to madvise() away the write instruction, which
+ * will then fault, which makes sure that the fault code handles
+ * execute-only memory properly.
+ */
+__attribute__((__aligned__(PAGE_SIZE)))
+void lots_o_noops_around_write(int *write_to_me)
+{
+ dprintf3("running %s()\n", __func__);
+ __page_o_noops();
+ /* Assume this happens in the second page of instructions: */
+ *write_to_me = __LINE__;
+ /* pad out by another page: */
+ __page_o_noops();
+ dprintf3("%s() done\n", __func__);
+}
+
+/* Define some kernel-like types */
+#define u8 uint8_t
+#define u16 uint16_t
+#define u32 uint32_t
+#define u64 uint64_t
+
+#ifdef __i386__
+
+#ifndef SYS_mprotect_key
+# define SYS_mprotect_key 380
+#endif
+
+#ifndef SYS_pkey_alloc
+# define SYS_pkey_alloc 381
+# define SYS_pkey_free 382
+#endif
+
+#define REG_IP_IDX REG_EIP
+#define si_pkey_offset 0x14
+
+#else
+
+#ifndef SYS_mprotect_key
+# define SYS_mprotect_key 329
+#endif
+
+#ifndef SYS_pkey_alloc
+# define SYS_pkey_alloc 330
+# define SYS_pkey_free 331
+#endif
+
+#define REG_IP_IDX REG_RIP
+#define si_pkey_offset 0x20
+
+#endif
+
+void dump_mem(void *dumpme, int len_bytes)
+{
+ char *c = (void *)dumpme;
+ int i;
+
+ for (i = 0; i < len_bytes; i += sizeof(u64)) {
+ u64 *ptr = (u64 *)(c + i);
+ dprintf1("dump[%03d][@%p]: %016jx\n", i, ptr, *ptr);
+ }
+}
+
+/* Failed address bound checks: */
+#ifndef SEGV_BNDERR
+# define SEGV_BNDERR 3
+#endif
+
+#ifndef SEGV_PKUERR
+# define SEGV_PKUERR 4
+#endif
+
+static char *si_code_str(int si_code)
+{
+ if (si_code == SEGV_MAPERR)
+ return "SEGV_MAPERR";
+ if (si_code == SEGV_ACCERR)
+ return "SEGV_ACCERR";
+ if (si_code == SEGV_BNDERR)
+ return "SEGV_BNDERR";
+ if (si_code == SEGV_PKUERR)
+ return "SEGV_PKUERR";
+ return "UNKNOWN";
+}
+
+int pkru_faults;
+int last_si_pkey = -1;
+void signal_handler(int signum, siginfo_t *si, void *vucontext)
+{
+ ucontext_t *uctxt = vucontext;
+ int trapno;
+ unsigned long ip;
+ char *fpregs;
+ u32 *pkru_ptr;
+ u64 siginfo_pkey;
+ u32 *si_pkey_ptr;
+ int pkru_offset;
+ fpregset_t fpregset;
+
+ dprint_in_signal = 1;
+ dprintf1(">>>>===============SIGSEGV============================\n");
+ dprintf1("%s()::%d, pkru: 0x%x shadow: %x\n", __func__, __LINE__,
+ __rdpkru(), shadow_pkru);
+
+ trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO];
+ ip = uctxt->uc_mcontext.gregs[REG_IP_IDX];
+ fpregset = uctxt->uc_mcontext.fpregs;
+ fpregs = (void *)fpregset;
+
+ dprintf2("%s() trapno: %d ip: 0x%lx info->si_code: %s/%d\n", __func__,
+ trapno, ip, si_code_str(si->si_code), si->si_code);
+#ifdef __i386__
+ /*
+ * 32-bit has some extra padding so that userspace can tell whether
+ * the XSTATE header is present in addition to the "legacy" FPU
+ * state. We just assume that it is here.
+ */
+ fpregs += 0x70;
+#endif
+ pkru_offset = pkru_xstate_offset();
+ pkru_ptr = (void *)(&fpregs[pkru_offset]);
+
+ dprintf1("siginfo: %p\n", si);
+ dprintf1(" fpregs: %p\n", fpregs);
+ /*
+ * If we got a PKRU fault, we *HAVE* to have at least one bit set in
+ * here.
+ */
+ dprintf1("pkru_xstate_offset: %d\n", pkru_xstate_offset());
+ if (DEBUG_LEVEL > 4)
+ dump_mem(pkru_ptr - 128, 256);
+ pkey_assert(*pkru_ptr);
+
+ if ((si->si_code == SEGV_MAPERR) ||
+ (si->si_code == SEGV_ACCERR) ||
+ (si->si_code == SEGV_BNDERR)) {
+ printf("non-PK si_code, exiting...\n");
+ exit(4);
+ }
+
+ si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset);
+ dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr);
+ dump_mem((u8 *)si_pkey_ptr - 8, 24);
+ siginfo_pkey = *si_pkey_ptr;
+ pkey_assert(siginfo_pkey < NR_PKEYS);
+ last_si_pkey = siginfo_pkey;
+
+ dprintf1("signal pkru from xsave: %08x\n", *pkru_ptr);
+ /* need __rdpkru() version so we do not do shadow_pkru checking */
+ dprintf1("signal pkru from pkru: %08x\n", __rdpkru());
+ dprintf1("pkey from siginfo: %jx\n", siginfo_pkey);
+ *(u64 *)pkru_ptr = 0x00000000;
+ dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n");
+ pkru_faults++;
+ dprintf1("<<<<==================================================\n");
+ dprint_in_signal = 0;
+}
+
+int wait_all_children(void)
+{
+ int status;
+ return waitpid(-1, &status, 0);
+}
+
+void sig_chld(int x)
+{
+ dprint_in_signal = 1;
+ dprintf2("[%d] SIGCHLD: %d\n", getpid(), x);
+ dprint_in_signal = 0;
+}
+
+void setup_sigsegv_handler(void)
+{
+ int r, rs;
+ struct sigaction newact;
+ struct sigaction oldact;
+
+ /* #PF is mapped to sigsegv */
+ int signum = SIGSEGV;
+
+ newact.sa_handler = 0;
+ newact.sa_sigaction = signal_handler;
+
+ /*sigset_t - signals to block while in the handler */
+ /* get the old signal mask. */
+ rs = sigprocmask(SIG_SETMASK, 0, &newact.sa_mask);
+ pkey_assert(rs == 0);
+
+ /* call sa_sigaction, not sa_handler*/
+ newact.sa_flags = SA_SIGINFO;
+
+ newact.sa_restorer = 0; /* void(*)(), obsolete */
+ r = sigaction(signum, &newact, &oldact);
+ r = sigaction(SIGALRM, &newact, &oldact);
+ pkey_assert(r == 0);
+}
+
+void setup_handlers(void)
+{
+ signal(SIGCHLD, &sig_chld);
+ setup_sigsegv_handler();
+}
+
+pid_t fork_lazy_child(void)
+{
+ pid_t forkret;
+
+ forkret = fork();
+ pkey_assert(forkret >= 0);
+ dprintf3("[%d] fork() ret: %d\n", getpid(), forkret);
+
+ if (!forkret) {
+ /* in the child */
+ while (1) {
+ dprintf1("child sleeping...\n");
+ sleep(30);
+ }
+ }
+ return forkret;
+}
+
+#ifndef PKEY_DISABLE_ACCESS
+# define PKEY_DISABLE_ACCESS 0x1
+#endif
+
+#ifndef PKEY_DISABLE_WRITE
+# define PKEY_DISABLE_WRITE 0x2
+#endif
+
+static u32 hw_pkey_get(int pkey, unsigned long flags)
+{
+ u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
+ u32 pkru = __rdpkru();
+ u32 shifted_pkru;
+ u32 masked_pkru;
+
+ dprintf1("%s(pkey=%d, flags=%lx) = %x / %d\n",
+ __func__, pkey, flags, 0, 0);
+ dprintf2("%s() raw pkru: %x\n", __func__, pkru);
+
+ shifted_pkru = (pkru >> (pkey * PKRU_BITS_PER_PKEY));
+ dprintf2("%s() shifted_pkru: %x\n", __func__, shifted_pkru);
+ masked_pkru = shifted_pkru & mask;
+ dprintf2("%s() masked pkru: %x\n", __func__, masked_pkru);
+ /*
+ * shift down the relevant bits to the lowest two, then
+ * mask off all the other high bits.
+ */
+ return masked_pkru;
+}
+
+static int hw_pkey_set(int pkey, unsigned long rights, unsigned long flags)
+{
+ u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
+ u32 old_pkru = __rdpkru();
+ u32 new_pkru;
+
+ /* make sure that 'rights' only contains the bits we expect: */
+ assert(!(rights & ~mask));
+
+ /* copy old pkru */
+ new_pkru = old_pkru;
+ /* mask out bits from pkey in old value: */
+ new_pkru &= ~(mask << (pkey * PKRU_BITS_PER_PKEY));
+ /* OR in new bits for pkey: */
+ new_pkru |= (rights << (pkey * PKRU_BITS_PER_PKEY));
+
+ __wrpkru(new_pkru);
+
+ dprintf3("%s(pkey=%d, rights=%lx, flags=%lx) = %x pkru now: %x old_pkru: %x\n",
+ __func__, pkey, rights, flags, 0, __rdpkru(), old_pkru);
+ return 0;
+}
+
+void pkey_disable_set(int pkey, int flags)
+{
+ unsigned long syscall_flags = 0;
+ int ret;
+ int pkey_rights;
+ u32 orig_pkru = rdpkru();
+
+ dprintf1("START->%s(%d, 0x%x)\n", __func__,
+ pkey, flags);
+ pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
+
+ pkey_rights = hw_pkey_get(pkey, syscall_flags);
+
+ dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
+ pkey, pkey, pkey_rights);
+ pkey_assert(pkey_rights >= 0);
+
+ pkey_rights |= flags;
+
+ ret = hw_pkey_set(pkey, pkey_rights, syscall_flags);
+ assert(!ret);
+ /*pkru and flags have the same format */
+ shadow_pkru |= flags << (pkey * 2);
+ dprintf1("%s(%d) shadow: 0x%x\n", __func__, pkey, shadow_pkru);
+
+ pkey_assert(ret >= 0);
+
+ pkey_rights = hw_pkey_get(pkey, syscall_flags);
+ dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
+ pkey, pkey, pkey_rights);
+
+ dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru());
+ if (flags)
+ pkey_assert(rdpkru() > orig_pkru);
+ dprintf1("END<---%s(%d, 0x%x)\n", __func__,
+ pkey, flags);
+}
+
+void pkey_disable_clear(int pkey, int flags)
+{
+ unsigned long syscall_flags = 0;
+ int ret;
+ int pkey_rights = hw_pkey_get(pkey, syscall_flags);
+ u32 orig_pkru = rdpkru();
+
+ pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
+
+ dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
+ pkey, pkey, pkey_rights);
+ pkey_assert(pkey_rights >= 0);
+
+ pkey_rights |= flags;
+
+ ret = hw_pkey_set(pkey, pkey_rights, 0);
+ /* pkru and flags have the same format */
+ shadow_pkru &= ~(flags << (pkey * 2));
+ pkey_assert(ret >= 0);
+
+ pkey_rights = hw_pkey_get(pkey, syscall_flags);
+ dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
+ pkey, pkey, pkey_rights);
+
+ dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru());
+ if (flags)
+ assert(rdpkru() > orig_pkru);
+}
+
+void pkey_write_allow(int pkey)
+{
+ pkey_disable_clear(pkey, PKEY_DISABLE_WRITE);
+}
+void pkey_write_deny(int pkey)
+{
+ pkey_disable_set(pkey, PKEY_DISABLE_WRITE);
+}
+void pkey_access_allow(int pkey)
+{
+ pkey_disable_clear(pkey, PKEY_DISABLE_ACCESS);
+}
+void pkey_access_deny(int pkey)
+{
+ pkey_disable_set(pkey, PKEY_DISABLE_ACCESS);
+}
+
+int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
+ unsigned long pkey)
+{
+ int sret;
+
+ dprintf2("%s(0x%p, %zx, prot=%lx, pkey=%lx)\n", __func__,
+ ptr, size, orig_prot, pkey);
+
+ errno = 0;
+ sret = syscall(SYS_mprotect_key, ptr, size, orig_prot, pkey);
+ if (errno) {
+ dprintf2("SYS_mprotect_key sret: %d\n", sret);
+ dprintf2("SYS_mprotect_key prot: 0x%lx\n", orig_prot);
+ dprintf2("SYS_mprotect_key failed, errno: %d\n", errno);
+ if (DEBUG_LEVEL >= 2)
+ perror("SYS_mprotect_pkey");
+ }
+ return sret;
+}
+
+int sys_pkey_alloc(unsigned long flags, unsigned long init_val)
+{
+ int ret = syscall(SYS_pkey_alloc, flags, init_val);
+ dprintf1("%s(flags=%lx, init_val=%lx) syscall ret: %d errno: %d\n",
+ __func__, flags, init_val, ret, errno);
+ return ret;
+}
+
+int alloc_pkey(void)
+{
+ int ret;
+ unsigned long init_val = 0x0;
+
+ dprintf1("alloc_pkey()::%d, pkru: 0x%x shadow: %x\n",
+ __LINE__, __rdpkru(), shadow_pkru);
+ ret = sys_pkey_alloc(0, init_val);
+ /*
+ * pkey_alloc() sets PKRU, so we need to reflect it in
+ * shadow_pkru:
+ */
+ dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
+ __LINE__, ret, __rdpkru(), shadow_pkru);
+ if (ret) {
+ /* clear both the bits: */
+ shadow_pkru &= ~(0x3 << (ret * 2));
+ dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
+ __LINE__, ret, __rdpkru(), shadow_pkru);
+ /*
+ * move the new state in from init_val
+ * (remember, we cheated and init_val == pkru format)
+ */
+ shadow_pkru |= (init_val << (ret * 2));
+ }
+ dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
+ __LINE__, ret, __rdpkru(), shadow_pkru);
+ dprintf1("alloc_pkey()::%d errno: %d\n", __LINE__, errno);
+ /* for shadow checking: */
+ rdpkru();
+ dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
+ __LINE__, ret, __rdpkru(), shadow_pkru);
+ return ret;
+}
+
+int sys_pkey_free(unsigned long pkey)
+{
+ int ret = syscall(SYS_pkey_free, pkey);
+ dprintf1("%s(pkey=%ld) syscall ret: %d\n", __func__, pkey, ret);
+ return ret;
+}
+
+/*
+ * I had a bug where pkey bits could be set by mprotect() but
+ * not cleared. This ensures we get lots of random bit sets
+ * and clears on the vma and pte pkey bits.
+ */
+int alloc_random_pkey(void)
+{
+ int max_nr_pkey_allocs;
+ int ret;
+ int i;
+ int alloced_pkeys[NR_PKEYS];
+ int nr_alloced = 0;
+ int random_index;
+ memset(alloced_pkeys, 0, sizeof(alloced_pkeys));
+
+ /* allocate every possible key and make a note of which ones we got */
+ max_nr_pkey_allocs = NR_PKEYS;
+ for (i = 0; i < max_nr_pkey_allocs; i++) {
+ int new_pkey = alloc_pkey();
+ if (new_pkey < 0)
+ break;
+ alloced_pkeys[nr_alloced++] = new_pkey;
+ }
+
+ pkey_assert(nr_alloced > 0);
+ /* select a random one out of the allocated ones */
+ random_index = rand() % nr_alloced;
+ ret = alloced_pkeys[random_index];
+ /* now zero it out so we don't free it next */
+ alloced_pkeys[random_index] = 0;
+
+ /* go through the allocated ones that we did not want and free them */
+ for (i = 0; i < nr_alloced; i++) {
+ int free_ret;
+ if (!alloced_pkeys[i])
+ continue;
+ free_ret = sys_pkey_free(alloced_pkeys[i]);
+ pkey_assert(!free_ret);
+ }
+ dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
+ __LINE__, ret, __rdpkru(), shadow_pkru);
+ return ret;
+}
+
+int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
+ unsigned long pkey)
+{
+ int nr_iterations = random() % 100;
+ int ret;
+
+ while (0) {
+ int rpkey = alloc_random_pkey();
+ ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey);
+ dprintf1("sys_mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n",
+ ptr, size, orig_prot, pkey, ret);
+ if (nr_iterations-- < 0)
+ break;
+
+ dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
+ __LINE__, ret, __rdpkru(), shadow_pkru);
+ sys_pkey_free(rpkey);
+ dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
+ __LINE__, ret, __rdpkru(), shadow_pkru);
+ }
+ pkey_assert(pkey < NR_PKEYS);
+
+ ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey);
+ dprintf1("mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n",
+ ptr, size, orig_prot, pkey, ret);
+ pkey_assert(!ret);
+ dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
+ __LINE__, ret, __rdpkru(), shadow_pkru);
+ return ret;
+}
+
+struct pkey_malloc_record {
+ void *ptr;
+ long size;
+ int prot;
+};
+struct pkey_malloc_record *pkey_malloc_records;
+struct pkey_malloc_record *pkey_last_malloc_record;
+long nr_pkey_malloc_records;
+void record_pkey_malloc(void *ptr, long size, int prot)
+{
+ long i;
+ struct pkey_malloc_record *rec = NULL;
+
+ for (i = 0; i < nr_pkey_malloc_records; i++) {
+ rec = &pkey_malloc_records[i];
+ /* find a free record */
+ if (rec)
+ break;
+ }
+ if (!rec) {
+ /* every record is full */
+ size_t old_nr_records = nr_pkey_malloc_records;
+ size_t new_nr_records = (nr_pkey_malloc_records * 2 + 1);
+ size_t new_size = new_nr_records * sizeof(struct pkey_malloc_record);
+ dprintf2("new_nr_records: %zd\n", new_nr_records);
+ dprintf2("new_size: %zd\n", new_size);
+ pkey_malloc_records = realloc(pkey_malloc_records, new_size);
+ pkey_assert(pkey_malloc_records != NULL);
+ rec = &pkey_malloc_records[nr_pkey_malloc_records];
+ /*
+ * realloc() does not initialize memory, so zero it from
+ * the first new record all the way to the end.
+ */
+ for (i = 0; i < new_nr_records - old_nr_records; i++)
+ memset(rec + i, 0, sizeof(*rec));
+ }
+ dprintf3("filling malloc record[%d/%p]: {%p, %ld}\n",
+ (int)(rec - pkey_malloc_records), rec, ptr, size);
+ rec->ptr = ptr;
+ rec->size = size;
+ rec->prot = prot;
+ pkey_last_malloc_record = rec;
+ nr_pkey_malloc_records++;
+}
+
+void free_pkey_malloc(void *ptr)
+{
+ long i;
+ int ret;
+ dprintf3("%s(%p)\n", __func__, ptr);
+ for (i = 0; i < nr_pkey_malloc_records; i++) {
+ struct pkey_malloc_record *rec = &pkey_malloc_records[i];
+ dprintf4("looking for ptr %p at record[%ld/%p]: {%p, %ld}\n",
+ ptr, i, rec, rec->ptr, rec->size);
+ if ((ptr < rec->ptr) ||
+ (ptr >= rec->ptr + rec->size))
+ continue;
+
+ dprintf3("found ptr %p at record[%ld/%p]: {%p, %ld}\n",
+ ptr, i, rec, rec->ptr, rec->size);
+ nr_pkey_malloc_records--;
+ ret = munmap(rec->ptr, rec->size);
+ dprintf3("munmap ret: %d\n", ret);
+ pkey_assert(!ret);
+ dprintf3("clearing rec->ptr, rec: %p\n", rec);
+ rec->ptr = NULL;
+ dprintf3("done clearing rec->ptr, rec: %p\n", rec);
+ return;
+ }
+ pkey_assert(false);
+}
+
+
+void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey)
+{
+ void *ptr;
+ int ret;
+
+ rdpkru();
+ dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__,
+ size, prot, pkey);
+ pkey_assert(pkey < NR_PKEYS);
+ ptr = mmap(NULL, size, prot, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+ pkey_assert(ptr != (void *)-1);
+ ret = mprotect_pkey((void *)ptr, PAGE_SIZE, prot, pkey);
+ pkey_assert(!ret);
+ record_pkey_malloc(ptr, size, prot);
+ rdpkru();
+
+ dprintf1("%s() for pkey %d @ %p\n", __func__, pkey, ptr);
+ return ptr;
+}
+
+void *malloc_pkey_anon_huge(long size, int prot, u16 pkey)
+{
+ int ret;
+ void *ptr;
+
+ dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__,
+ size, prot, pkey);
+ /*
+ * Guarantee we can fit at least one huge page in the resulting
+ * allocation by allocating space for 2:
+ */
+ size = ALIGN_UP(size, HPAGE_SIZE * 2);
+ ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+ pkey_assert(ptr != (void *)-1);
+ record_pkey_malloc(ptr, size, prot);
+ mprotect_pkey(ptr, size, prot, pkey);
+
+ dprintf1("unaligned ptr: %p\n", ptr);
+ ptr = ALIGN_PTR_UP(ptr, HPAGE_SIZE);
+ dprintf1(" aligned ptr: %p\n", ptr);
+ ret = madvise(ptr, HPAGE_SIZE, MADV_HUGEPAGE);
+ dprintf1("MADV_HUGEPAGE ret: %d\n", ret);
+ ret = madvise(ptr, HPAGE_SIZE, MADV_WILLNEED);
+ dprintf1("MADV_WILLNEED ret: %d\n", ret);
+ memset(ptr, 0, HPAGE_SIZE);
+
+ dprintf1("mmap()'d thp for pkey %d @ %p\n", pkey, ptr);
+ return ptr;
+}
+
+int hugetlb_setup_ok;
+#define GET_NR_HUGE_PAGES 10
+void setup_hugetlbfs(void)
+{
+ int err;
+ int fd;
+ char buf[] = "123";
+
+ if (geteuid() != 0) {
+ fprintf(stderr, "WARNING: not run as root, can not do hugetlb test\n");
+ return;
+ }
+
+ cat_into_file(__stringify(GET_NR_HUGE_PAGES), "/proc/sys/vm/nr_hugepages");
+
+ /*
+ * Now go make sure that we got the pages and that they
+ * are 2M pages. Someone might have made 1G the default.
+ */
+ fd = open("/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages", O_RDONLY);
+ if (fd < 0) {
+ perror("opening sysfs 2M hugetlb config");
+ return;
+ }
+
+ /* -1 to guarantee leaving the trailing \0 */
+ err = read(fd, buf, sizeof(buf)-1);
+ close(fd);
+ if (err <= 0) {
+ perror("reading sysfs 2M hugetlb config");
+ return;
+ }
+
+ if (atoi(buf) != GET_NR_HUGE_PAGES) {
+ fprintf(stderr, "could not confirm 2M pages, got: '%s' expected %d\n",
+ buf, GET_NR_HUGE_PAGES);
+ return;
+ }
+
+ hugetlb_setup_ok = 1;
+}
+
+void *malloc_pkey_hugetlb(long size, int prot, u16 pkey)
+{
+ void *ptr;
+ int flags = MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB;
+
+ if (!hugetlb_setup_ok)
+ return PTR_ERR_ENOTSUP;
+
+ dprintf1("doing %s(%ld, %x, %x)\n", __func__, size, prot, pkey);
+ size = ALIGN_UP(size, HPAGE_SIZE * 2);
+ pkey_assert(pkey < NR_PKEYS);
+ ptr = mmap(NULL, size, PROT_NONE, flags, -1, 0);
+ pkey_assert(ptr != (void *)-1);
+ mprotect_pkey(ptr, size, prot, pkey);
+
+ record_pkey_malloc(ptr, size, prot);
+
+ dprintf1("mmap()'d hugetlbfs for pkey %d @ %p\n", pkey, ptr);
+ return ptr;
+}
+
+void *malloc_pkey_mmap_dax(long size, int prot, u16 pkey)
+{
+ void *ptr;
+ int fd;
+
+ dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__,
+ size, prot, pkey);
+ pkey_assert(pkey < NR_PKEYS);
+ fd = open("/dax/foo", O_RDWR);
+ pkey_assert(fd >= 0);
+
+ ptr = mmap(0, size, prot, MAP_SHARED, fd, 0);
+ pkey_assert(ptr != (void *)-1);
+
+ mprotect_pkey(ptr, size, prot, pkey);
+
+ record_pkey_malloc(ptr, size, prot);
+
+ dprintf1("mmap()'d for pkey %d @ %p\n", pkey, ptr);
+ close(fd);
+ return ptr;
+}
+
+void *(*pkey_malloc[])(long size, int prot, u16 pkey) = {
+
+ malloc_pkey_with_mprotect,
+ malloc_pkey_anon_huge,
+ malloc_pkey_hugetlb
+/* can not do direct with the pkey_mprotect() API:
+ malloc_pkey_mmap_direct,
+ malloc_pkey_mmap_dax,
+*/
+};
+
+void *malloc_pkey(long size, int prot, u16 pkey)
+{
+ void *ret;
+ static int malloc_type;
+ int nr_malloc_types = ARRAY_SIZE(pkey_malloc);
+
+ pkey_assert(pkey < NR_PKEYS);
+
+ while (1) {
+ pkey_assert(malloc_type < nr_malloc_types);
+
+ ret = pkey_malloc[malloc_type](size, prot, pkey);
+ pkey_assert(ret != (void *)-1);
+
+ malloc_type++;
+ if (malloc_type >= nr_malloc_types)
+ malloc_type = (random()%nr_malloc_types);
+
+ /* try again if the malloc_type we tried is unsupported */
+ if (ret == PTR_ERR_ENOTSUP)
+ continue;
+
+ break;
+ }
+
+ dprintf3("%s(%ld, prot=%x, pkey=%x) returning: %p\n", __func__,
+ size, prot, pkey, ret);
+ return ret;
+}
+
+int last_pkru_faults;
+#define UNKNOWN_PKEY -2
+void expected_pk_fault(int pkey)
+{
+ dprintf2("%s(): last_pkru_faults: %d pkru_faults: %d\n",
+ __func__, last_pkru_faults, pkru_faults);
+ dprintf2("%s(%d): last_si_pkey: %d\n", __func__, pkey, last_si_pkey);
+ pkey_assert(last_pkru_faults + 1 == pkru_faults);
+
+ /*
+ * For exec-only memory, we do not know the pkey in
+ * advance, so skip this check.
+ */
+ if (pkey != UNKNOWN_PKEY)
+ pkey_assert(last_si_pkey == pkey);
+
+ /*
+ * The signal handler shold have cleared out PKRU to let the
+ * test program continue. We now have to restore it.
+ */
+ if (__rdpkru() != 0)
+ pkey_assert(0);
+
+ __wrpkru(shadow_pkru);
+ dprintf1("%s() set PKRU=%x to restore state after signal nuked it\n",
+ __func__, shadow_pkru);
+ last_pkru_faults = pkru_faults;
+ last_si_pkey = -1;
+}
+
+#define do_not_expect_pk_fault(msg) do { \
+ if (last_pkru_faults != pkru_faults) \
+ dprintf0("unexpected PK fault: %s\n", msg); \
+ pkey_assert(last_pkru_faults == pkru_faults); \
+} while (0)
+
+int test_fds[10] = { -1 };
+int nr_test_fds;
+void __save_test_fd(int fd)
+{
+ pkey_assert(fd >= 0);
+ pkey_assert(nr_test_fds < ARRAY_SIZE(test_fds));
+ test_fds[nr_test_fds] = fd;
+ nr_test_fds++;
+}
+
+int get_test_read_fd(void)
+{
+ int test_fd = open("/etc/passwd", O_RDONLY);
+ __save_test_fd(test_fd);
+ return test_fd;
+}
+
+void close_test_fds(void)
+{
+ int i;
+
+ for (i = 0; i < nr_test_fds; i++) {
+ if (test_fds[i] < 0)
+ continue;
+ close(test_fds[i]);
+ test_fds[i] = -1;
+ }
+ nr_test_fds = 0;
+}
+
+#define barrier() __asm__ __volatile__("": : :"memory")
+__attribute__((noinline)) int read_ptr(int *ptr)
+{
+ /*
+ * Keep GCC from optimizing this away somehow
+ */
+ barrier();
+ return *ptr;
+}
+
+void test_read_of_write_disabled_region(int *ptr, u16 pkey)
+{
+ int ptr_contents;
+
+ dprintf1("disabling write access to PKEY[1], doing read\n");
+ pkey_write_deny(pkey);
+ ptr_contents = read_ptr(ptr);
+ dprintf1("*ptr: %d\n", ptr_contents);
+ dprintf1("\n");
+}
+void test_read_of_access_disabled_region(int *ptr, u16 pkey)
+{
+ int ptr_contents;
+
+ dprintf1("disabling access to PKEY[%02d], doing read @ %p\n", pkey, ptr);
+ rdpkru();
+ pkey_access_deny(pkey);
+ ptr_contents = read_ptr(ptr);
+ dprintf1("*ptr: %d\n", ptr_contents);
+ expected_pk_fault(pkey);
+}
+void test_write_of_write_disabled_region(int *ptr, u16 pkey)
+{
+ dprintf1("disabling write access to PKEY[%02d], doing write\n", pkey);
+ pkey_write_deny(pkey);
+ *ptr = __LINE__;
+ expected_pk_fault(pkey);
+}
+void test_write_of_access_disabled_region(int *ptr, u16 pkey)
+{
+ dprintf1("disabling access to PKEY[%02d], doing write\n", pkey);
+ pkey_access_deny(pkey);
+ *ptr = __LINE__;
+ expected_pk_fault(pkey);
+}
+void test_kernel_write_of_access_disabled_region(int *ptr, u16 pkey)
+{
+ int ret;
+ int test_fd = get_test_read_fd();
+
+ dprintf1("disabling access to PKEY[%02d], "
+ "having kernel read() to buffer\n", pkey);
+ pkey_access_deny(pkey);
+ ret = read(test_fd, ptr, 1);
+ dprintf1("read ret: %d\n", ret);
+ pkey_assert(ret);
+}
+void test_kernel_write_of_write_disabled_region(int *ptr, u16 pkey)
+{
+ int ret;
+ int test_fd = get_test_read_fd();
+
+ pkey_write_deny(pkey);
+ ret = read(test_fd, ptr, 100);
+ dprintf1("read ret: %d\n", ret);
+ if (ret < 0 && (DEBUG_LEVEL > 0))
+ perror("verbose read result (OK for this to be bad)");
+ pkey_assert(ret);
+}
+
+void test_kernel_gup_of_access_disabled_region(int *ptr, u16 pkey)
+{
+ int pipe_ret, vmsplice_ret;
+ struct iovec iov;
+ int pipe_fds[2];
+
+ pipe_ret = pipe(pipe_fds);
+
+ pkey_assert(pipe_ret == 0);
+ dprintf1("disabling access to PKEY[%02d], "
+ "having kernel vmsplice from buffer\n", pkey);
+ pkey_access_deny(pkey);
+ iov.iov_base = ptr;
+ iov.iov_len = PAGE_SIZE;
+ vmsplice_ret = vmsplice(pipe_fds[1], &iov, 1, SPLICE_F_GIFT);
+ dprintf1("vmsplice() ret: %d\n", vmsplice_ret);
+ pkey_assert(vmsplice_ret == -1);
+
+ close(pipe_fds[0]);
+ close(pipe_fds[1]);
+}
+
+void test_kernel_gup_write_to_write_disabled_region(int *ptr, u16 pkey)
+{
+ int ignored = 0xdada;
+ int futex_ret;
+ int some_int = __LINE__;
+
+ dprintf1("disabling write to PKEY[%02d], "
+ "doing futex gunk in buffer\n", pkey);
+ *ptr = some_int;
+ pkey_write_deny(pkey);
+ futex_ret = syscall(SYS_futex, ptr, FUTEX_WAIT, some_int-1, NULL,
+ &ignored, ignored);
+ if (DEBUG_LEVEL > 0)
+ perror("futex");
+ dprintf1("futex() ret: %d\n", futex_ret);
+}
+
+/* Assumes that all pkeys other than 'pkey' are unallocated */
+void test_pkey_syscalls_on_non_allocated_pkey(int *ptr, u16 pkey)
+{
+ int err;
+ int i;
+
+ /* Note: 0 is the default pkey, so don't mess with it */
+ for (i = 1; i < NR_PKEYS; i++) {
+ if (pkey == i)
+ continue;
+
+ dprintf1("trying get/set/free to non-allocated pkey: %2d\n", i);
+ err = sys_pkey_free(i);
+ pkey_assert(err);
+
+ err = sys_pkey_free(i);
+ pkey_assert(err);
+
+ err = sys_mprotect_pkey(ptr, PAGE_SIZE, PROT_READ, i);
+ pkey_assert(err);
+ }
+}
+
+/* Assumes that all pkeys other than 'pkey' are unallocated */
+void test_pkey_syscalls_bad_args(int *ptr, u16 pkey)
+{
+ int err;
+ int bad_pkey = NR_PKEYS+99;
+
+ /* pass a known-invalid pkey in: */
+ err = sys_mprotect_pkey(ptr, PAGE_SIZE, PROT_READ, bad_pkey);
+ pkey_assert(err);
+}
+
+void become_child(void)
+{
+ pid_t forkret;
+
+ forkret = fork();
+ pkey_assert(forkret >= 0);
+ dprintf3("[%d] fork() ret: %d\n", getpid(), forkret);
+
+ if (!forkret) {
+ /* in the child */
+ return;
+ }
+ exit(0);
+}
+
+/* Assumes that all pkeys other than 'pkey' are unallocated */
+void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
+{
+ int err;
+ int allocated_pkeys[NR_PKEYS] = {0};
+ int nr_allocated_pkeys = 0;
+ int i;
+
+ for (i = 0; i < NR_PKEYS*3; i++) {
+ int new_pkey;
+ dprintf1("%s() alloc loop: %d\n", __func__, i);
+ new_pkey = alloc_pkey();
+ dprintf4("%s()::%d, err: %d pkru: 0x%x shadow: 0x%x\n", __func__,
+ __LINE__, err, __rdpkru(), shadow_pkru);
+ rdpkru(); /* for shadow checking */
+ dprintf2("%s() errno: %d ENOSPC: %d\n", __func__, errno, ENOSPC);
+ if ((new_pkey == -1) && (errno == ENOSPC)) {
+ dprintf2("%s() failed to allocate pkey after %d tries\n",
+ __func__, nr_allocated_pkeys);
+ } else {
+ /*
+ * Ensure the number of successes never
+ * exceeds the number of keys supported
+ * in the hardware.
+ */
+ pkey_assert(nr_allocated_pkeys < NR_PKEYS);
+ allocated_pkeys[nr_allocated_pkeys++] = new_pkey;
+ }
+
+ /*
+ * Make sure that allocation state is properly
+ * preserved across fork().
+ */
+ if (i == NR_PKEYS*2)
+ become_child();
+ }
+
+ dprintf3("%s()::%d\n", __func__, __LINE__);
+
+ /*
+ * There are 16 pkeys supported in hardware. Three are
+ * allocated by the time we get here:
+ * 1. The default key (0)
+ * 2. One possibly consumed by an execute-only mapping.
+ * 3. One allocated by the test code and passed in via
+ * 'pkey' to this function.
+ * Ensure that we can allocate at least another 13 (16-3).
+ */
+ pkey_assert(i >= NR_PKEYS-3);
+
+ for (i = 0; i < nr_allocated_pkeys; i++) {
+ err = sys_pkey_free(allocated_pkeys[i]);
+ pkey_assert(!err);
+ rdpkru(); /* for shadow checking */
+ }
+}
+
+/*
+ * pkey 0 is special. It is allocated by default, so you do not
+ * have to call pkey_alloc() to use it first. Make sure that it
+ * is usable.
+ */
+void test_mprotect_with_pkey_0(int *ptr, u16 pkey)
+{
+ long size;
+ int prot;
+
+ assert(pkey_last_malloc_record);
+ size = pkey_last_malloc_record->size;
+ /*
+ * This is a bit of a hack. But mprotect() requires
+ * huge-page-aligned sizes when operating on hugetlbfs.
+ * So, make sure that we use something that's a multiple
+ * of a huge page when we can.
+ */
+ if (size >= HPAGE_SIZE)
+ size = HPAGE_SIZE;
+ prot = pkey_last_malloc_record->prot;
+
+ /* Use pkey 0 */
+ mprotect_pkey(ptr, size, prot, 0);
+
+ /* Make sure that we can set it back to the original pkey. */
+ mprotect_pkey(ptr, size, prot, pkey);
+}
+
+void test_ptrace_of_child(int *ptr, u16 pkey)
+{
+ __attribute__((__unused__)) int peek_result;
+ pid_t child_pid;
+ void *ignored = 0;
+ long ret;
+ int status;
+ /*
+ * This is the "control" for our little expermient. Make sure
+ * we can always access it when ptracing.
+ */
+ int *plain_ptr_unaligned = malloc(HPAGE_SIZE);
+ int *plain_ptr = ALIGN_PTR_UP(plain_ptr_unaligned, PAGE_SIZE);
+
+ /*
+ * Fork a child which is an exact copy of this process, of course.
+ * That means we can do all of our tests via ptrace() and then plain
+ * memory access and ensure they work differently.
+ */
+ child_pid = fork_lazy_child();
+ dprintf1("[%d] child pid: %d\n", getpid(), child_pid);
+
+ ret = ptrace(PTRACE_ATTACH, child_pid, ignored, ignored);
+ if (ret)
+ perror("attach");
+ dprintf1("[%d] attach ret: %ld %d\n", getpid(), ret, __LINE__);
+ pkey_assert(ret != -1);
+ ret = waitpid(child_pid, &status, WUNTRACED);
+ if ((ret != child_pid) || !(WIFSTOPPED(status))) {
+ fprintf(stderr, "weird waitpid result %ld stat %x\n",
+ ret, status);
+ pkey_assert(0);
+ }
+ dprintf2("waitpid ret: %ld\n", ret);
+ dprintf2("waitpid status: %d\n", status);
+
+ pkey_access_deny(pkey);
+ pkey_write_deny(pkey);
+
+ /* Write access, untested for now:
+ ret = ptrace(PTRACE_POKEDATA, child_pid, peek_at, data);
+ pkey_assert(ret != -1);
+ dprintf1("poke at %p: %ld\n", peek_at, ret);
+ */
+
+ /*
+ * Try to access the pkey-protected "ptr" via ptrace:
+ */
+ ret = ptrace(PTRACE_PEEKDATA, child_pid, ptr, ignored);
+ /* expect it to work, without an error: */
+ pkey_assert(ret != -1);
+ /* Now access from the current task, and expect an exception: */
+ peek_result = read_ptr(ptr);
+ expected_pk_fault(pkey);
+
+ /*
+ * Try to access the NON-pkey-protected "plain_ptr" via ptrace:
+ */
+ ret = ptrace(PTRACE_PEEKDATA, child_pid, plain_ptr, ignored);
+ /* expect it to work, without an error: */
+ pkey_assert(ret != -1);
+ /* Now access from the current task, and expect NO exception: */
+ peek_result = read_ptr(plain_ptr);
+ do_not_expect_pk_fault("read plain pointer after ptrace");
+
+ ret = ptrace(PTRACE_DETACH, child_pid, ignored, 0);
+ pkey_assert(ret != -1);
+
+ ret = kill(child_pid, SIGKILL);
+ pkey_assert(ret != -1);
+
+ wait(&status);
+
+ free(plain_ptr_unaligned);
+}
+
+void *get_pointer_to_instructions(void)
+{
+ void *p1;
+
+ p1 = ALIGN_PTR_UP(&lots_o_noops_around_write, PAGE_SIZE);
+ dprintf3("&lots_o_noops: %p\n", &lots_o_noops_around_write);
+ /* lots_o_noops_around_write should be page-aligned already */
+ assert(p1 == &lots_o_noops_around_write);
+
+ /* Point 'p1' at the *second* page of the function: */
+ p1 += PAGE_SIZE;
+
+ /*
+ * Try to ensure we fault this in on next touch to ensure
+ * we get an instruction fault as opposed to a data one
+ */
+ madvise(p1, PAGE_SIZE, MADV_DONTNEED);
+
+ return p1;
+}
+
+void test_executing_on_unreadable_memory(int *ptr, u16 pkey)
+{
+ void *p1;
+ int scratch;
+ int ptr_contents;
+ int ret;
+
+ p1 = get_pointer_to_instructions();
+ lots_o_noops_around_write(&scratch);
+ ptr_contents = read_ptr(p1);
+ dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
+
+ ret = mprotect_pkey(p1, PAGE_SIZE, PROT_EXEC, (u64)pkey);
+ pkey_assert(!ret);
+ pkey_access_deny(pkey);
+
+ dprintf2("pkru: %x\n", rdpkru());
+
+ /*
+ * Make sure this is an *instruction* fault
+ */
+ madvise(p1, PAGE_SIZE, MADV_DONTNEED);
+ lots_o_noops_around_write(&scratch);
+ do_not_expect_pk_fault("executing on PROT_EXEC memory");
+ ptr_contents = read_ptr(p1);
+ dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
+ expected_pk_fault(pkey);
+}
+
+void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
+{
+ void *p1;
+ int scratch;
+ int ptr_contents;
+ int ret;
+
+ dprintf1("%s() start\n", __func__);
+
+ p1 = get_pointer_to_instructions();
+ lots_o_noops_around_write(&scratch);
+ ptr_contents = read_ptr(p1);
+ dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
+
+ /* Use a *normal* mprotect(), not mprotect_pkey(): */
+ ret = mprotect(p1, PAGE_SIZE, PROT_EXEC);
+ pkey_assert(!ret);
+
+ dprintf2("pkru: %x\n", rdpkru());
+
+ /* Make sure this is an *instruction* fault */
+ madvise(p1, PAGE_SIZE, MADV_DONTNEED);
+ lots_o_noops_around_write(&scratch);
+ do_not_expect_pk_fault("executing on PROT_EXEC memory");
+ ptr_contents = read_ptr(p1);
+ dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
+ expected_pk_fault(UNKNOWN_PKEY);
+
+ /*
+ * Put the memory back to non-PROT_EXEC. Should clear the
+ * exec-only pkey off the VMA and allow it to be readable
+ * again. Go to PROT_NONE first to check for a kernel bug
+ * that did not clear the pkey when doing PROT_NONE.
+ */
+ ret = mprotect(p1, PAGE_SIZE, PROT_NONE);
+ pkey_assert(!ret);
+
+ ret = mprotect(p1, PAGE_SIZE, PROT_READ|PROT_EXEC);
+ pkey_assert(!ret);
+ ptr_contents = read_ptr(p1);
+ do_not_expect_pk_fault("plain read on recently PROT_EXEC area");
+}
+
+void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey)
+{
+ int size = PAGE_SIZE;
+ int sret;
+
+ if (cpu_has_pku()) {
+ dprintf1("SKIP: %s: no CPU support\n", __func__);
+ return;
+ }
+
+ sret = syscall(SYS_mprotect_key, ptr, size, PROT_READ, pkey);
+ pkey_assert(sret < 0);
+}
+
+void (*pkey_tests[])(int *ptr, u16 pkey) = {
+ test_read_of_write_disabled_region,
+ test_read_of_access_disabled_region,
+ test_write_of_write_disabled_region,
+ test_write_of_access_disabled_region,
+ test_kernel_write_of_access_disabled_region,
+ test_kernel_write_of_write_disabled_region,
+ test_kernel_gup_of_access_disabled_region,
+ test_kernel_gup_write_to_write_disabled_region,
+ test_executing_on_unreadable_memory,
+ test_implicit_mprotect_exec_only_memory,
+ test_mprotect_with_pkey_0,
+ test_ptrace_of_child,
+ test_pkey_syscalls_on_non_allocated_pkey,
+ test_pkey_syscalls_bad_args,
+ test_pkey_alloc_exhaust,
+};
+
+void run_tests_once(void)
+{
+ int *ptr;
+ int prot = PROT_READ|PROT_WRITE;
+
+ for (test_nr = 0; test_nr < ARRAY_SIZE(pkey_tests); test_nr++) {
+ int pkey;
+ int orig_pkru_faults = pkru_faults;
+
+ dprintf1("======================\n");
+ dprintf1("test %d preparing...\n", test_nr);
+
+ tracing_on();
+ pkey = alloc_random_pkey();
+ dprintf1("test %d starting with pkey: %d\n", test_nr, pkey);
+ ptr = malloc_pkey(PAGE_SIZE, prot, pkey);
+ dprintf1("test %d starting...\n", test_nr);
+ pkey_tests[test_nr](ptr, pkey);
+ dprintf1("freeing test memory: %p\n", ptr);
+ free_pkey_malloc(ptr);
+ sys_pkey_free(pkey);
+
+ dprintf1("pkru_faults: %d\n", pkru_faults);
+ dprintf1("orig_pkru_faults: %d\n", orig_pkru_faults);
+
+ tracing_off();
+ close_test_fds();
+
+ printf("test %2d PASSED (iteration %d)\n", test_nr, iteration_nr);
+ dprintf1("======================\n\n");
+ }
+ iteration_nr++;
+}
+
+void pkey_setup_shadow(void)
+{
+ shadow_pkru = __rdpkru();
+}
+
+int main(void)
+{
+ int nr_iterations = 22;
+
+ srand((unsigned int)time(NULL));
+
+ setup_handlers();
+
+ printf("has pku: %d\n", cpu_has_pku());
+
+ if (!cpu_has_pku()) {
+ int size = PAGE_SIZE;
+ int *ptr;
+
+ printf("running PKEY tests for unsupported CPU/OS\n");
+
+ ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+ assert(ptr != (void *)-1);
+ test_mprotect_pkey_on_unsupported_cpu(ptr, 1);
+ exit(0);
+ }
+
+ pkey_setup_shadow();
+ printf("startup pkru: %x\n", rdpkru());
+ setup_hugetlbfs();
+
+ while (nr_iterations-- > 0)
+ run_tests_once();
+
+ printf("done (all tests OK)\n");
+ return 0;
+}
diff --git a/tools/testing/selftests/x86/ptrace_syscall.c b/tools/testing/selftests/x86/ptrace_syscall.c
new file mode 100644
index 000000000..12aaa0631
--- /dev/null
+++ b/tools/testing/selftests/x86/ptrace_syscall.c
@@ -0,0 +1,430 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+
+#include <sys/ptrace.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/syscall.h>
+#include <sys/user.h>
+#include <unistd.h>
+#include <errno.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <err.h>
+#include <string.h>
+#include <asm/ptrace-abi.h>
+#include <sys/auxv.h>
+
+/* Bitness-agnostic defines for user_regs_struct fields. */
+#ifdef __x86_64__
+# define user_syscall_nr orig_rax
+# define user_arg0 rdi
+# define user_arg1 rsi
+# define user_arg2 rdx
+# define user_arg3 r10
+# define user_arg4 r8
+# define user_arg5 r9
+# define user_ip rip
+# define user_ax rax
+#else
+# define user_syscall_nr orig_eax
+# define user_arg0 ebx
+# define user_arg1 ecx
+# define user_arg2 edx
+# define user_arg3 esi
+# define user_arg4 edi
+# define user_arg5 ebp
+# define user_ip eip
+# define user_ax eax
+#endif
+
+static int nerrs = 0;
+
+struct syscall_args32 {
+ uint32_t nr, arg0, arg1, arg2, arg3, arg4, arg5;
+};
+
+#ifdef __i386__
+extern void sys32_helper(struct syscall_args32 *, void *);
+extern void int80_and_ret(void);
+#endif
+
+/*
+ * Helper to invoke int80 with controlled regs and capture the final regs.
+ */
+static void do_full_int80(struct syscall_args32 *args)
+{
+#ifdef __x86_64__
+ register unsigned long bp asm("bp") = args->arg5;
+ asm volatile ("int $0x80"
+ : "+a" (args->nr),
+ "+b" (args->arg0), "+c" (args->arg1), "+d" (args->arg2),
+ "+S" (args->arg3), "+D" (args->arg4), "+r" (bp)
+ : : "r8", "r9", "r10", "r11");
+ args->arg5 = bp;
+#else
+ sys32_helper(args, int80_and_ret);
+#endif
+}
+
+#ifdef __i386__
+static void (*vsyscall32)(void);
+
+/*
+ * Nasty helper to invoke AT_SYSINFO (i.e. __kernel_vsyscall) with
+ * controlled regs and capture the final regs. This is so nasty that it
+ * crashes my copy of gdb :)
+ */
+static void do_full_vsyscall32(struct syscall_args32 *args)
+{
+ sys32_helper(args, vsyscall32);
+}
+#endif
+
+static siginfo_t wait_trap(pid_t chld)
+{
+ siginfo_t si;
+ if (waitid(P_PID, chld, &si, WEXITED|WSTOPPED) != 0)
+ err(1, "waitid");
+ if (si.si_pid != chld)
+ errx(1, "got unexpected pid in event\n");
+ if (si.si_code != CLD_TRAPPED)
+ errx(1, "got unexpected event type %d\n", si.si_code);
+ return si;
+}
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+ int flags)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static void setsigign(int sig, int flags)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = (void *)SIG_IGN;
+ sa.sa_flags = flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static void clearhandler(int sig)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_handler = SIG_DFL;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+#ifdef __x86_64__
+# define REG_BP REG_RBP
+#else
+# define REG_BP REG_EBP
+#endif
+
+static void empty_handler(int sig, siginfo_t *si, void *ctx_void)
+{
+}
+
+static void test_sys32_regs(void (*do_syscall)(struct syscall_args32 *))
+{
+ struct syscall_args32 args = {
+ .nr = 224, /* gettid */
+ .arg0 = 10, .arg1 = 11, .arg2 = 12,
+ .arg3 = 13, .arg4 = 14, .arg5 = 15,
+ };
+
+ do_syscall(&args);
+
+ if (args.nr != getpid() ||
+ args.arg0 != 10 || args.arg1 != 11 || args.arg2 != 12 ||
+ args.arg3 != 13 || args.arg4 != 14 || args.arg5 != 15) {
+ printf("[FAIL]\tgetpid() failed to preserve regs\n");
+ nerrs++;
+ } else {
+ printf("[OK]\tgetpid() preserves regs\n");
+ }
+
+ sethandler(SIGUSR1, empty_handler, 0);
+
+ args.nr = 37; /* kill */
+ args.arg0 = getpid();
+ args.arg1 = SIGUSR1;
+ do_syscall(&args);
+ if (args.nr != 0 ||
+ args.arg0 != getpid() || args.arg1 != SIGUSR1 || args.arg2 != 12 ||
+ args.arg3 != 13 || args.arg4 != 14 || args.arg5 != 15) {
+ printf("[FAIL]\tkill(getpid(), SIGUSR1) failed to preserve regs\n");
+ nerrs++;
+ } else {
+ printf("[OK]\tkill(getpid(), SIGUSR1) preserves regs\n");
+ }
+ clearhandler(SIGUSR1);
+}
+
+static void test_ptrace_syscall_restart(void)
+{
+ printf("[RUN]\tptrace-induced syscall restart\n");
+ pid_t chld = fork();
+ if (chld < 0)
+ err(1, "fork");
+
+ if (chld == 0) {
+ if (ptrace(PTRACE_TRACEME, 0, 0, 0) != 0)
+ err(1, "PTRACE_TRACEME");
+
+ pid_t pid = getpid(), tid = syscall(SYS_gettid);
+
+ printf("\tChild will make one syscall\n");
+ syscall(SYS_tgkill, pid, tid, SIGSTOP);
+
+ syscall(SYS_gettid, 10, 11, 12, 13, 14, 15);
+ _exit(0);
+ }
+
+ int status;
+
+ /* Wait for SIGSTOP. */
+ if (waitpid(chld, &status, 0) != chld || !WIFSTOPPED(status))
+ err(1, "waitpid");
+
+ struct user_regs_struct regs;
+
+ printf("[RUN]\tSYSEMU\n");
+ if (ptrace(PTRACE_SYSEMU, chld, 0, 0) != 0)
+ err(1, "PTRACE_SYSEMU");
+ wait_trap(chld);
+
+ if (ptrace(PTRACE_GETREGS, chld, 0, &regs) != 0)
+ err(1, "PTRACE_GETREGS");
+
+ if (regs.user_syscall_nr != SYS_gettid ||
+ regs.user_arg0 != 10 || regs.user_arg1 != 11 ||
+ regs.user_arg2 != 12 || regs.user_arg3 != 13 ||
+ regs.user_arg4 != 14 || regs.user_arg5 != 15) {
+ printf("[FAIL]\tInitial args are wrong (nr=%lu, args=%lu %lu %lu %lu %lu %lu)\n", (unsigned long)regs.user_syscall_nr, (unsigned long)regs.user_arg0, (unsigned long)regs.user_arg1, (unsigned long)regs.user_arg2, (unsigned long)regs.user_arg3, (unsigned long)regs.user_arg4, (unsigned long)regs.user_arg5);
+ nerrs++;
+ } else {
+ printf("[OK]\tInitial nr and args are correct\n");
+ }
+
+ printf("[RUN]\tRestart the syscall (ip = 0x%lx)\n",
+ (unsigned long)regs.user_ip);
+
+ /*
+ * This does exactly what it appears to do if syscall is int80 or
+ * SYSCALL64. For SYSCALL32 or SYSENTER, though, this is highly
+ * magical. It needs to work so that ptrace and syscall restart
+ * work as expected.
+ */
+ regs.user_ax = regs.user_syscall_nr;
+ regs.user_ip -= 2;
+ if (ptrace(PTRACE_SETREGS, chld, 0, &regs) != 0)
+ err(1, "PTRACE_SETREGS");
+
+ if (ptrace(PTRACE_SYSEMU, chld, 0, 0) != 0)
+ err(1, "PTRACE_SYSEMU");
+ wait_trap(chld);
+
+ if (ptrace(PTRACE_GETREGS, chld, 0, &regs) != 0)
+ err(1, "PTRACE_GETREGS");
+
+ if (regs.user_syscall_nr != SYS_gettid ||
+ regs.user_arg0 != 10 || regs.user_arg1 != 11 ||
+ regs.user_arg2 != 12 || regs.user_arg3 != 13 ||
+ regs.user_arg4 != 14 || regs.user_arg5 != 15) {
+ printf("[FAIL]\tRestart nr or args are wrong (nr=%lu, args=%lu %lu %lu %lu %lu %lu)\n", (unsigned long)regs.user_syscall_nr, (unsigned long)regs.user_arg0, (unsigned long)regs.user_arg1, (unsigned long)regs.user_arg2, (unsigned long)regs.user_arg3, (unsigned long)regs.user_arg4, (unsigned long)regs.user_arg5);
+ nerrs++;
+ } else {
+ printf("[OK]\tRestarted nr and args are correct\n");
+ }
+
+ printf("[RUN]\tChange nr and args and restart the syscall (ip = 0x%lx)\n",
+ (unsigned long)regs.user_ip);
+
+ regs.user_ax = SYS_getpid;
+ regs.user_arg0 = 20;
+ regs.user_arg1 = 21;
+ regs.user_arg2 = 22;
+ regs.user_arg3 = 23;
+ regs.user_arg4 = 24;
+ regs.user_arg5 = 25;
+ regs.user_ip -= 2;
+
+ if (ptrace(PTRACE_SETREGS, chld, 0, &regs) != 0)
+ err(1, "PTRACE_SETREGS");
+
+ if (ptrace(PTRACE_SYSEMU, chld, 0, 0) != 0)
+ err(1, "PTRACE_SYSEMU");
+ wait_trap(chld);
+
+ if (ptrace(PTRACE_GETREGS, chld, 0, &regs) != 0)
+ err(1, "PTRACE_GETREGS");
+
+ if (regs.user_syscall_nr != SYS_getpid ||
+ regs.user_arg0 != 20 || regs.user_arg1 != 21 || regs.user_arg2 != 22 ||
+ regs.user_arg3 != 23 || regs.user_arg4 != 24 || regs.user_arg5 != 25) {
+ printf("[FAIL]\tRestart nr or args are wrong (nr=%lu, args=%lu %lu %lu %lu %lu %lu)\n", (unsigned long)regs.user_syscall_nr, (unsigned long)regs.user_arg0, (unsigned long)regs.user_arg1, (unsigned long)regs.user_arg2, (unsigned long)regs.user_arg3, (unsigned long)regs.user_arg4, (unsigned long)regs.user_arg5);
+ nerrs++;
+ } else {
+ printf("[OK]\tReplacement nr and args are correct\n");
+ }
+
+ if (ptrace(PTRACE_CONT, chld, 0, 0) != 0)
+ err(1, "PTRACE_CONT");
+ if (waitpid(chld, &status, 0) != chld)
+ err(1, "waitpid");
+ if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
+ printf("[FAIL]\tChild failed\n");
+ nerrs++;
+ } else {
+ printf("[OK]\tChild exited cleanly\n");
+ }
+}
+
+static void test_restart_under_ptrace(void)
+{
+ printf("[RUN]\tkernel syscall restart under ptrace\n");
+ pid_t chld = fork();
+ if (chld < 0)
+ err(1, "fork");
+
+ if (chld == 0) {
+ if (ptrace(PTRACE_TRACEME, 0, 0, 0) != 0)
+ err(1, "PTRACE_TRACEME");
+
+ pid_t pid = getpid(), tid = syscall(SYS_gettid);
+
+ printf("\tChild will take a nap until signaled\n");
+ setsigign(SIGUSR1, SA_RESTART);
+ syscall(SYS_tgkill, pid, tid, SIGSTOP);
+
+ syscall(SYS_pause, 0, 0, 0, 0, 0, 0);
+ _exit(0);
+ }
+
+ int status;
+
+ /* Wait for SIGSTOP. */
+ if (waitpid(chld, &status, 0) != chld || !WIFSTOPPED(status))
+ err(1, "waitpid");
+
+ struct user_regs_struct regs;
+
+ printf("[RUN]\tSYSCALL\n");
+ if (ptrace(PTRACE_SYSCALL, chld, 0, 0) != 0)
+ err(1, "PTRACE_SYSCALL");
+ wait_trap(chld);
+
+ /* We should be stopped at pause(2) entry. */
+
+ if (ptrace(PTRACE_GETREGS, chld, 0, &regs) != 0)
+ err(1, "PTRACE_GETREGS");
+
+ if (regs.user_syscall_nr != SYS_pause ||
+ regs.user_arg0 != 0 || regs.user_arg1 != 0 ||
+ regs.user_arg2 != 0 || regs.user_arg3 != 0 ||
+ regs.user_arg4 != 0 || regs.user_arg5 != 0) {
+ printf("[FAIL]\tInitial args are wrong (nr=%lu, args=%lu %lu %lu %lu %lu %lu)\n", (unsigned long)regs.user_syscall_nr, (unsigned long)regs.user_arg0, (unsigned long)regs.user_arg1, (unsigned long)regs.user_arg2, (unsigned long)regs.user_arg3, (unsigned long)regs.user_arg4, (unsigned long)regs.user_arg5);
+ nerrs++;
+ } else {
+ printf("[OK]\tInitial nr and args are correct\n");
+ }
+
+ /* Interrupt it. */
+ kill(chld, SIGUSR1);
+
+ /* Advance. We should be stopped at exit. */
+ printf("[RUN]\tSYSCALL\n");
+ if (ptrace(PTRACE_SYSCALL, chld, 0, 0) != 0)
+ err(1, "PTRACE_SYSCALL");
+ wait_trap(chld);
+
+ if (ptrace(PTRACE_GETREGS, chld, 0, &regs) != 0)
+ err(1, "PTRACE_GETREGS");
+
+ if (regs.user_syscall_nr != SYS_pause ||
+ regs.user_arg0 != 0 || regs.user_arg1 != 0 ||
+ regs.user_arg2 != 0 || regs.user_arg3 != 0 ||
+ regs.user_arg4 != 0 || regs.user_arg5 != 0) {
+ printf("[FAIL]\tArgs after SIGUSR1 are wrong (nr=%lu, args=%lu %lu %lu %lu %lu %lu)\n", (unsigned long)regs.user_syscall_nr, (unsigned long)regs.user_arg0, (unsigned long)regs.user_arg1, (unsigned long)regs.user_arg2, (unsigned long)regs.user_arg3, (unsigned long)regs.user_arg4, (unsigned long)regs.user_arg5);
+ nerrs++;
+ } else {
+ printf("[OK]\tArgs after SIGUSR1 are correct (ax = %ld)\n",
+ (long)regs.user_ax);
+ }
+
+ /* Poke the regs back in. This must not break anything. */
+ if (ptrace(PTRACE_SETREGS, chld, 0, &regs) != 0)
+ err(1, "PTRACE_SETREGS");
+
+ /* Catch the (ignored) SIGUSR1. */
+ if (ptrace(PTRACE_CONT, chld, 0, 0) != 0)
+ err(1, "PTRACE_CONT");
+ if (waitpid(chld, &status, 0) != chld)
+ err(1, "waitpid");
+ if (!WIFSTOPPED(status)) {
+ printf("[FAIL]\tChild was stopped for SIGUSR1 (status = 0x%x)\n", status);
+ nerrs++;
+ } else {
+ printf("[OK]\tChild got SIGUSR1\n");
+ }
+
+ /* The next event should be pause(2) again. */
+ printf("[RUN]\tStep again\n");
+ if (ptrace(PTRACE_SYSCALL, chld, 0, 0) != 0)
+ err(1, "PTRACE_SYSCALL");
+ wait_trap(chld);
+
+ /* We should be stopped at pause(2) entry. */
+
+ if (ptrace(PTRACE_GETREGS, chld, 0, &regs) != 0)
+ err(1, "PTRACE_GETREGS");
+
+ if (regs.user_syscall_nr != SYS_pause ||
+ regs.user_arg0 != 0 || regs.user_arg1 != 0 ||
+ regs.user_arg2 != 0 || regs.user_arg3 != 0 ||
+ regs.user_arg4 != 0 || regs.user_arg5 != 0) {
+ printf("[FAIL]\tpause did not restart (nr=%lu, args=%lu %lu %lu %lu %lu %lu)\n", (unsigned long)regs.user_syscall_nr, (unsigned long)regs.user_arg0, (unsigned long)regs.user_arg1, (unsigned long)regs.user_arg2, (unsigned long)regs.user_arg3, (unsigned long)regs.user_arg4, (unsigned long)regs.user_arg5);
+ nerrs++;
+ } else {
+ printf("[OK]\tpause(2) restarted correctly\n");
+ }
+
+ /* Kill it. */
+ kill(chld, SIGKILL);
+ if (waitpid(chld, &status, 0) != chld)
+ err(1, "waitpid");
+}
+
+int main()
+{
+ printf("[RUN]\tCheck int80 return regs\n");
+ test_sys32_regs(do_full_int80);
+
+#if defined(__i386__) && (!defined(__GLIBC__) || __GLIBC__ > 2 || __GLIBC_MINOR__ >= 16)
+ vsyscall32 = (void *)getauxval(AT_SYSINFO);
+ if (vsyscall32) {
+ printf("[RUN]\tCheck AT_SYSINFO return regs\n");
+ test_sys32_regs(do_full_vsyscall32);
+ } else {
+ printf("[SKIP]\tAT_SYSINFO is not available\n");
+ }
+#endif
+
+ test_ptrace_syscall_restart();
+
+ test_restart_under_ptrace();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/x86/raw_syscall_helper_32.S b/tools/testing/selftests/x86/raw_syscall_helper_32.S
new file mode 100644
index 000000000..94410fa2b
--- /dev/null
+++ b/tools/testing/selftests/x86/raw_syscall_helper_32.S
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+.global sys32_helper
+sys32_helper:
+ /* Args: syscall_args_32*, function pointer */
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ movl 5*4(%esp), %eax /* pointer to args struct */
+
+ movl 1*4(%eax), %ebx
+ movl 2*4(%eax), %ecx
+ movl 3*4(%eax), %edx
+ movl 4*4(%eax), %esi
+ movl 5*4(%eax), %edi
+ movl 6*4(%eax), %ebp
+ movl 0*4(%eax), %eax
+
+ call *(6*4)(%esp) /* Do the syscall */
+
+ /* Now we need to recover without losing any reg values */
+ pushl %eax
+ movl 6*4(%esp), %eax
+ popl 0*4(%eax)
+ movl %ebx, 1*4(%eax)
+ movl %ecx, 2*4(%eax)
+ movl %edx, 3*4(%eax)
+ movl %esi, 4*4(%eax)
+ movl %edi, 5*4(%eax)
+ movl %ebp, 6*4(%eax)
+
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+
+ .type sys32_helper, @function
+ .size sys32_helper, .-sys32_helper
+
+.global int80_and_ret
+int80_and_ret:
+ int $0x80
+ ret
+
+ .type int80_and_ret, @function
+ .size int80_and_ret, .-int80_and_ret
diff --git a/tools/testing/selftests/x86/sigreturn.c b/tools/testing/selftests/x86/sigreturn.c
new file mode 100644
index 000000000..4d9dc3f2f
--- /dev/null
+++ b/tools/testing/selftests/x86/sigreturn.c
@@ -0,0 +1,871 @@
+/*
+ * sigreturn.c - tests for x86 sigreturn(2) and exit-to-userspace
+ * Copyright (c) 2014-2015 Andrew Lutomirski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * This is a series of tests that exercises the sigreturn(2) syscall and
+ * the IRET / SYSRET paths in the kernel.
+ *
+ * For now, this focuses on the effects of unusual CS and SS values,
+ * and it has a bunch of tests to make sure that ESP/RSP is restored
+ * properly.
+ *
+ * The basic idea behind these tests is to raise(SIGUSR1) to create a
+ * sigcontext frame, plug in the values to be tested, and then return,
+ * which implicitly invokes sigreturn(2) and programs the user context
+ * as desired.
+ *
+ * For tests for which we expect sigreturn and the subsequent return to
+ * user mode to succeed, we return to a short trampoline that generates
+ * SIGTRAP so that the meat of the tests can be ordinary C code in a
+ * SIGTRAP handler.
+ *
+ * The inner workings of each test is documented below.
+ *
+ * Do not run on outdated, unpatched kernels at risk of nasty crashes.
+ */
+
+#define _GNU_SOURCE
+
+#include <sys/time.h>
+#include <time.h>
+#include <stdlib.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <sys/mman.h>
+#include <sys/signal.h>
+#include <sys/ucontext.h>
+#include <asm/ldt.h>
+#include <err.h>
+#include <setjmp.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <sys/ptrace.h>
+#include <sys/user.h>
+
+/* Pull in AR_xyz defines. */
+typedef unsigned int u32;
+typedef unsigned short u16;
+#include "../../../../arch/x86/include/asm/desc_defs.h"
+
+/*
+ * Copied from asm/ucontext.h, as asm/ucontext.h conflicts badly with the glibc
+ * headers.
+ */
+#ifdef __x86_64__
+/*
+ * UC_SIGCONTEXT_SS will be set when delivering 64-bit or x32 signals on
+ * kernels that save SS in the sigcontext. All kernels that set
+ * UC_SIGCONTEXT_SS will correctly restore at least the low 32 bits of esp
+ * regardless of SS (i.e. they implement espfix).
+ *
+ * Kernels that set UC_SIGCONTEXT_SS will also set UC_STRICT_RESTORE_SS
+ * when delivering a signal that came from 64-bit code.
+ *
+ * Sigreturn restores SS as follows:
+ *
+ * if (saved SS is valid || UC_STRICT_RESTORE_SS is set ||
+ * saved CS is not 64-bit)
+ * new SS = saved SS (will fail IRET and signal if invalid)
+ * else
+ * new SS = a flat 32-bit data segment
+ */
+#define UC_SIGCONTEXT_SS 0x2
+#define UC_STRICT_RESTORE_SS 0x4
+#endif
+
+/*
+ * In principle, this test can run on Linux emulation layers (e.g.
+ * Illumos "LX branded zones"). Solaris-based kernels reserve LDT
+ * entries 0-5 for their own internal purposes, so start our LDT
+ * allocations above that reservation. (The tests don't pass on LX
+ * branded zones, but at least this lets them run.)
+ */
+#define LDT_OFFSET 6
+
+/* An aligned stack accessible through some of our segments. */
+static unsigned char stack16[65536] __attribute__((aligned(4096)));
+
+/*
+ * An aligned int3 instruction used as a trampoline. Some of the tests
+ * want to fish out their ss values, so this trampoline copies ss to eax
+ * before the int3.
+ */
+asm (".pushsection .text\n\t"
+ ".type int3, @function\n\t"
+ ".align 4096\n\t"
+ "int3:\n\t"
+ "mov %ss,%ecx\n\t"
+ "int3\n\t"
+ ".size int3, . - int3\n\t"
+ ".align 4096, 0xcc\n\t"
+ ".popsection");
+extern char int3[4096];
+
+/*
+ * At startup, we prepapre:
+ *
+ * - ldt_nonexistent_sel: An LDT entry that doesn't exist (all-zero
+ * descriptor or out of bounds).
+ * - code16_sel: A 16-bit LDT code segment pointing to int3.
+ * - data16_sel: A 16-bit LDT data segment pointing to stack16.
+ * - npcode32_sel: A 32-bit not-present LDT code segment pointing to int3.
+ * - npdata32_sel: A 32-bit not-present LDT data segment pointing to stack16.
+ * - gdt_data16_idx: A 16-bit GDT data segment pointing to stack16.
+ * - gdt_npdata32_idx: A 32-bit not-present GDT data segment pointing to
+ * stack16.
+ *
+ * For no particularly good reason, xyz_sel is a selector value with the
+ * RPL and LDT bits filled in, whereas xyz_idx is just an index into the
+ * descriptor table. These variables will be zero if their respective
+ * segments could not be allocated.
+ */
+static unsigned short ldt_nonexistent_sel;
+static unsigned short code16_sel, data16_sel, npcode32_sel, npdata32_sel;
+
+static unsigned short gdt_data16_idx, gdt_npdata32_idx;
+
+static unsigned short GDT3(int idx)
+{
+ return (idx << 3) | 3;
+}
+
+static unsigned short LDT3(int idx)
+{
+ return (idx << 3) | 7;
+}
+
+/* Our sigaltstack scratch space. */
+static char altstack_data[SIGSTKSZ];
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+ int flags)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static void clearhandler(int sig)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_handler = SIG_DFL;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static void add_ldt(const struct user_desc *desc, unsigned short *var,
+ const char *name)
+{
+ if (syscall(SYS_modify_ldt, 1, desc, sizeof(*desc)) == 0) {
+ *var = LDT3(desc->entry_number);
+ } else {
+ printf("[NOTE]\tFailed to create %s segment\n", name);
+ *var = 0;
+ }
+}
+
+static void setup_ldt(void)
+{
+ if ((unsigned long)stack16 > (1ULL << 32) - sizeof(stack16))
+ errx(1, "stack16 is too high\n");
+ if ((unsigned long)int3 > (1ULL << 32) - sizeof(int3))
+ errx(1, "int3 is too high\n");
+
+ ldt_nonexistent_sel = LDT3(LDT_OFFSET + 2);
+
+ const struct user_desc code16_desc = {
+ .entry_number = LDT_OFFSET + 0,
+ .base_addr = (unsigned long)int3,
+ .limit = 4095,
+ .seg_32bit = 0,
+ .contents = 2, /* Code, not conforming */
+ .read_exec_only = 0,
+ .limit_in_pages = 0,
+ .seg_not_present = 0,
+ .useable = 0
+ };
+ add_ldt(&code16_desc, &code16_sel, "code16");
+
+ const struct user_desc data16_desc = {
+ .entry_number = LDT_OFFSET + 1,
+ .base_addr = (unsigned long)stack16,
+ .limit = 0xffff,
+ .seg_32bit = 0,
+ .contents = 0, /* Data, grow-up */
+ .read_exec_only = 0,
+ .limit_in_pages = 0,
+ .seg_not_present = 0,
+ .useable = 0
+ };
+ add_ldt(&data16_desc, &data16_sel, "data16");
+
+ const struct user_desc npcode32_desc = {
+ .entry_number = LDT_OFFSET + 3,
+ .base_addr = (unsigned long)int3,
+ .limit = 4095,
+ .seg_32bit = 1,
+ .contents = 2, /* Code, not conforming */
+ .read_exec_only = 0,
+ .limit_in_pages = 0,
+ .seg_not_present = 1,
+ .useable = 0
+ };
+ add_ldt(&npcode32_desc, &npcode32_sel, "npcode32");
+
+ const struct user_desc npdata32_desc = {
+ .entry_number = LDT_OFFSET + 4,
+ .base_addr = (unsigned long)stack16,
+ .limit = 0xffff,
+ .seg_32bit = 1,
+ .contents = 0, /* Data, grow-up */
+ .read_exec_only = 0,
+ .limit_in_pages = 0,
+ .seg_not_present = 1,
+ .useable = 0
+ };
+ add_ldt(&npdata32_desc, &npdata32_sel, "npdata32");
+
+ struct user_desc gdt_data16_desc = {
+ .entry_number = -1,
+ .base_addr = (unsigned long)stack16,
+ .limit = 0xffff,
+ .seg_32bit = 0,
+ .contents = 0, /* Data, grow-up */
+ .read_exec_only = 0,
+ .limit_in_pages = 0,
+ .seg_not_present = 0,
+ .useable = 0
+ };
+
+ if (syscall(SYS_set_thread_area, &gdt_data16_desc) == 0) {
+ /*
+ * This probably indicates vulnerability to CVE-2014-8133.
+ * Merely getting here isn't definitive, though, and we'll
+ * diagnose the problem for real later on.
+ */
+ printf("[WARN]\tset_thread_area allocated data16 at index %d\n",
+ gdt_data16_desc.entry_number);
+ gdt_data16_idx = gdt_data16_desc.entry_number;
+ } else {
+ printf("[OK]\tset_thread_area refused 16-bit data\n");
+ }
+
+ struct user_desc gdt_npdata32_desc = {
+ .entry_number = -1,
+ .base_addr = (unsigned long)stack16,
+ .limit = 0xffff,
+ .seg_32bit = 1,
+ .contents = 0, /* Data, grow-up */
+ .read_exec_only = 0,
+ .limit_in_pages = 0,
+ .seg_not_present = 1,
+ .useable = 0
+ };
+
+ if (syscall(SYS_set_thread_area, &gdt_npdata32_desc) == 0) {
+ /*
+ * As a hardening measure, newer kernels don't allow this.
+ */
+ printf("[WARN]\tset_thread_area allocated npdata32 at index %d\n",
+ gdt_npdata32_desc.entry_number);
+ gdt_npdata32_idx = gdt_npdata32_desc.entry_number;
+ } else {
+ printf("[OK]\tset_thread_area refused 16-bit data\n");
+ }
+}
+
+/* State used by our signal handlers. */
+static gregset_t initial_regs, requested_regs, resulting_regs;
+
+/* Instructions for the SIGUSR1 handler. */
+static volatile unsigned short sig_cs, sig_ss;
+static volatile sig_atomic_t sig_trapped, sig_err, sig_trapno;
+#ifdef __x86_64__
+static volatile sig_atomic_t sig_corrupt_final_ss;
+#endif
+
+/* Abstractions for some 32-bit vs 64-bit differences. */
+#ifdef __x86_64__
+# define REG_IP REG_RIP
+# define REG_SP REG_RSP
+# define REG_CX REG_RCX
+
+struct selectors {
+ unsigned short cs, gs, fs, ss;
+};
+
+static unsigned short *ssptr(ucontext_t *ctx)
+{
+ struct selectors *sels = (void *)&ctx->uc_mcontext.gregs[REG_CSGSFS];
+ return &sels->ss;
+}
+
+static unsigned short *csptr(ucontext_t *ctx)
+{
+ struct selectors *sels = (void *)&ctx->uc_mcontext.gregs[REG_CSGSFS];
+ return &sels->cs;
+}
+#else
+# define REG_IP REG_EIP
+# define REG_SP REG_ESP
+# define REG_CX REG_ECX
+
+static greg_t *ssptr(ucontext_t *ctx)
+{
+ return &ctx->uc_mcontext.gregs[REG_SS];
+}
+
+static greg_t *csptr(ucontext_t *ctx)
+{
+ return &ctx->uc_mcontext.gregs[REG_CS];
+}
+#endif
+
+/*
+ * Checks a given selector for its code bitness or returns -1 if it's not
+ * a usable code segment selector.
+ */
+int cs_bitness(unsigned short cs)
+{
+ uint32_t valid = 0, ar;
+ asm ("lar %[cs], %[ar]\n\t"
+ "jnz 1f\n\t"
+ "mov $1, %[valid]\n\t"
+ "1:"
+ : [ar] "=r" (ar), [valid] "+rm" (valid)
+ : [cs] "r" (cs));
+
+ if (!valid)
+ return -1;
+
+ bool db = (ar & (1 << 22));
+ bool l = (ar & (1 << 21));
+
+ if (!(ar & (1<<11)))
+ return -1; /* Not code. */
+
+ if (l && !db)
+ return 64;
+ else if (!l && db)
+ return 32;
+ else if (!l && !db)
+ return 16;
+ else
+ return -1; /* Unknown bitness. */
+}
+
+/*
+ * Checks a given selector for its code bitness or returns -1 if it's not
+ * a usable code segment selector.
+ */
+bool is_valid_ss(unsigned short cs)
+{
+ uint32_t valid = 0, ar;
+ asm ("lar %[cs], %[ar]\n\t"
+ "jnz 1f\n\t"
+ "mov $1, %[valid]\n\t"
+ "1:"
+ : [ar] "=r" (ar), [valid] "+rm" (valid)
+ : [cs] "r" (cs));
+
+ if (!valid)
+ return false;
+
+ if ((ar & AR_TYPE_MASK) != AR_TYPE_RWDATA &&
+ (ar & AR_TYPE_MASK) != AR_TYPE_RWDATA_EXPDOWN)
+ return false;
+
+ return (ar & AR_P);
+}
+
+/* Number of errors in the current test case. */
+static volatile sig_atomic_t nerrs;
+
+static void validate_signal_ss(int sig, ucontext_t *ctx)
+{
+#ifdef __x86_64__
+ bool was_64bit = (cs_bitness(*csptr(ctx)) == 64);
+
+ if (!(ctx->uc_flags & UC_SIGCONTEXT_SS)) {
+ printf("[FAIL]\tUC_SIGCONTEXT_SS was not set\n");
+ nerrs++;
+
+ /*
+ * This happens on Linux 4.1. The rest will fail, too, so
+ * return now to reduce the noise.
+ */
+ return;
+ }
+
+ /* UC_STRICT_RESTORE_SS is set iff we came from 64-bit mode. */
+ if (!!(ctx->uc_flags & UC_STRICT_RESTORE_SS) != was_64bit) {
+ printf("[FAIL]\tUC_STRICT_RESTORE_SS was wrong in signal %d\n",
+ sig);
+ nerrs++;
+ }
+
+ if (is_valid_ss(*ssptr(ctx))) {
+ /*
+ * DOSEMU was written before 64-bit sigcontext had SS, and
+ * it tries to figure out the signal source SS by looking at
+ * the physical register. Make sure that keeps working.
+ */
+ unsigned short hw_ss;
+ asm ("mov %%ss, %0" : "=rm" (hw_ss));
+ if (hw_ss != *ssptr(ctx)) {
+ printf("[FAIL]\tHW SS didn't match saved SS\n");
+ nerrs++;
+ }
+ }
+#endif
+}
+
+/*
+ * SIGUSR1 handler. Sets CS and SS as requested and points IP to the
+ * int3 trampoline. Sets SP to a large known value so that we can see
+ * whether the value round-trips back to user mode correctly.
+ */
+static void sigusr1(int sig, siginfo_t *info, void *ctx_void)
+{
+ ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+ validate_signal_ss(sig, ctx);
+
+ memcpy(&initial_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
+
+ *csptr(ctx) = sig_cs;
+ *ssptr(ctx) = sig_ss;
+
+ ctx->uc_mcontext.gregs[REG_IP] =
+ sig_cs == code16_sel ? 0 : (unsigned long)&int3;
+ ctx->uc_mcontext.gregs[REG_SP] = (unsigned long)0x8badf00d5aadc0deULL;
+ ctx->uc_mcontext.gregs[REG_CX] = 0;
+
+ memcpy(&requested_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
+ requested_regs[REG_CX] = *ssptr(ctx); /* The asm code does this. */
+
+ return;
+}
+
+/*
+ * Called after a successful sigreturn (via int3) or from a failed
+ * sigreturn (directly by kernel). Restores our state so that the
+ * original raise(SIGUSR1) returns.
+ */
+static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
+{
+ ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+ validate_signal_ss(sig, ctx);
+
+ sig_err = ctx->uc_mcontext.gregs[REG_ERR];
+ sig_trapno = ctx->uc_mcontext.gregs[REG_TRAPNO];
+
+ unsigned short ss;
+ asm ("mov %%ss,%0" : "=r" (ss));
+
+ greg_t asm_ss = ctx->uc_mcontext.gregs[REG_CX];
+ if (asm_ss != sig_ss && sig == SIGTRAP) {
+ /* Sanity check failure. */
+ printf("[FAIL]\tSIGTRAP: ss = %hx, frame ss = %hx, ax = %llx\n",
+ ss, *ssptr(ctx), (unsigned long long)asm_ss);
+ nerrs++;
+ }
+
+ memcpy(&resulting_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
+ memcpy(&ctx->uc_mcontext.gregs, &initial_regs, sizeof(gregset_t));
+
+#ifdef __x86_64__
+ if (sig_corrupt_final_ss) {
+ if (ctx->uc_flags & UC_STRICT_RESTORE_SS) {
+ printf("[FAIL]\tUC_STRICT_RESTORE_SS was set inappropriately\n");
+ nerrs++;
+ } else {
+ /*
+ * DOSEMU transitions from 32-bit to 64-bit mode by
+ * adjusting sigcontext, and it requires that this work
+ * even if the saved SS is bogus.
+ */
+ printf("\tCorrupting SS on return to 64-bit mode\n");
+ *ssptr(ctx) = 0;
+ }
+ }
+#endif
+
+ sig_trapped = sig;
+}
+
+#ifdef __x86_64__
+/* Tests recovery if !UC_STRICT_RESTORE_SS */
+static void sigusr2(int sig, siginfo_t *info, void *ctx_void)
+{
+ ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+ if (!(ctx->uc_flags & UC_STRICT_RESTORE_SS)) {
+ printf("[FAIL]\traise(2) didn't set UC_STRICT_RESTORE_SS\n");
+ nerrs++;
+ return; /* We can't do the rest. */
+ }
+
+ ctx->uc_flags &= ~UC_STRICT_RESTORE_SS;
+ *ssptr(ctx) = 0;
+
+ /* Return. The kernel should recover without sending another signal. */
+}
+
+static int test_nonstrict_ss(void)
+{
+ clearhandler(SIGUSR1);
+ clearhandler(SIGTRAP);
+ clearhandler(SIGSEGV);
+ clearhandler(SIGILL);
+ sethandler(SIGUSR2, sigusr2, 0);
+
+ nerrs = 0;
+
+ printf("[RUN]\tClear UC_STRICT_RESTORE_SS and corrupt SS\n");
+ raise(SIGUSR2);
+ if (!nerrs)
+ printf("[OK]\tIt worked\n");
+
+ return nerrs;
+}
+#endif
+
+/* Finds a usable code segment of the requested bitness. */
+int find_cs(int bitness)
+{
+ unsigned short my_cs;
+
+ asm ("mov %%cs,%0" : "=r" (my_cs));
+
+ if (cs_bitness(my_cs) == bitness)
+ return my_cs;
+ if (cs_bitness(my_cs + (2 << 3)) == bitness)
+ return my_cs + (2 << 3);
+ if (my_cs > (2<<3) && cs_bitness(my_cs - (2 << 3)) == bitness)
+ return my_cs - (2 << 3);
+ if (cs_bitness(code16_sel) == bitness)
+ return code16_sel;
+
+ printf("[WARN]\tCould not find %d-bit CS\n", bitness);
+ return -1;
+}
+
+static int test_valid_sigreturn(int cs_bits, bool use_16bit_ss, int force_ss)
+{
+ int cs = find_cs(cs_bits);
+ if (cs == -1) {
+ printf("[SKIP]\tCode segment unavailable for %d-bit CS, %d-bit SS\n",
+ cs_bits, use_16bit_ss ? 16 : 32);
+ return 0;
+ }
+
+ if (force_ss != -1) {
+ sig_ss = force_ss;
+ } else {
+ if (use_16bit_ss) {
+ if (!data16_sel) {
+ printf("[SKIP]\tData segment unavailable for %d-bit CS, 16-bit SS\n",
+ cs_bits);
+ return 0;
+ }
+ sig_ss = data16_sel;
+ } else {
+ asm volatile ("mov %%ss,%0" : "=r" (sig_ss));
+ }
+ }
+
+ sig_cs = cs;
+
+ printf("[RUN]\tValid sigreturn: %d-bit CS (%hx), %d-bit SS (%hx%s)\n",
+ cs_bits, sig_cs, use_16bit_ss ? 16 : 32, sig_ss,
+ (sig_ss & 4) ? "" : ", GDT");
+
+ raise(SIGUSR1);
+
+ nerrs = 0;
+
+ /*
+ * Check that each register had an acceptable value when the
+ * int3 trampoline was invoked.
+ */
+ for (int i = 0; i < NGREG; i++) {
+ greg_t req = requested_regs[i], res = resulting_regs[i];
+
+ if (i == REG_TRAPNO || i == REG_IP)
+ continue; /* don't care */
+
+ if (i == REG_SP) {
+ /*
+ * If we were using a 16-bit stack segment, then
+ * the kernel is a bit stuck: IRET only restores
+ * the low 16 bits of ESP/RSP if SS is 16-bit.
+ * The kernel uses a hack to restore bits 31:16,
+ * but that hack doesn't help with bits 63:32.
+ * On Intel CPUs, bits 63:32 end up zeroed, and, on
+ * AMD CPUs, they leak the high bits of the kernel
+ * espfix64 stack pointer. There's very little that
+ * the kernel can do about it.
+ *
+ * Similarly, if we are returning to a 32-bit context,
+ * the CPU will often lose the high 32 bits of RSP.
+ */
+
+ if (res == req)
+ continue;
+
+ if (cs_bits != 64 && ((res ^ req) & 0xFFFFFFFF) == 0) {
+ printf("[NOTE]\tSP: %llx -> %llx\n",
+ (unsigned long long)req,
+ (unsigned long long)res);
+ continue;
+ }
+
+ printf("[FAIL]\tSP mismatch: requested 0x%llx; got 0x%llx\n",
+ (unsigned long long)requested_regs[i],
+ (unsigned long long)resulting_regs[i]);
+ nerrs++;
+ continue;
+ }
+
+ bool ignore_reg = false;
+#if __i386__
+ if (i == REG_UESP)
+ ignore_reg = true;
+#else
+ if (i == REG_CSGSFS) {
+ struct selectors *req_sels =
+ (void *)&requested_regs[REG_CSGSFS];
+ struct selectors *res_sels =
+ (void *)&resulting_regs[REG_CSGSFS];
+ if (req_sels->cs != res_sels->cs) {
+ printf("[FAIL]\tCS mismatch: requested 0x%hx; got 0x%hx\n",
+ req_sels->cs, res_sels->cs);
+ nerrs++;
+ }
+
+ if (req_sels->ss != res_sels->ss) {
+ printf("[FAIL]\tSS mismatch: requested 0x%hx; got 0x%hx\n",
+ req_sels->ss, res_sels->ss);
+ nerrs++;
+ }
+
+ continue;
+ }
+#endif
+
+ /* Sanity check on the kernel */
+ if (i == REG_CX && req != res) {
+ printf("[FAIL]\tCX (saved SP) mismatch: requested 0x%llx; got 0x%llx\n",
+ (unsigned long long)req,
+ (unsigned long long)res);
+ nerrs++;
+ continue;
+ }
+
+ if (req != res && !ignore_reg) {
+ printf("[FAIL]\tReg %d mismatch: requested 0x%llx; got 0x%llx\n",
+ i, (unsigned long long)req,
+ (unsigned long long)res);
+ nerrs++;
+ }
+ }
+
+ if (nerrs == 0)
+ printf("[OK]\tall registers okay\n");
+
+ return nerrs;
+}
+
+static int test_bad_iret(int cs_bits, unsigned short ss, int force_cs)
+{
+ int cs = force_cs == -1 ? find_cs(cs_bits) : force_cs;
+ if (cs == -1)
+ return 0;
+
+ sig_cs = cs;
+ sig_ss = ss;
+
+ printf("[RUN]\t%d-bit CS (%hx), bogus SS (%hx)\n",
+ cs_bits, sig_cs, sig_ss);
+
+ sig_trapped = 0;
+ raise(SIGUSR1);
+ if (sig_trapped) {
+ char errdesc[32] = "";
+ if (sig_err) {
+ const char *src = (sig_err & 1) ? " EXT" : "";
+ const char *table;
+ if ((sig_err & 0x6) == 0x0)
+ table = "GDT";
+ else if ((sig_err & 0x6) == 0x4)
+ table = "LDT";
+ else if ((sig_err & 0x6) == 0x2)
+ table = "IDT";
+ else
+ table = "???";
+
+ sprintf(errdesc, "%s%s index %d, ",
+ table, src, sig_err >> 3);
+ }
+
+ char trapname[32];
+ if (sig_trapno == 13)
+ strcpy(trapname, "GP");
+ else if (sig_trapno == 11)
+ strcpy(trapname, "NP");
+ else if (sig_trapno == 12)
+ strcpy(trapname, "SS");
+ else if (sig_trapno == 32)
+ strcpy(trapname, "IRET"); /* X86_TRAP_IRET */
+ else
+ sprintf(trapname, "%d", sig_trapno);
+
+ printf("[OK]\tGot #%s(0x%lx) (i.e. %s%s)\n",
+ trapname, (unsigned long)sig_err,
+ errdesc, strsignal(sig_trapped));
+ return 0;
+ } else {
+ /*
+ * This also implicitly tests UC_STRICT_RESTORE_SS:
+ * We check that these signals set UC_STRICT_RESTORE_SS and,
+ * if UC_STRICT_RESTORE_SS doesn't cause strict behavior,
+ * then we won't get SIGSEGV.
+ */
+ printf("[FAIL]\tDid not get SIGSEGV\n");
+ return 1;
+ }
+}
+
+int main()
+{
+ int total_nerrs = 0;
+ unsigned short my_cs, my_ss;
+
+ asm volatile ("mov %%cs,%0" : "=r" (my_cs));
+ asm volatile ("mov %%ss,%0" : "=r" (my_ss));
+ setup_ldt();
+
+ stack_t stack = {
+ .ss_sp = altstack_data,
+ .ss_size = SIGSTKSZ,
+ };
+ if (sigaltstack(&stack, NULL) != 0)
+ err(1, "sigaltstack");
+
+ sethandler(SIGUSR1, sigusr1, 0);
+ sethandler(SIGTRAP, sigtrap, SA_ONSTACK);
+
+ /* Easy cases: return to a 32-bit SS in each possible CS bitness. */
+ total_nerrs += test_valid_sigreturn(64, false, -1);
+ total_nerrs += test_valid_sigreturn(32, false, -1);
+ total_nerrs += test_valid_sigreturn(16, false, -1);
+
+ /*
+ * Test easy espfix cases: return to a 16-bit LDT SS in each possible
+ * CS bitness. NB: with a long mode CS, the SS bitness is irrelevant.
+ *
+ * This catches the original missing-espfix-on-64-bit-kernels issue
+ * as well as CVE-2014-8134.
+ */
+ total_nerrs += test_valid_sigreturn(64, true, -1);
+ total_nerrs += test_valid_sigreturn(32, true, -1);
+ total_nerrs += test_valid_sigreturn(16, true, -1);
+
+ if (gdt_data16_idx) {
+ /*
+ * For performance reasons, Linux skips espfix if SS points
+ * to the GDT. If we were able to allocate a 16-bit SS in
+ * the GDT, see if it leaks parts of the kernel stack pointer.
+ *
+ * This tests for CVE-2014-8133.
+ */
+ total_nerrs += test_valid_sigreturn(64, true,
+ GDT3(gdt_data16_idx));
+ total_nerrs += test_valid_sigreturn(32, true,
+ GDT3(gdt_data16_idx));
+ total_nerrs += test_valid_sigreturn(16, true,
+ GDT3(gdt_data16_idx));
+ }
+
+#ifdef __x86_64__
+ /* Nasty ABI case: check SS corruption handling. */
+ sig_corrupt_final_ss = 1;
+ total_nerrs += test_valid_sigreturn(32, false, -1);
+ total_nerrs += test_valid_sigreturn(32, true, -1);
+ sig_corrupt_final_ss = 0;
+#endif
+
+ /*
+ * We're done testing valid sigreturn cases. Now we test states
+ * for which sigreturn itself will succeed but the subsequent
+ * entry to user mode will fail.
+ *
+ * Depending on the failure mode and the kernel bitness, these
+ * entry failures can generate SIGSEGV, SIGBUS, or SIGILL.
+ */
+ clearhandler(SIGTRAP);
+ sethandler(SIGSEGV, sigtrap, SA_ONSTACK);
+ sethandler(SIGBUS, sigtrap, SA_ONSTACK);
+ sethandler(SIGILL, sigtrap, SA_ONSTACK); /* 32-bit kernels do this */
+
+ /* Easy failures: invalid SS, resulting in #GP(0) */
+ test_bad_iret(64, ldt_nonexistent_sel, -1);
+ test_bad_iret(32, ldt_nonexistent_sel, -1);
+ test_bad_iret(16, ldt_nonexistent_sel, -1);
+
+ /* These fail because SS isn't a data segment, resulting in #GP(SS) */
+ test_bad_iret(64, my_cs, -1);
+ test_bad_iret(32, my_cs, -1);
+ test_bad_iret(16, my_cs, -1);
+
+ /* Try to return to a not-present code segment, triggering #NP(SS). */
+ test_bad_iret(32, my_ss, npcode32_sel);
+
+ /*
+ * Try to return to a not-present but otherwise valid data segment.
+ * This will cause IRET to fail with #SS on the espfix stack. This
+ * exercises CVE-2014-9322.
+ *
+ * Note that, if espfix is enabled, 64-bit Linux will lose track
+ * of the actual cause of failure and report #GP(0) instead.
+ * This would be very difficult for Linux to avoid, because
+ * espfix64 causes IRET failures to be promoted to #DF, so the
+ * original exception frame is never pushed onto the stack.
+ */
+ test_bad_iret(32, npdata32_sel, -1);
+
+ /*
+ * Try to return to a not-present but otherwise valid data
+ * segment without invoking espfix. Newer kernels don't allow
+ * this to happen in the first place. On older kernels, though,
+ * this can trigger CVE-2014-9322.
+ */
+ if (gdt_npdata32_idx)
+ test_bad_iret(32, GDT3(gdt_npdata32_idx), -1);
+
+#ifdef __x86_64__
+ total_nerrs += test_nonstrict_ss();
+#endif
+
+ return total_nerrs ? 1 : 0;
+}
diff --git a/tools/testing/selftests/x86/single_step_syscall.c b/tools/testing/selftests/x86/single_step_syscall.c
new file mode 100644
index 000000000..ddfdd635d
--- /dev/null
+++ b/tools/testing/selftests/x86/single_step_syscall.c
@@ -0,0 +1,187 @@
+/*
+ * single_step_syscall.c - single-steps various x86 syscalls
+ * Copyright (c) 2014-2015 Andrew Lutomirski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * This is a very simple series of tests that makes system calls with
+ * the TF flag set. This exercises some nasty kernel code in the
+ * SYSENTER case: SYSENTER does not clear TF, so SYSENTER with TF set
+ * immediately issues #DB from CPL 0. This requires special handling in
+ * the kernel.
+ */
+
+#define _GNU_SOURCE
+
+#include <sys/time.h>
+#include <time.h>
+#include <stdlib.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <sys/mman.h>
+#include <sys/signal.h>
+#include <sys/ucontext.h>
+#include <asm/ldt.h>
+#include <err.h>
+#include <setjmp.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <sys/ptrace.h>
+#include <sys/user.h>
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+ int flags)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static volatile sig_atomic_t sig_traps;
+
+#ifdef __x86_64__
+# define REG_IP REG_RIP
+# define WIDTH "q"
+# define INT80_CLOBBERS "r8", "r9", "r10", "r11"
+#else
+# define REG_IP REG_EIP
+# define WIDTH "l"
+# define INT80_CLOBBERS
+#endif
+
+static unsigned long get_eflags(void)
+{
+ unsigned long eflags;
+ asm volatile ("pushf" WIDTH "\n\tpop" WIDTH " %0" : "=rm" (eflags));
+ return eflags;
+}
+
+static void set_eflags(unsigned long eflags)
+{
+ asm volatile ("push" WIDTH " %0\n\tpopf" WIDTH
+ : : "rm" (eflags) : "flags");
+}
+
+#define X86_EFLAGS_TF (1UL << 8)
+
+static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
+{
+ ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+ if (get_eflags() & X86_EFLAGS_TF) {
+ set_eflags(get_eflags() & ~X86_EFLAGS_TF);
+ printf("[WARN]\tSIGTRAP handler had TF set\n");
+ _exit(1);
+ }
+
+ sig_traps++;
+
+ if (sig_traps == 10000 || sig_traps == 10001) {
+ printf("[WARN]\tHit %d SIGTRAPs with si_addr 0x%lx, ip 0x%lx\n",
+ (int)sig_traps,
+ (unsigned long)info->si_addr,
+ (unsigned long)ctx->uc_mcontext.gregs[REG_IP]);
+ }
+}
+
+static void check_result(void)
+{
+ unsigned long new_eflags = get_eflags();
+ set_eflags(new_eflags & ~X86_EFLAGS_TF);
+
+ if (!sig_traps) {
+ printf("[FAIL]\tNo SIGTRAP\n");
+ exit(1);
+ }
+
+ if (!(new_eflags & X86_EFLAGS_TF)) {
+ printf("[FAIL]\tTF was cleared\n");
+ exit(1);
+ }
+
+ printf("[OK]\tSurvived with TF set and %d traps\n", (int)sig_traps);
+ sig_traps = 0;
+}
+
+int main()
+{
+#ifdef CAN_BUILD_32
+ int tmp;
+#endif
+
+ sethandler(SIGTRAP, sigtrap, 0);
+
+ printf("[RUN]\tSet TF and check nop\n");
+ set_eflags(get_eflags() | X86_EFLAGS_TF);
+ asm volatile ("nop");
+ check_result();
+
+#ifdef __x86_64__
+ printf("[RUN]\tSet TF and check syscall-less opportunistic sysret\n");
+ set_eflags(get_eflags() | X86_EFLAGS_TF);
+ extern unsigned char post_nop[];
+ asm volatile ("pushf" WIDTH "\n\t"
+ "pop" WIDTH " %%r11\n\t"
+ "nop\n\t"
+ "post_nop:"
+ : : "c" (post_nop) : "r11");
+ check_result();
+#endif
+#ifdef CAN_BUILD_32
+ printf("[RUN]\tSet TF and check int80\n");
+ set_eflags(get_eflags() | X86_EFLAGS_TF);
+ asm volatile ("int $0x80" : "=a" (tmp) : "a" (SYS_getpid)
+ : INT80_CLOBBERS);
+ check_result();
+#endif
+
+ /*
+ * This test is particularly interesting if fast syscalls use
+ * SYSENTER: it triggers a nasty design flaw in SYSENTER.
+ * Specifically, SYSENTER does not clear TF, so either SYSENTER
+ * or the next instruction traps at CPL0. (Of course, Intel
+ * mostly forgot to document exactly what happens here.) So we
+ * get a CPL0 fault with usergs (on 64-bit kernels) and possibly
+ * no stack. The only sane way the kernel can possibly handle
+ * it is to clear TF on return from the #DB handler, but this
+ * happens way too early to set TF in the saved pt_regs, so the
+ * kernel has to do something clever to avoid losing track of
+ * the TF bit.
+ *
+ * Needless to say, we've had bugs in this area.
+ */
+ syscall(SYS_getpid); /* Force symbol binding without TF set. */
+ printf("[RUN]\tSet TF and check a fast syscall\n");
+ set_eflags(get_eflags() | X86_EFLAGS_TF);
+ syscall(SYS_getpid);
+ check_result();
+
+ /* Now make sure that another fast syscall doesn't set TF again. */
+ printf("[RUN]\tFast syscall with TF cleared\n");
+ fflush(stdout); /* Force a syscall */
+ if (get_eflags() & X86_EFLAGS_TF) {
+ printf("[FAIL]\tTF is now set\n");
+ exit(1);
+ }
+ if (sig_traps) {
+ printf("[FAIL]\tGot SIGTRAP\n");
+ exit(1);
+ }
+ printf("[OK]\tNothing unexpected happened\n");
+
+ return 0;
+}
diff --git a/tools/testing/selftests/x86/syscall_arg_fault.c b/tools/testing/selftests/x86/syscall_arg_fault.c
new file mode 100644
index 000000000..7db4fc9fa
--- /dev/null
+++ b/tools/testing/selftests/x86/syscall_arg_fault.c
@@ -0,0 +1,130 @@
+/*
+ * syscall_arg_fault.c - tests faults 32-bit fast syscall stack args
+ * Copyright (c) 2015 Andrew Lutomirski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/signal.h>
+#include <sys/ucontext.h>
+#include <err.h>
+#include <setjmp.h>
+#include <errno.h>
+
+/* Our sigaltstack scratch space. */
+static unsigned char altstack_data[SIGSTKSZ];
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+ int flags)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static volatile sig_atomic_t sig_traps;
+static sigjmp_buf jmpbuf;
+
+static volatile sig_atomic_t n_errs;
+
+static void sigsegv(int sig, siginfo_t *info, void *ctx_void)
+{
+ ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+ if (ctx->uc_mcontext.gregs[REG_EAX] != -EFAULT) {
+ printf("[FAIL]\tAX had the wrong value: 0x%x\n",
+ ctx->uc_mcontext.gregs[REG_EAX]);
+ n_errs++;
+ } else {
+ printf("[OK]\tSeems okay\n");
+ }
+
+ siglongjmp(jmpbuf, 1);
+}
+
+static void sigill(int sig, siginfo_t *info, void *ctx_void)
+{
+ printf("[SKIP]\tIllegal instruction\n");
+ siglongjmp(jmpbuf, 1);
+}
+
+int main()
+{
+ stack_t stack = {
+ .ss_sp = altstack_data,
+ .ss_size = SIGSTKSZ,
+ };
+ if (sigaltstack(&stack, NULL) != 0)
+ err(1, "sigaltstack");
+
+ sethandler(SIGSEGV, sigsegv, SA_ONSTACK);
+ sethandler(SIGILL, sigill, SA_ONSTACK);
+
+ /*
+ * Exercise another nasty special case. The 32-bit SYSCALL
+ * and SYSENTER instructions (even in compat mode) each
+ * clobber one register. A Linux system call has a syscall
+ * number and six arguments, and the user stack pointer
+ * needs to live in some register on return. That means
+ * that we need eight registers, but SYSCALL and SYSENTER
+ * only preserve seven registers. As a result, one argument
+ * ends up on the stack. The stack is user memory, which
+ * means that the kernel can fail to read it.
+ *
+ * The 32-bit fast system calls don't have a defined ABI:
+ * we're supposed to invoke them through the vDSO. So we'll
+ * fudge it: we set all regs to invalid pointer values and
+ * invoke the entry instruction. The return will fail no
+ * matter what, and we completely lose our program state,
+ * but we can fix it up with a signal handler.
+ */
+
+ printf("[RUN]\tSYSENTER with invalid state\n");
+ if (sigsetjmp(jmpbuf, 1) == 0) {
+ asm volatile (
+ "movl $-1, %%eax\n\t"
+ "movl $-1, %%ebx\n\t"
+ "movl $-1, %%ecx\n\t"
+ "movl $-1, %%edx\n\t"
+ "movl $-1, %%esi\n\t"
+ "movl $-1, %%edi\n\t"
+ "movl $-1, %%ebp\n\t"
+ "movl $-1, %%esp\n\t"
+ "sysenter"
+ : : : "memory", "flags");
+ }
+
+ printf("[RUN]\tSYSCALL with invalid state\n");
+ if (sigsetjmp(jmpbuf, 1) == 0) {
+ asm volatile (
+ "movl $-1, %%eax\n\t"
+ "movl $-1, %%ebx\n\t"
+ "movl $-1, %%ecx\n\t"
+ "movl $-1, %%edx\n\t"
+ "movl $-1, %%esi\n\t"
+ "movl $-1, %%edi\n\t"
+ "movl $-1, %%ebp\n\t"
+ "movl $-1, %%esp\n\t"
+ "syscall\n\t"
+ "pushl $0" /* make sure we segfault cleanly */
+ : : : "memory", "flags");
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/x86/syscall_nt.c b/tools/testing/selftests/x86/syscall_nt.c
new file mode 100644
index 000000000..74e6b3fc2
--- /dev/null
+++ b/tools/testing/selftests/x86/syscall_nt.c
@@ -0,0 +1,96 @@
+/*
+ * syscall_nt.c - checks syscalls with NT set
+ * Copyright (c) 2014-2015 Andrew Lutomirski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Some obscure user-space code requires the ability to make system calls
+ * with FLAGS.NT set. Make sure it works.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+#include <signal.h>
+#include <err.h>
+#include <sys/syscall.h>
+#include <asm/processor-flags.h>
+
+#ifdef __x86_64__
+# define WIDTH "q"
+#else
+# define WIDTH "l"
+#endif
+
+static unsigned int nerrs;
+
+static unsigned long get_eflags(void)
+{
+ unsigned long eflags;
+ asm volatile ("pushf" WIDTH "\n\tpop" WIDTH " %0" : "=rm" (eflags));
+ return eflags;
+}
+
+static void set_eflags(unsigned long eflags)
+{
+ asm volatile ("push" WIDTH " %0\n\tpopf" WIDTH
+ : : "rm" (eflags) : "flags");
+}
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+ int flags)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static void sigtrap(int sig, siginfo_t *si, void *ctx_void)
+{
+}
+
+static void do_it(unsigned long extraflags)
+{
+ unsigned long flags;
+
+ set_eflags(get_eflags() | extraflags);
+ syscall(SYS_getpid);
+ flags = get_eflags();
+ set_eflags(X86_EFLAGS_IF | X86_EFLAGS_FIXED);
+ if ((flags & extraflags) == extraflags) {
+ printf("[OK]\tThe syscall worked and flags are still set\n");
+ } else {
+ printf("[FAIL]\tThe syscall worked but flags were cleared (flags = 0x%lx but expected 0x%lx set)\n",
+ flags, extraflags);
+ nerrs++;
+ }
+}
+
+int main(void)
+{
+ printf("[RUN]\tSet NT and issue a syscall\n");
+ do_it(X86_EFLAGS_NT);
+
+ /*
+ * Now try it again with TF set -- TF forces returns via IRET in all
+ * cases except non-ptregs-using 64-bit full fast path syscalls.
+ */
+
+ sethandler(SIGTRAP, sigtrap, 0);
+
+ printf("[RUN]\tSet NT|TF and issue a syscall\n");
+ do_it(X86_EFLAGS_NT | X86_EFLAGS_TF);
+
+ return nerrs == 0 ? 0 : 1;
+}
diff --git a/tools/testing/selftests/x86/sysret_rip.c b/tools/testing/selftests/x86/sysret_rip.c
new file mode 100644
index 000000000..d85ec5b36
--- /dev/null
+++ b/tools/testing/selftests/x86/sysret_rip.c
@@ -0,0 +1,195 @@
+/*
+ * sigreturn.c - tests that x86 avoids Intel SYSRET pitfalls
+ * Copyright (c) 2014-2016 Andrew Lutomirski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <sys/signal.h>
+#include <sys/ucontext.h>
+#include <sys/syscall.h>
+#include <err.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <setjmp.h>
+#include <sys/user.h>
+#include <sys/mman.h>
+#include <assert.h>
+
+
+asm (
+ ".pushsection \".text\", \"ax\"\n\t"
+ ".balign 4096\n\t"
+ "test_page: .globl test_page\n\t"
+ ".fill 4094,1,0xcc\n\t"
+ "test_syscall_insn:\n\t"
+ "syscall\n\t"
+ ".ifne . - test_page - 4096\n\t"
+ ".error \"test page is not one page long\"\n\t"
+ ".endif\n\t"
+ ".popsection"
+ );
+
+extern const char test_page[];
+static void const *current_test_page_addr = test_page;
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+ int flags)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static void clearhandler(int sig)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_handler = SIG_DFL;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+/* State used by our signal handlers. */
+static gregset_t initial_regs;
+
+static volatile unsigned long rip;
+
+static void sigsegv_for_sigreturn_test(int sig, siginfo_t *info, void *ctx_void)
+{
+ ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+ if (rip != ctx->uc_mcontext.gregs[REG_RIP]) {
+ printf("[FAIL]\tRequested RIP=0x%lx but got RIP=0x%lx\n",
+ rip, (unsigned long)ctx->uc_mcontext.gregs[REG_RIP]);
+ fflush(stdout);
+ _exit(1);
+ }
+
+ memcpy(&ctx->uc_mcontext.gregs, &initial_regs, sizeof(gregset_t));
+
+ printf("[OK]\tGot SIGSEGV at RIP=0x%lx\n", rip);
+}
+
+static void sigusr1(int sig, siginfo_t *info, void *ctx_void)
+{
+ ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+ memcpy(&initial_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
+
+ /* Set IP and CX to match so that SYSRET can happen. */
+ ctx->uc_mcontext.gregs[REG_RIP] = rip;
+ ctx->uc_mcontext.gregs[REG_RCX] = rip;
+
+ /* R11 and EFLAGS should already match. */
+ assert(ctx->uc_mcontext.gregs[REG_EFL] ==
+ ctx->uc_mcontext.gregs[REG_R11]);
+
+ sethandler(SIGSEGV, sigsegv_for_sigreturn_test, SA_RESETHAND);
+
+ return;
+}
+
+static void test_sigreturn_to(unsigned long ip)
+{
+ rip = ip;
+ printf("[RUN]\tsigreturn to 0x%lx\n", ip);
+ raise(SIGUSR1);
+}
+
+static jmp_buf jmpbuf;
+
+static void sigsegv_for_fallthrough(int sig, siginfo_t *info, void *ctx_void)
+{
+ ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+ if (rip != ctx->uc_mcontext.gregs[REG_RIP]) {
+ printf("[FAIL]\tExpected SIGSEGV at 0x%lx but got RIP=0x%lx\n",
+ rip, (unsigned long)ctx->uc_mcontext.gregs[REG_RIP]);
+ fflush(stdout);
+ _exit(1);
+ }
+
+ siglongjmp(jmpbuf, 1);
+}
+
+static void test_syscall_fallthrough_to(unsigned long ip)
+{
+ void *new_address = (void *)(ip - 4096);
+ void *ret;
+
+ printf("[RUN]\tTrying a SYSCALL that falls through to 0x%lx\n", ip);
+
+ ret = mremap((void *)current_test_page_addr, 4096, 4096,
+ MREMAP_MAYMOVE | MREMAP_FIXED, new_address);
+ if (ret == MAP_FAILED) {
+ if (ip <= (1UL << 47) - PAGE_SIZE) {
+ err(1, "mremap to %p", new_address);
+ } else {
+ printf("[OK]\tmremap to %p failed\n", new_address);
+ return;
+ }
+ }
+
+ if (ret != new_address)
+ errx(1, "mremap malfunctioned: asked for %p but got %p\n",
+ new_address, ret);
+
+ current_test_page_addr = new_address;
+ rip = ip;
+
+ if (sigsetjmp(jmpbuf, 1) == 0) {
+ asm volatile ("call *%[syscall_insn]" :: "a" (SYS_getpid),
+ [syscall_insn] "rm" (ip - 2));
+ errx(1, "[FAIL]\tSyscall trampoline returned");
+ }
+
+ printf("[OK]\tWe survived\n");
+}
+
+int main()
+{
+ /*
+ * When the kernel returns from a slow-path syscall, it will
+ * detect whether SYSRET is appropriate. If it incorrectly
+ * thinks that SYSRET is appropriate when RIP is noncanonical,
+ * it'll crash on Intel CPUs.
+ */
+ sethandler(SIGUSR1, sigusr1, 0);
+ for (int i = 47; i < 64; i++)
+ test_sigreturn_to(1UL<<i);
+
+ clearhandler(SIGUSR1);
+
+ sethandler(SIGSEGV, sigsegv_for_fallthrough, 0);
+
+ /* One extra test to check that we didn't screw up the mremap logic. */
+ test_syscall_fallthrough_to((1UL << 47) - 2*PAGE_SIZE);
+
+ /* These are the interesting cases. */
+ for (int i = 47; i < 64; i++) {
+ test_syscall_fallthrough_to((1UL<<i) - PAGE_SIZE);
+ test_syscall_fallthrough_to(1UL<<i);
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/x86/sysret_ss_attrs.c b/tools/testing/selftests/x86/sysret_ss_attrs.c
new file mode 100644
index 000000000..ce42d5a64
--- /dev/null
+++ b/tools/testing/selftests/x86/sysret_ss_attrs.c
@@ -0,0 +1,112 @@
+/*
+ * sysret_ss_attrs.c - test that syscalls return valid hidden SS attributes
+ * Copyright (c) 2015 Andrew Lutomirski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * On AMD CPUs, SYSRET can return with a valid SS descriptor with with
+ * the hidden attributes set to an unusable state. Make sure the kernel
+ * doesn't let this happen.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <err.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <pthread.h>
+
+static void *threadproc(void *ctx)
+{
+ /*
+ * Do our best to cause sleeps on this CPU to exit the kernel and
+ * re-enter with SS = 0.
+ */
+ while (true)
+ ;
+
+ return NULL;
+}
+
+#ifdef __x86_64__
+extern unsigned long call32_from_64(void *stack, void (*function)(void));
+
+asm (".pushsection .text\n\t"
+ ".code32\n\t"
+ "test_ss:\n\t"
+ "pushl $0\n\t"
+ "popl %eax\n\t"
+ "ret\n\t"
+ ".code64");
+extern void test_ss(void);
+#endif
+
+int main()
+{
+ /*
+ * Start a busy-looping thread on the same CPU we're on.
+ * For simplicity, just stick everything to CPU 0. This will
+ * fail in some containers, but that's probably okay.
+ */
+ cpu_set_t cpuset;
+ CPU_ZERO(&cpuset);
+ CPU_SET(0, &cpuset);
+ if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
+ printf("[WARN]\tsched_setaffinity failed\n");
+
+ pthread_t thread;
+ if (pthread_create(&thread, 0, threadproc, 0) != 0)
+ err(1, "pthread_create");
+
+#ifdef __x86_64__
+ unsigned char *stack32 = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
+ MAP_32BIT | MAP_ANONYMOUS | MAP_PRIVATE,
+ -1, 0);
+ if (stack32 == MAP_FAILED)
+ err(1, "mmap");
+#endif
+
+ printf("[RUN]\tSyscalls followed by SS validation\n");
+
+ for (int i = 0; i < 1000; i++) {
+ /*
+ * Go to sleep and return using sysret (if we're 64-bit
+ * or we're 32-bit on AMD on a 64-bit kernel). On AMD CPUs,
+ * SYSRET doesn't fix up the cached SS descriptor, so the
+ * kernel needs some kind of workaround to make sure that we
+ * end the system call with a valid stack segment. This
+ * can be a confusing failure because the SS *selector*
+ * is the same regardless.
+ */
+ usleep(2);
+
+#ifdef __x86_64__
+ /*
+ * On 32-bit, just doing a syscall through glibc is enough
+ * to cause a crash if our cached SS descriptor is invalid.
+ * On 64-bit, it's not, so try extra hard.
+ */
+ call32_from_64(stack32 + 4088, test_ss);
+#endif
+ }
+
+ printf("[OK]\tWe survived\n");
+
+#ifdef __x86_64__
+ munmap(stack32, 4096);
+#endif
+
+ return 0;
+}
diff --git a/tools/testing/selftests/x86/test_FCMOV.c b/tools/testing/selftests/x86/test_FCMOV.c
new file mode 100644
index 000000000..6b5036fbb
--- /dev/null
+++ b/tools/testing/selftests/x86/test_FCMOV.c
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: GPL-2.0
+#undef _GNU_SOURCE
+#define _GNU_SOURCE 1
+#undef __USE_GNU
+#define __USE_GNU 1
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <sys/select.h>
+#include <sys/time.h>
+#include <sys/wait.h>
+
+#define TEST(insn) \
+long double __attribute__((noinline)) insn(long flags) \
+{ \
+ long double out; \
+ asm ("\n" \
+ " push %1""\n" \
+ " popf""\n" \
+ " fldpi""\n" \
+ " fld1""\n" \
+ " " #insn " %%st(1), %%st" "\n" \
+ " ffree %%st(1)" "\n" \
+ : "=t" (out) \
+ : "r" (flags) \
+ ); \
+ return out; \
+}
+
+TEST(fcmovb)
+TEST(fcmove)
+TEST(fcmovbe)
+TEST(fcmovu)
+TEST(fcmovnb)
+TEST(fcmovne)
+TEST(fcmovnbe)
+TEST(fcmovnu)
+
+enum {
+ CF = 1 << 0,
+ PF = 1 << 2,
+ ZF = 1 << 6,
+};
+
+void sighandler(int sig)
+{
+ printf("[FAIL]\tGot signal %d, exiting\n", sig);
+ exit(1);
+}
+
+int main(int argc, char **argv, char **envp)
+{
+ int err = 0;
+
+ /* SIGILL triggers on 32-bit kernels w/o fcomi emulation
+ * when run with "no387 nofxsr". Other signals are caught
+ * just in case.
+ */
+ signal(SIGILL, sighandler);
+ signal(SIGFPE, sighandler);
+ signal(SIGSEGV, sighandler);
+
+ printf("[RUN]\tTesting fcmovCC instructions\n");
+ /* If fcmovCC() returns 1.0, the move wasn't done */
+ err |= !(fcmovb(0) == 1.0); err |= !(fcmovnb(0) != 1.0);
+ err |= !(fcmove(0) == 1.0); err |= !(fcmovne(0) != 1.0);
+ err |= !(fcmovbe(0) == 1.0); err |= !(fcmovnbe(0) != 1.0);
+ err |= !(fcmovu(0) == 1.0); err |= !(fcmovnu(0) != 1.0);
+
+ err |= !(fcmovb(CF) != 1.0); err |= !(fcmovnb(CF) == 1.0);
+ err |= !(fcmove(CF) == 1.0); err |= !(fcmovne(CF) != 1.0);
+ err |= !(fcmovbe(CF) != 1.0); err |= !(fcmovnbe(CF) == 1.0);
+ err |= !(fcmovu(CF) == 1.0); err |= !(fcmovnu(CF) != 1.0);
+
+ err |= !(fcmovb(ZF) == 1.0); err |= !(fcmovnb(ZF) != 1.0);
+ err |= !(fcmove(ZF) != 1.0); err |= !(fcmovne(ZF) == 1.0);
+ err |= !(fcmovbe(ZF) != 1.0); err |= !(fcmovnbe(ZF) == 1.0);
+ err |= !(fcmovu(ZF) == 1.0); err |= !(fcmovnu(ZF) != 1.0);
+
+ err |= !(fcmovb(PF) == 1.0); err |= !(fcmovnb(PF) != 1.0);
+ err |= !(fcmove(PF) == 1.0); err |= !(fcmovne(PF) != 1.0);
+ err |= !(fcmovbe(PF) == 1.0); err |= !(fcmovnbe(PF) != 1.0);
+ err |= !(fcmovu(PF) != 1.0); err |= !(fcmovnu(PF) == 1.0);
+
+ if (!err)
+ printf("[OK]\tfcmovCC\n");
+ else
+ printf("[FAIL]\tfcmovCC errors: %d\n", err);
+
+ return err;
+}
diff --git a/tools/testing/selftests/x86/test_FCOMI.c b/tools/testing/selftests/x86/test_FCOMI.c
new file mode 100644
index 000000000..aec6692c6
--- /dev/null
+++ b/tools/testing/selftests/x86/test_FCOMI.c
@@ -0,0 +1,332 @@
+// SPDX-License-Identifier: GPL-2.0
+#undef _GNU_SOURCE
+#define _GNU_SOURCE 1
+#undef __USE_GNU
+#define __USE_GNU 1
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <sys/select.h>
+#include <sys/time.h>
+#include <sys/wait.h>
+#include <fenv.h>
+
+enum {
+ CF = 1 << 0,
+ PF = 1 << 2,
+ ZF = 1 << 6,
+ ARITH = CF | PF | ZF,
+};
+
+long res_fcomi_pi_1;
+long res_fcomi_1_pi;
+long res_fcomi_1_1;
+long res_fcomi_nan_1;
+/* sNaN is s|111 1111 1|1xx xxxx xxxx xxxx xxxx xxxx */
+/* qNaN is s|111 1111 1|0xx xxxx xxxx xxxx xxxx xxxx (some x must be nonzero) */
+int snan = 0x7fc11111;
+int qnan = 0x7f811111;
+unsigned short snan1[5];
+/* sNaN80 is s|111 1111 1111 1111 |10xx xx...xx (some x must be nonzero) */
+unsigned short snan80[5] = { 0x1111, 0x1111, 0x1111, 0x8111, 0x7fff };
+
+int test(long flags)
+{
+ feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
+
+ asm ("\n"
+
+ " push %0""\n"
+ " popf""\n"
+ " fld1""\n"
+ " fldpi""\n"
+ " fcomi %%st(1), %%st" "\n"
+ " ffree %%st(0)" "\n"
+ " ffree %%st(1)" "\n"
+ " pushf""\n"
+ " pop res_fcomi_1_pi""\n"
+
+ " push %0""\n"
+ " popf""\n"
+ " fldpi""\n"
+ " fld1""\n"
+ " fcomi %%st(1), %%st" "\n"
+ " ffree %%st(0)" "\n"
+ " ffree %%st(1)" "\n"
+ " pushf""\n"
+ " pop res_fcomi_pi_1""\n"
+
+ " push %0""\n"
+ " popf""\n"
+ " fld1""\n"
+ " fld1""\n"
+ " fcomi %%st(1), %%st" "\n"
+ " ffree %%st(0)" "\n"
+ " ffree %%st(1)" "\n"
+ " pushf""\n"
+ " pop res_fcomi_1_1""\n"
+ :
+ : "r" (flags)
+ );
+ if ((res_fcomi_1_pi & ARITH) != (0)) {
+ printf("[BAD]\tfcomi_1_pi with flags:%lx\n", flags);
+ return 1;
+ }
+ if ((res_fcomi_pi_1 & ARITH) != (CF)) {
+ printf("[BAD]\tfcomi_pi_1 with flags:%lx->%lx\n", flags, res_fcomi_pi_1 & ARITH);
+ return 1;
+ }
+ if ((res_fcomi_1_1 & ARITH) != (ZF)) {
+ printf("[BAD]\tfcomi_1_1 with flags:%lx\n", flags);
+ return 1;
+ }
+ if (fetestexcept(FE_INVALID) != 0) {
+ printf("[BAD]\tFE_INVALID is set in %s\n", __func__);
+ return 1;
+ }
+ return 0;
+}
+
+int test_qnan(long flags)
+{
+ feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
+
+ asm ("\n"
+ " push %0""\n"
+ " popf""\n"
+ " flds qnan""\n"
+ " fld1""\n"
+ " fnclex""\n" // fld of a qnan raised FE_INVALID, clear it
+ " fcomi %%st(1), %%st" "\n"
+ " ffree %%st(0)" "\n"
+ " ffree %%st(1)" "\n"
+ " pushf""\n"
+ " pop res_fcomi_nan_1""\n"
+ :
+ : "r" (flags)
+ );
+ if ((res_fcomi_nan_1 & ARITH) != (ZF|CF|PF)) {
+ printf("[BAD]\tfcomi_qnan_1 with flags:%lx\n", flags);
+ return 1;
+ }
+ if (fetestexcept(FE_INVALID) != FE_INVALID) {
+ printf("[BAD]\tFE_INVALID is not set in %s\n", __func__);
+ return 1;
+ }
+ return 0;
+}
+
+int testu_qnan(long flags)
+{
+ feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
+
+ asm ("\n"
+ " push %0""\n"
+ " popf""\n"
+ " flds qnan""\n"
+ " fld1""\n"
+ " fnclex""\n" // fld of a qnan raised FE_INVALID, clear it
+ " fucomi %%st(1), %%st" "\n"
+ " ffree %%st(0)" "\n"
+ " ffree %%st(1)" "\n"
+ " pushf""\n"
+ " pop res_fcomi_nan_1""\n"
+ :
+ : "r" (flags)
+ );
+ if ((res_fcomi_nan_1 & ARITH) != (ZF|CF|PF)) {
+ printf("[BAD]\tfcomi_qnan_1 with flags:%lx\n", flags);
+ return 1;
+ }
+ if (fetestexcept(FE_INVALID) != 0) {
+ printf("[BAD]\tFE_INVALID is set in %s\n", __func__);
+ return 1;
+ }
+ return 0;
+}
+
+int testu_snan(long flags)
+{
+ feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
+
+ asm ("\n"
+ " push %0""\n"
+ " popf""\n"
+// " flds snan""\n" // WRONG, this will convert 32-bit fp snan to a *qnan* in 80-bit fp register!
+// " fstpt snan1""\n" // if uncommented, it prints "snan1:7fff c111 1100 0000 0000" - c111, not 8111!
+// " fnclex""\n" // flds of a snan raised FE_INVALID, clear it
+ " fldt snan80""\n" // fldt never raise FE_INVALID
+ " fld1""\n"
+ " fucomi %%st(1), %%st" "\n"
+ " ffree %%st(0)" "\n"
+ " ffree %%st(1)" "\n"
+ " pushf""\n"
+ " pop res_fcomi_nan_1""\n"
+ :
+ : "r" (flags)
+ );
+ if ((res_fcomi_nan_1 & ARITH) != (ZF|CF|PF)) {
+ printf("[BAD]\tfcomi_qnan_1 with flags:%lx\n", flags);
+ return 1;
+ }
+// printf("snan:%x snan1:%04x %04x %04x %04x %04x\n", snan, snan1[4], snan1[3], snan1[2], snan1[1], snan1[0]);
+ if (fetestexcept(FE_INVALID) != FE_INVALID) {
+ printf("[BAD]\tFE_INVALID is not set in %s\n", __func__);
+ return 1;
+ }
+ return 0;
+}
+
+int testp(long flags)
+{
+ feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
+
+ asm ("\n"
+
+ " push %0""\n"
+ " popf""\n"
+ " fld1""\n"
+ " fldpi""\n"
+ " fcomip %%st(1), %%st" "\n"
+ " ffree %%st(0)" "\n"
+ " pushf""\n"
+ " pop res_fcomi_1_pi""\n"
+
+ " push %0""\n"
+ " popf""\n"
+ " fldpi""\n"
+ " fld1""\n"
+ " fcomip %%st(1), %%st" "\n"
+ " ffree %%st(0)" "\n"
+ " pushf""\n"
+ " pop res_fcomi_pi_1""\n"
+
+ " push %0""\n"
+ " popf""\n"
+ " fld1""\n"
+ " fld1""\n"
+ " fcomip %%st(1), %%st" "\n"
+ " ffree %%st(0)" "\n"
+ " pushf""\n"
+ " pop res_fcomi_1_1""\n"
+ :
+ : "r" (flags)
+ );
+ if ((res_fcomi_1_pi & ARITH) != (0)) {
+ printf("[BAD]\tfcomi_1_pi with flags:%lx\n", flags);
+ return 1;
+ }
+ if ((res_fcomi_pi_1 & ARITH) != (CF)) {
+ printf("[BAD]\tfcomi_pi_1 with flags:%lx->%lx\n", flags, res_fcomi_pi_1 & ARITH);
+ return 1;
+ }
+ if ((res_fcomi_1_1 & ARITH) != (ZF)) {
+ printf("[BAD]\tfcomi_1_1 with flags:%lx\n", flags);
+ return 1;
+ }
+ if (fetestexcept(FE_INVALID) != 0) {
+ printf("[BAD]\tFE_INVALID is set in %s\n", __func__);
+ return 1;
+ }
+ return 0;
+}
+
+int testp_qnan(long flags)
+{
+ feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
+
+ asm ("\n"
+ " push %0""\n"
+ " popf""\n"
+ " flds qnan""\n"
+ " fld1""\n"
+ " fnclex""\n" // fld of a qnan raised FE_INVALID, clear it
+ " fcomip %%st(1), %%st" "\n"
+ " ffree %%st(0)" "\n"
+ " pushf""\n"
+ " pop res_fcomi_nan_1""\n"
+ :
+ : "r" (flags)
+ );
+ if ((res_fcomi_nan_1 & ARITH) != (ZF|CF|PF)) {
+ printf("[BAD]\tfcomi_qnan_1 with flags:%lx\n", flags);
+ return 1;
+ }
+ if (fetestexcept(FE_INVALID) != FE_INVALID) {
+ printf("[BAD]\tFE_INVALID is not set in %s\n", __func__);
+ return 1;
+ }
+ return 0;
+}
+
+int testup_qnan(long flags)
+{
+ feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
+
+ asm ("\n"
+ " push %0""\n"
+ " popf""\n"
+ " flds qnan""\n"
+ " fld1""\n"
+ " fnclex""\n" // fld of a qnan raised FE_INVALID, clear it
+ " fucomip %%st(1), %%st" "\n"
+ " ffree %%st(0)" "\n"
+ " pushf""\n"
+ " pop res_fcomi_nan_1""\n"
+ :
+ : "r" (flags)
+ );
+ if ((res_fcomi_nan_1 & ARITH) != (ZF|CF|PF)) {
+ printf("[BAD]\tfcomi_qnan_1 with flags:%lx\n", flags);
+ return 1;
+ }
+ if (fetestexcept(FE_INVALID) != 0) {
+ printf("[BAD]\tFE_INVALID is set in %s\n", __func__);
+ return 1;
+ }
+ return 0;
+}
+
+void sighandler(int sig)
+{
+ printf("[FAIL]\tGot signal %d, exiting\n", sig);
+ exit(1);
+}
+
+int main(int argc, char **argv, char **envp)
+{
+ int err = 0;
+
+ /* SIGILL triggers on 32-bit kernels w/o fcomi emulation
+ * when run with "no387 nofxsr". Other signals are caught
+ * just in case.
+ */
+ signal(SIGILL, sighandler);
+ signal(SIGFPE, sighandler);
+ signal(SIGSEGV, sighandler);
+
+ printf("[RUN]\tTesting f[u]comi[p] instructions\n");
+ err |= test(0);
+ err |= test_qnan(0);
+ err |= testu_qnan(0);
+ err |= testu_snan(0);
+ err |= test(CF|ZF|PF);
+ err |= test_qnan(CF|ZF|PF);
+ err |= testu_qnan(CF|ZF|PF);
+ err |= testu_snan(CF|ZF|PF);
+ err |= testp(0);
+ err |= testp_qnan(0);
+ err |= testup_qnan(0);
+ err |= testp(CF|ZF|PF);
+ err |= testp_qnan(CF|ZF|PF);
+ err |= testup_qnan(CF|ZF|PF);
+ if (!err)
+ printf("[OK]\tf[u]comi[p]\n");
+ else
+ printf("[FAIL]\tf[u]comi[p] errors: %d\n", err);
+
+ return err;
+}
diff --git a/tools/testing/selftests/x86/test_FISTTP.c b/tools/testing/selftests/x86/test_FISTTP.c
new file mode 100644
index 000000000..09789c0ce
--- /dev/null
+++ b/tools/testing/selftests/x86/test_FISTTP.c
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: GPL-2.0
+#undef _GNU_SOURCE
+#define _GNU_SOURCE 1
+#undef __USE_GNU
+#define __USE_GNU 1
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <sys/select.h>
+#include <sys/time.h>
+#include <sys/wait.h>
+#include <fenv.h>
+
+unsigned long long res64 = -1;
+unsigned int res32 = -1;
+unsigned short res16 = -1;
+
+int test(void)
+{
+ int ex;
+
+ feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
+ asm volatile ("\n"
+ " fld1""\n"
+ " fisttp res16""\n"
+ " fld1""\n"
+ " fisttpl res32""\n"
+ " fld1""\n"
+ " fisttpll res64""\n"
+ : : : "memory"
+ );
+ if (res16 != 1 || res32 != 1 || res64 != 1) {
+ printf("[BAD]\tfisttp 1\n");
+ return 1;
+ }
+ ex = fetestexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
+ if (ex != 0) {
+ printf("[BAD]\tfisttp 1: wrong exception state\n");
+ return 1;
+ }
+
+ feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
+ asm volatile ("\n"
+ " fldpi""\n"
+ " fisttp res16""\n"
+ " fldpi""\n"
+ " fisttpl res32""\n"
+ " fldpi""\n"
+ " fisttpll res64""\n"
+ : : : "memory"
+ );
+ if (res16 != 3 || res32 != 3 || res64 != 3) {
+ printf("[BAD]\tfisttp pi\n");
+ return 1;
+ }
+ ex = fetestexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
+ if (ex != FE_INEXACT) {
+ printf("[BAD]\tfisttp pi: wrong exception state\n");
+ return 1;
+ }
+
+ feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
+ asm volatile ("\n"
+ " fldpi""\n"
+ " fchs""\n"
+ " fisttp res16""\n"
+ " fldpi""\n"
+ " fchs""\n"
+ " fisttpl res32""\n"
+ " fldpi""\n"
+ " fchs""\n"
+ " fisttpll res64""\n"
+ : : : "memory"
+ );
+ if (res16 != 0xfffd || res32 != 0xfffffffd || res64 != 0xfffffffffffffffdULL) {
+ printf("[BAD]\tfisttp -pi\n");
+ return 1;
+ }
+ ex = fetestexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
+ if (ex != FE_INEXACT) {
+ printf("[BAD]\tfisttp -pi: wrong exception state\n");
+ return 1;
+ }
+
+ feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
+ asm volatile ("\n"
+ " fldln2""\n"
+ " fisttp res16""\n"
+ " fldln2""\n"
+ " fisttpl res32""\n"
+ " fldln2""\n"
+ " fisttpll res64""\n"
+ : : : "memory"
+ );
+ /* Test truncation to zero (round-to-nearest would give 1 here) */
+ if (res16 != 0 || res32 != 0 || res64 != 0) {
+ printf("[BAD]\tfisttp ln2\n");
+ return 1;
+ }
+ ex = fetestexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
+ if (ex != FE_INEXACT) {
+ printf("[BAD]\tfisttp ln2: wrong exception state\n");
+ return 1;
+ }
+
+ return 0;
+}
+
+void sighandler(int sig)
+{
+ printf("[FAIL]\tGot signal %d, exiting\n", sig);
+ exit(1);
+}
+
+int main(int argc, char **argv, char **envp)
+{
+ int err = 0;
+
+ /* SIGILL triggers on 32-bit kernels w/o fisttp emulation
+ * when run with "no387 nofxsr". Other signals are caught
+ * just in case.
+ */
+ signal(SIGILL, sighandler);
+ signal(SIGFPE, sighandler);
+ signal(SIGSEGV, sighandler);
+
+ printf("[RUN]\tTesting fisttp instructions\n");
+ err |= test();
+ if (!err)
+ printf("[OK]\tfisttp\n");
+ else
+ printf("[FAIL]\tfisttp errors: %d\n", err);
+
+ return err;
+}
diff --git a/tools/testing/selftests/x86/test_mremap_vdso.c b/tools/testing/selftests/x86/test_mremap_vdso.c
new file mode 100644
index 000000000..64f11c8d9
--- /dev/null
+++ b/tools/testing/selftests/x86/test_mremap_vdso.c
@@ -0,0 +1,115 @@
+/*
+ * 32-bit test to check vDSO mremap.
+ *
+ * Copyright (c) 2016 Dmitry Safonov
+ * Suggested-by: Andrew Lutomirski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+/*
+ * Can be built statically:
+ * gcc -Os -Wall -static -m32 test_mremap_vdso.c
+ */
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <errno.h>
+#include <unistd.h>
+#include <string.h>
+
+#include <sys/mman.h>
+#include <sys/auxv.h>
+#include <sys/syscall.h>
+#include <sys/wait.h>
+
+#define PAGE_SIZE 4096
+
+static int try_to_remap(void *vdso_addr, unsigned long size)
+{
+ void *dest_addr, *new_addr;
+
+ /* Searching for memory location where to remap */
+ dest_addr = mmap(0, size, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+ if (dest_addr == MAP_FAILED) {
+ printf("[WARN]\tmmap failed (%d): %m\n", errno);
+ return 0;
+ }
+
+ printf("[NOTE]\tMoving vDSO: [%p, %#lx] -> [%p, %#lx]\n",
+ vdso_addr, (unsigned long)vdso_addr + size,
+ dest_addr, (unsigned long)dest_addr + size);
+ fflush(stdout);
+
+ new_addr = mremap(vdso_addr, size, size,
+ MREMAP_FIXED|MREMAP_MAYMOVE, dest_addr);
+ if ((unsigned long)new_addr == (unsigned long)-1) {
+ munmap(dest_addr, size);
+ if (errno == EINVAL) {
+ printf("[NOTE]\tvDSO partial move failed, will try with bigger size\n");
+ return -1; /* Retry with larger */
+ }
+ printf("[FAIL]\tmremap failed (%d): %m\n", errno);
+ return 1;
+ }
+
+ return 0;
+
+}
+
+int main(int argc, char **argv, char **envp)
+{
+ pid_t child;
+
+ child = fork();
+ if (child == -1) {
+ printf("[WARN]\tfailed to fork (%d): %m\n", errno);
+ return 1;
+ }
+
+ if (child == 0) {
+ unsigned long vdso_size = PAGE_SIZE;
+ unsigned long auxval;
+ int ret = -1;
+
+ auxval = getauxval(AT_SYSINFO_EHDR);
+ printf("\tAT_SYSINFO_EHDR is %#lx\n", auxval);
+ if (!auxval || auxval == -ENOENT) {
+ printf("[WARN]\tgetauxval failed\n");
+ return 0;
+ }
+
+ /* Simpler than parsing ELF header */
+ while (ret < 0) {
+ ret = try_to_remap((void *)auxval, vdso_size);
+ vdso_size += PAGE_SIZE;
+ }
+
+#ifdef __i386__
+ /* Glibc is likely to explode now - exit with raw syscall */
+ asm volatile ("int $0x80" : : "a" (__NR_exit), "b" (!!ret));
+#else /* __x86_64__ */
+ syscall(SYS_exit, ret);
+#endif
+ } else {
+ int status;
+
+ if (waitpid(child, &status, 0) != child ||
+ !WIFEXITED(status)) {
+ printf("[FAIL]\tmremap() of the vDSO does not work on this kernel!\n");
+ return 1;
+ } else if (WEXITSTATUS(status) != 0) {
+ printf("[FAIL]\tChild failed with %d\n",
+ WEXITSTATUS(status));
+ return 1;
+ }
+ printf("[OK]\n");
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/x86/test_syscall_vdso.c b/tools/testing/selftests/x86/test_syscall_vdso.c
new file mode 100644
index 000000000..c9c328107
--- /dev/null
+++ b/tools/testing/selftests/x86/test_syscall_vdso.c
@@ -0,0 +1,408 @@
+/*
+ * 32-bit syscall ABI conformance test.
+ *
+ * Copyright (c) 2015 Denys Vlasenko
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+/*
+ * Can be built statically:
+ * gcc -Os -Wall -static -m32 test_syscall_vdso.c thunks_32.S
+ */
+#undef _GNU_SOURCE
+#define _GNU_SOURCE 1
+#undef __USE_GNU
+#define __USE_GNU 1
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <sys/select.h>
+#include <sys/time.h>
+#include <elf.h>
+#include <sys/ptrace.h>
+#include <sys/wait.h>
+
+#if !defined(__i386__)
+int main(int argc, char **argv, char **envp)
+{
+ printf("[SKIP]\tNot a 32-bit x86 userspace\n");
+ return 0;
+}
+#else
+
+long syscall_addr;
+long get_syscall(char **envp)
+{
+ Elf32_auxv_t *auxv;
+ while (*envp++ != NULL)
+ continue;
+ for (auxv = (void *)envp; auxv->a_type != AT_NULL; auxv++)
+ if (auxv->a_type == AT_SYSINFO)
+ return auxv->a_un.a_val;
+ printf("[WARN]\tAT_SYSINFO not supplied\n");
+ return 0;
+}
+
+asm (
+ " .pushsection .text\n"
+ " .global int80\n"
+ "int80:\n"
+ " int $0x80\n"
+ " ret\n"
+ " .popsection\n"
+);
+extern char int80;
+
+struct regs64 {
+ uint64_t rax, rbx, rcx, rdx;
+ uint64_t rsi, rdi, rbp, rsp;
+ uint64_t r8, r9, r10, r11;
+ uint64_t r12, r13, r14, r15;
+};
+struct regs64 regs64;
+int kernel_is_64bit;
+
+asm (
+ " .pushsection .text\n"
+ " .code64\n"
+ "get_regs64:\n"
+ " push %rax\n"
+ " mov $regs64, %eax\n"
+ " pop 0*8(%rax)\n"
+ " movq %rbx, 1*8(%rax)\n"
+ " movq %rcx, 2*8(%rax)\n"
+ " movq %rdx, 3*8(%rax)\n"
+ " movq %rsi, 4*8(%rax)\n"
+ " movq %rdi, 5*8(%rax)\n"
+ " movq %rbp, 6*8(%rax)\n"
+ " movq %rsp, 7*8(%rax)\n"
+ " movq %r8, 8*8(%rax)\n"
+ " movq %r9, 9*8(%rax)\n"
+ " movq %r10, 10*8(%rax)\n"
+ " movq %r11, 11*8(%rax)\n"
+ " movq %r12, 12*8(%rax)\n"
+ " movq %r13, 13*8(%rax)\n"
+ " movq %r14, 14*8(%rax)\n"
+ " movq %r15, 15*8(%rax)\n"
+ " ret\n"
+ "poison_regs64:\n"
+ " movq $0x7f7f7f7f, %r8\n"
+ " shl $32, %r8\n"
+ " orq $0x7f7f7f7f, %r8\n"
+ " movq %r8, %r9\n"
+ " incq %r9\n"
+ " movq %r9, %r10\n"
+ " incq %r10\n"
+ " movq %r10, %r11\n"
+ " incq %r11\n"
+ " movq %r11, %r12\n"
+ " incq %r12\n"
+ " movq %r12, %r13\n"
+ " incq %r13\n"
+ " movq %r13, %r14\n"
+ " incq %r14\n"
+ " movq %r14, %r15\n"
+ " incq %r15\n"
+ " ret\n"
+ " .code32\n"
+ " .popsection\n"
+);
+extern void get_regs64(void);
+extern void poison_regs64(void);
+extern unsigned long call64_from_32(void (*function)(void));
+void print_regs64(void)
+{
+ if (!kernel_is_64bit)
+ return;
+ printf("ax:%016llx bx:%016llx cx:%016llx dx:%016llx\n", regs64.rax, regs64.rbx, regs64.rcx, regs64.rdx);
+ printf("si:%016llx di:%016llx bp:%016llx sp:%016llx\n", regs64.rsi, regs64.rdi, regs64.rbp, regs64.rsp);
+ printf(" 8:%016llx 9:%016llx 10:%016llx 11:%016llx\n", regs64.r8 , regs64.r9 , regs64.r10, regs64.r11);
+ printf("12:%016llx 13:%016llx 14:%016llx 15:%016llx\n", regs64.r12, regs64.r13, regs64.r14, regs64.r15);
+}
+
+int check_regs64(void)
+{
+ int err = 0;
+ int num = 8;
+ uint64_t *r64 = &regs64.r8;
+ uint64_t expected = 0x7f7f7f7f7f7f7f7fULL;
+
+ if (!kernel_is_64bit)
+ return 0;
+
+ do {
+ if (*r64 == expected++)
+ continue; /* register did not change */
+ if (syscall_addr != (long)&int80) {
+ /*
+ * Non-INT80 syscall entrypoints are allowed to clobber R8+ regs:
+ * either clear them to 0, or for R11, load EFLAGS.
+ */
+ if (*r64 == 0)
+ continue;
+ if (num == 11) {
+ printf("[NOTE]\tR11 has changed:%016llx - assuming clobbered by SYSRET insn\n", *r64);
+ continue;
+ }
+ } else {
+ /*
+ * INT80 syscall entrypoint can be used by
+ * 64-bit programs too, unlike SYSCALL/SYSENTER.
+ * Therefore it must preserve R12+
+ * (they are callee-saved registers in 64-bit C ABI).
+ *
+ * Starting in Linux 4.17 (and any kernel that
+ * backports the change), R8..11 are preserved.
+ * Historically (and probably unintentionally), they
+ * were clobbered or zeroed.
+ */
+ }
+ printf("[FAIL]\tR%d has changed:%016llx\n", num, *r64);
+ err++;
+ } while (r64++, ++num < 16);
+
+ if (!err)
+ printf("[OK]\tR8..R15 did not leak kernel data\n");
+ return err;
+}
+
+int nfds;
+fd_set rfds;
+fd_set wfds;
+fd_set efds;
+struct timespec timeout;
+sigset_t sigmask;
+struct {
+ sigset_t *sp;
+ int sz;
+} sigmask_desc;
+
+void prep_args()
+{
+ nfds = 42;
+ FD_ZERO(&rfds);
+ FD_ZERO(&wfds);
+ FD_ZERO(&efds);
+ FD_SET(0, &rfds);
+ FD_SET(1, &wfds);
+ FD_SET(2, &efds);
+ timeout.tv_sec = 0;
+ timeout.tv_nsec = 123;
+ sigemptyset(&sigmask);
+ sigaddset(&sigmask, SIGINT);
+ sigaddset(&sigmask, SIGUSR2);
+ sigaddset(&sigmask, SIGRTMAX);
+ sigmask_desc.sp = &sigmask;
+ sigmask_desc.sz = 8; /* bytes */
+}
+
+static void print_flags(const char *name, unsigned long r)
+{
+ static const char *bitarray[] = {
+ "\n" ,"c\n" ,/* Carry Flag */
+ "0 " ,"1 " ,/* Bit 1 - always on */
+ "" ,"p " ,/* Parity Flag */
+ "0 " ,"3? " ,
+ "" ,"a " ,/* Auxiliary carry Flag */
+ "0 " ,"5? " ,
+ "" ,"z " ,/* Zero Flag */
+ "" ,"s " ,/* Sign Flag */
+ "" ,"t " ,/* Trap Flag */
+ "" ,"i " ,/* Interrupt Flag */
+ "" ,"d " ,/* Direction Flag */
+ "" ,"o " ,/* Overflow Flag */
+ "0 " ,"1 " ,/* I/O Privilege Level (2 bits) */
+ "0" ,"1" ,/* I/O Privilege Level (2 bits) */
+ "" ,"n " ,/* Nested Task */
+ "0 " ,"15? ",
+ "" ,"r " ,/* Resume Flag */
+ "" ,"v " ,/* Virtual Mode */
+ "" ,"ac " ,/* Alignment Check/Access Control */
+ "" ,"vif ",/* Virtual Interrupt Flag */
+ "" ,"vip ",/* Virtual Interrupt Pending */
+ "" ,"id " ,/* CPUID detection */
+ NULL
+ };
+ const char **bitstr;
+ int bit;
+
+ printf("%s=%016lx ", name, r);
+ bitstr = bitarray + 42;
+ bit = 21;
+ if ((r >> 22) != 0)
+ printf("(extra bits are set) ");
+ do {
+ if (bitstr[(r >> bit) & 1][0])
+ fputs(bitstr[(r >> bit) & 1], stdout);
+ bitstr -= 2;
+ bit--;
+ } while (bit >= 0);
+}
+
+int run_syscall(void)
+{
+ long flags, bad_arg;
+
+ prep_args();
+
+ if (kernel_is_64bit)
+ call64_from_32(poison_regs64);
+ /*print_regs64();*/
+
+ asm("\n"
+ /* Try 6-arg syscall: pselect. It should return quickly */
+ " push %%ebp\n"
+ " mov $308, %%eax\n" /* PSELECT */
+ " mov nfds, %%ebx\n" /* ebx arg1 */
+ " mov $rfds, %%ecx\n" /* ecx arg2 */
+ " mov $wfds, %%edx\n" /* edx arg3 */
+ " mov $efds, %%esi\n" /* esi arg4 */
+ " mov $timeout, %%edi\n" /* edi arg5 */
+ " mov $sigmask_desc, %%ebp\n" /* %ebp arg6 */
+ " push $0x200ed7\n" /* set almost all flags */
+ " popf\n" /* except TF, IOPL, NT, RF, VM, AC, VIF, VIP */
+ " call *syscall_addr\n"
+ /* Check that registers are not clobbered */
+ " pushf\n"
+ " pop %%eax\n"
+ " cld\n"
+ " cmp nfds, %%ebx\n" /* ebx arg1 */
+ " mov $1, %%ebx\n"
+ " jne 1f\n"
+ " cmp $rfds, %%ecx\n" /* ecx arg2 */
+ " mov $2, %%ebx\n"
+ " jne 1f\n"
+ " cmp $wfds, %%edx\n" /* edx arg3 */
+ " mov $3, %%ebx\n"
+ " jne 1f\n"
+ " cmp $efds, %%esi\n" /* esi arg4 */
+ " mov $4, %%ebx\n"
+ " jne 1f\n"
+ " cmp $timeout, %%edi\n" /* edi arg5 */
+ " mov $5, %%ebx\n"
+ " jne 1f\n"
+ " cmpl $sigmask_desc, %%ebp\n" /* %ebp arg6 */
+ " mov $6, %%ebx\n"
+ " jne 1f\n"
+ " mov $0, %%ebx\n"
+ "1:\n"
+ " pop %%ebp\n"
+ : "=a" (flags), "=b" (bad_arg)
+ :
+ : "cx", "dx", "si", "di"
+ );
+
+ if (kernel_is_64bit) {
+ memset(&regs64, 0x77, sizeof(regs64));
+ call64_from_32(get_regs64);
+ /*print_regs64();*/
+ }
+
+ /*
+ * On paravirt kernels, flags are not preserved across syscalls.
+ * Thus, we do not consider it a bug if some are changed.
+ * We just show ones which do.
+ */
+ if ((0x200ed7 ^ flags) != 0) {
+ print_flags("[WARN]\tFlags before", 0x200ed7);
+ print_flags("[WARN]\tFlags after", flags);
+ print_flags("[WARN]\tFlags change", (0x200ed7 ^ flags));
+ }
+
+ if (bad_arg) {
+ printf("[FAIL]\targ#%ld clobbered\n", bad_arg);
+ return 1;
+ }
+ printf("[OK]\tArguments are preserved across syscall\n");
+
+ return check_regs64();
+}
+
+int run_syscall_twice()
+{
+ int exitcode = 0;
+ long sv;
+
+ if (syscall_addr) {
+ printf("[RUN]\tExecuting 6-argument 32-bit syscall via VDSO\n");
+ exitcode = run_syscall();
+ }
+ sv = syscall_addr;
+ syscall_addr = (long)&int80;
+ printf("[RUN]\tExecuting 6-argument 32-bit syscall via INT 80\n");
+ exitcode += run_syscall();
+ syscall_addr = sv;
+ return exitcode;
+}
+
+void ptrace_me()
+{
+ pid_t pid;
+
+ fflush(NULL);
+ pid = fork();
+ if (pid < 0)
+ exit(1);
+ if (pid == 0) {
+ /* child */
+ if (ptrace(PTRACE_TRACEME, 0L, 0L, 0L) != 0)
+ exit(0);
+ raise(SIGSTOP);
+ return;
+ }
+ /* parent */
+ printf("[RUN]\tRunning tests under ptrace\n");
+ while (1) {
+ int status;
+ pid = waitpid(-1, &status, __WALL);
+ if (WIFEXITED(status))
+ exit(WEXITSTATUS(status));
+ if (WIFSIGNALED(status))
+ exit(WTERMSIG(status));
+ if (pid <= 0 || !WIFSTOPPED(status)) /* paranoia */
+ exit(255);
+ /*
+ * Note: we do not inject sig = WSTOPSIG(status).
+ * We probably should, but careful: do not inject SIGTRAP
+ * generated by syscall entry/exit stops.
+ * That kills the child.
+ */
+ ptrace(PTRACE_SYSCALL, pid, 0L, 0L /*sig*/);
+ }
+}
+
+int main(int argc, char **argv, char **envp)
+{
+ int exitcode = 0;
+ int cs;
+
+ asm("\n"
+ " movl %%cs, %%eax\n"
+ : "=a" (cs)
+ );
+ kernel_is_64bit = (cs == 0x23);
+ if (!kernel_is_64bit)
+ printf("[NOTE]\tNot a 64-bit kernel, won't test R8..R15 leaks\n");
+
+ /* This only works for non-static builds:
+ * syscall_addr = dlsym(dlopen("linux-gate.so.1", RTLD_NOW), "__kernel_vsyscall");
+ */
+ syscall_addr = get_syscall(envp);
+
+ exitcode += run_syscall_twice();
+ ptrace_me();
+ exitcode += run_syscall_twice();
+
+ return exitcode;
+}
+#endif
diff --git a/tools/testing/selftests/x86/test_vdso.c b/tools/testing/selftests/x86/test_vdso.c
new file mode 100644
index 000000000..35edd61d1
--- /dev/null
+++ b/tools/testing/selftests/x86/test_vdso.c
@@ -0,0 +1,337 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ldt_gdt.c - Test cases for LDT and GDT access
+ * Copyright (c) 2011-2015 Andrew Lutomirski
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <sys/time.h>
+#include <time.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <dlfcn.h>
+#include <string.h>
+#include <errno.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <limits.h>
+
+#ifndef SYS_getcpu
+# ifdef __x86_64__
+# define SYS_getcpu 309
+# else
+# define SYS_getcpu 318
+# endif
+#endif
+
+/* max length of lines in /proc/self/maps - anything longer is skipped here */
+#define MAPS_LINE_LEN 128
+
+int nerrs = 0;
+
+typedef int (*vgettime_t)(clockid_t, struct timespec *);
+
+vgettime_t vdso_clock_gettime;
+
+typedef long (*vgtod_t)(struct timeval *tv, struct timezone *tz);
+
+vgtod_t vdso_gettimeofday;
+
+typedef long (*getcpu_t)(unsigned *, unsigned *, void *);
+
+getcpu_t vgetcpu;
+getcpu_t vdso_getcpu;
+
+static void *vsyscall_getcpu(void)
+{
+#ifdef __x86_64__
+ FILE *maps;
+ char line[MAPS_LINE_LEN];
+ bool found = false;
+
+ maps = fopen("/proc/self/maps", "r");
+ if (!maps) /* might still be present, but ignore it here, as we test vDSO not vsyscall */
+ return NULL;
+
+ while (fgets(line, MAPS_LINE_LEN, maps)) {
+ char r, x;
+ void *start, *end;
+ char name[MAPS_LINE_LEN];
+
+ /* sscanf() is safe here as strlen(name) >= strlen(line) */
+ if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s",
+ &start, &end, &r, &x, name) != 5)
+ continue;
+
+ if (strcmp(name, "[vsyscall]"))
+ continue;
+
+ /* assume entries are OK, as we test vDSO here not vsyscall */
+ found = true;
+ break;
+ }
+
+ fclose(maps);
+
+ if (!found) {
+ printf("Warning: failed to find vsyscall getcpu\n");
+ return NULL;
+ }
+ return (void *) (0xffffffffff600800);
+#else
+ return NULL;
+#endif
+}
+
+
+static void fill_function_pointers()
+{
+ void *vdso = dlopen("linux-vdso.so.1",
+ RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
+ if (!vdso)
+ vdso = dlopen("linux-gate.so.1",
+ RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
+ if (!vdso) {
+ printf("[WARN]\tfailed to find vDSO\n");
+ return;
+ }
+
+ vdso_getcpu = (getcpu_t)dlsym(vdso, "__vdso_getcpu");
+ if (!vdso_getcpu)
+ printf("Warning: failed to find getcpu in vDSO\n");
+
+ vgetcpu = (getcpu_t) vsyscall_getcpu();
+
+ vdso_clock_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime");
+ if (!vdso_clock_gettime)
+ printf("Warning: failed to find clock_gettime in vDSO\n");
+
+ vdso_gettimeofday = (vgtod_t)dlsym(vdso, "__vdso_gettimeofday");
+ if (!vdso_gettimeofday)
+ printf("Warning: failed to find gettimeofday in vDSO\n");
+
+}
+
+static long sys_getcpu(unsigned * cpu, unsigned * node,
+ void* cache)
+{
+ return syscall(__NR_getcpu, cpu, node, cache);
+}
+
+static inline int sys_clock_gettime(clockid_t id, struct timespec *ts)
+{
+ return syscall(__NR_clock_gettime, id, ts);
+}
+
+static inline int sys_gettimeofday(struct timeval *tv, struct timezone *tz)
+{
+ return syscall(__NR_gettimeofday, tv, tz);
+}
+
+static void test_getcpu(void)
+{
+ printf("[RUN]\tTesting getcpu...\n");
+
+ for (int cpu = 0; ; cpu++) {
+ cpu_set_t cpuset;
+ CPU_ZERO(&cpuset);
+ CPU_SET(cpu, &cpuset);
+ if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
+ return;
+
+ unsigned cpu_sys, cpu_vdso, cpu_vsys,
+ node_sys, node_vdso, node_vsys;
+ long ret_sys, ret_vdso = 1, ret_vsys = 1;
+ unsigned node;
+
+ ret_sys = sys_getcpu(&cpu_sys, &node_sys, 0);
+ if (vdso_getcpu)
+ ret_vdso = vdso_getcpu(&cpu_vdso, &node_vdso, 0);
+ if (vgetcpu)
+ ret_vsys = vgetcpu(&cpu_vsys, &node_vsys, 0);
+
+ if (!ret_sys)
+ node = node_sys;
+ else if (!ret_vdso)
+ node = node_vdso;
+ else if (!ret_vsys)
+ node = node_vsys;
+
+ bool ok = true;
+ if (!ret_sys && (cpu_sys != cpu || node_sys != node))
+ ok = false;
+ if (!ret_vdso && (cpu_vdso != cpu || node_vdso != node))
+ ok = false;
+ if (!ret_vsys && (cpu_vsys != cpu || node_vsys != node))
+ ok = false;
+
+ printf("[%s]\tCPU %u:", ok ? "OK" : "FAIL", cpu);
+ if (!ret_sys)
+ printf(" syscall: cpu %u, node %u", cpu_sys, node_sys);
+ if (!ret_vdso)
+ printf(" vdso: cpu %u, node %u", cpu_vdso, node_vdso);
+ if (!ret_vsys)
+ printf(" vsyscall: cpu %u, node %u", cpu_vsys,
+ node_vsys);
+ printf("\n");
+
+ if (!ok)
+ nerrs++;
+ }
+}
+
+static bool ts_leq(const struct timespec *a, const struct timespec *b)
+{
+ if (a->tv_sec != b->tv_sec)
+ return a->tv_sec < b->tv_sec;
+ else
+ return a->tv_nsec <= b->tv_nsec;
+}
+
+static bool tv_leq(const struct timeval *a, const struct timeval *b)
+{
+ if (a->tv_sec != b->tv_sec)
+ return a->tv_sec < b->tv_sec;
+ else
+ return a->tv_usec <= b->tv_usec;
+}
+
+static char const * const clocknames[] = {
+ [0] = "CLOCK_REALTIME",
+ [1] = "CLOCK_MONOTONIC",
+ [2] = "CLOCK_PROCESS_CPUTIME_ID",
+ [3] = "CLOCK_THREAD_CPUTIME_ID",
+ [4] = "CLOCK_MONOTONIC_RAW",
+ [5] = "CLOCK_REALTIME_COARSE",
+ [6] = "CLOCK_MONOTONIC_COARSE",
+ [7] = "CLOCK_BOOTTIME",
+ [8] = "CLOCK_REALTIME_ALARM",
+ [9] = "CLOCK_BOOTTIME_ALARM",
+ [10] = "CLOCK_SGI_CYCLE",
+ [11] = "CLOCK_TAI",
+};
+
+static void test_one_clock_gettime(int clock, const char *name)
+{
+ struct timespec start, vdso, end;
+ int vdso_ret, end_ret;
+
+ printf("[RUN]\tTesting clock_gettime for clock %s (%d)...\n", name, clock);
+
+ if (sys_clock_gettime(clock, &start) < 0) {
+ if (errno == EINVAL) {
+ vdso_ret = vdso_clock_gettime(clock, &vdso);
+ if (vdso_ret == -EINVAL) {
+ printf("[OK]\tNo such clock.\n");
+ } else {
+ printf("[FAIL]\tNo such clock, but __vdso_clock_gettime returned %d\n", vdso_ret);
+ nerrs++;
+ }
+ } else {
+ printf("[WARN]\t clock_gettime(%d) syscall returned error %d\n", clock, errno);
+ }
+ return;
+ }
+
+ vdso_ret = vdso_clock_gettime(clock, &vdso);
+ end_ret = sys_clock_gettime(clock, &end);
+
+ if (vdso_ret != 0 || end_ret != 0) {
+ printf("[FAIL]\tvDSO returned %d, syscall errno=%d\n",
+ vdso_ret, errno);
+ nerrs++;
+ return;
+ }
+
+ printf("\t%llu.%09ld %llu.%09ld %llu.%09ld\n",
+ (unsigned long long)start.tv_sec, start.tv_nsec,
+ (unsigned long long)vdso.tv_sec, vdso.tv_nsec,
+ (unsigned long long)end.tv_sec, end.tv_nsec);
+
+ if (!ts_leq(&start, &vdso) || !ts_leq(&vdso, &end)) {
+ printf("[FAIL]\tTimes are out of sequence\n");
+ nerrs++;
+ }
+}
+
+static void test_clock_gettime(void)
+{
+ for (int clock = 0; clock < sizeof(clocknames) / sizeof(clocknames[0]);
+ clock++) {
+ test_one_clock_gettime(clock, clocknames[clock]);
+ }
+
+ /* Also test some invalid clock ids */
+ test_one_clock_gettime(-1, "invalid");
+ test_one_clock_gettime(INT_MIN, "invalid");
+ test_one_clock_gettime(INT_MAX, "invalid");
+}
+
+static void test_gettimeofday(void)
+{
+ struct timeval start, vdso, end;
+ struct timezone sys_tz, vdso_tz;
+ int vdso_ret, end_ret;
+
+ if (!vdso_gettimeofday)
+ return;
+
+ printf("[RUN]\tTesting gettimeofday...\n");
+
+ if (sys_gettimeofday(&start, &sys_tz) < 0) {
+ printf("[FAIL]\tsys_gettimeofday failed (%d)\n", errno);
+ nerrs++;
+ return;
+ }
+
+ vdso_ret = vdso_gettimeofday(&vdso, &vdso_tz);
+ end_ret = sys_gettimeofday(&end, NULL);
+
+ if (vdso_ret != 0 || end_ret != 0) {
+ printf("[FAIL]\tvDSO returned %d, syscall errno=%d\n",
+ vdso_ret, errno);
+ nerrs++;
+ return;
+ }
+
+ printf("\t%llu.%06ld %llu.%06ld %llu.%06ld\n",
+ (unsigned long long)start.tv_sec, start.tv_usec,
+ (unsigned long long)vdso.tv_sec, vdso.tv_usec,
+ (unsigned long long)end.tv_sec, end.tv_usec);
+
+ if (!tv_leq(&start, &vdso) || !tv_leq(&vdso, &end)) {
+ printf("[FAIL]\tTimes are out of sequence\n");
+ nerrs++;
+ }
+
+ if (sys_tz.tz_minuteswest == vdso_tz.tz_minuteswest &&
+ sys_tz.tz_dsttime == vdso_tz.tz_dsttime) {
+ printf("[OK]\ttimezones match: minuteswest=%d, dsttime=%d\n",
+ sys_tz.tz_minuteswest, sys_tz.tz_dsttime);
+ } else {
+ printf("[FAIL]\ttimezones do not match\n");
+ nerrs++;
+ }
+
+ /* And make sure that passing NULL for tz doesn't crash. */
+ vdso_gettimeofday(&vdso, NULL);
+}
+
+int main(int argc, char **argv)
+{
+ fill_function_pointers();
+
+ test_clock_gettime();
+ test_gettimeofday();
+
+ /*
+ * Test getcpu() last so that, if something goes wrong setting affinity,
+ * we still run the other tests.
+ */
+ test_getcpu();
+
+ return nerrs ? 1 : 0;
+}
diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c
new file mode 100644
index 000000000..0b4f1cc22
--- /dev/null
+++ b/tools/testing/selftests/x86/test_vsyscall.c
@@ -0,0 +1,506 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <sys/time.h>
+#include <time.h>
+#include <stdlib.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <dlfcn.h>
+#include <string.h>
+#include <inttypes.h>
+#include <signal.h>
+#include <sys/ucontext.h>
+#include <errno.h>
+#include <err.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <setjmp.h>
+
+#ifdef __x86_64__
+# define VSYS(x) (x)
+#else
+# define VSYS(x) 0
+#endif
+
+#ifndef SYS_getcpu
+# ifdef __x86_64__
+# define SYS_getcpu 309
+# else
+# define SYS_getcpu 318
+# endif
+#endif
+
+/* max length of lines in /proc/self/maps - anything longer is skipped here */
+#define MAPS_LINE_LEN 128
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+ int flags)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+/* vsyscalls and vDSO */
+bool should_read_vsyscall = false;
+
+typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz);
+gtod_t vgtod = (gtod_t)VSYS(0xffffffffff600000);
+gtod_t vdso_gtod;
+
+typedef int (*vgettime_t)(clockid_t, struct timespec *);
+vgettime_t vdso_gettime;
+
+typedef long (*time_func_t)(time_t *t);
+time_func_t vtime = (time_func_t)VSYS(0xffffffffff600400);
+time_func_t vdso_time;
+
+typedef long (*getcpu_t)(unsigned *, unsigned *, void *);
+getcpu_t vgetcpu = (getcpu_t)VSYS(0xffffffffff600800);
+getcpu_t vdso_getcpu;
+
+static void init_vdso(void)
+{
+ void *vdso = dlopen("linux-vdso.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
+ if (!vdso)
+ vdso = dlopen("linux-gate.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
+ if (!vdso) {
+ printf("[WARN]\tfailed to find vDSO\n");
+ return;
+ }
+
+ vdso_gtod = (gtod_t)dlsym(vdso, "__vdso_gettimeofday");
+ if (!vdso_gtod)
+ printf("[WARN]\tfailed to find gettimeofday in vDSO\n");
+
+ vdso_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime");
+ if (!vdso_gettime)
+ printf("[WARN]\tfailed to find clock_gettime in vDSO\n");
+
+ vdso_time = (time_func_t)dlsym(vdso, "__vdso_time");
+ if (!vdso_time)
+ printf("[WARN]\tfailed to find time in vDSO\n");
+
+ vdso_getcpu = (getcpu_t)dlsym(vdso, "__vdso_getcpu");
+ if (!vdso_getcpu) {
+ /* getcpu() was never wired up in the 32-bit vDSO. */
+ printf("[%s]\tfailed to find getcpu in vDSO\n",
+ sizeof(long) == 8 ? "WARN" : "NOTE");
+ }
+}
+
+static int init_vsys(void)
+{
+#ifdef __x86_64__
+ int nerrs = 0;
+ FILE *maps;
+ char line[MAPS_LINE_LEN];
+ bool found = false;
+
+ maps = fopen("/proc/self/maps", "r");
+ if (!maps) {
+ printf("[WARN]\tCould not open /proc/self/maps -- assuming vsyscall is r-x\n");
+ should_read_vsyscall = true;
+ return 0;
+ }
+
+ while (fgets(line, MAPS_LINE_LEN, maps)) {
+ char r, x;
+ void *start, *end;
+ char name[MAPS_LINE_LEN];
+
+ /* sscanf() is safe here as strlen(name) >= strlen(line) */
+ if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s",
+ &start, &end, &r, &x, name) != 5)
+ continue;
+
+ if (strcmp(name, "[vsyscall]"))
+ continue;
+
+ printf("\tvsyscall map: %s", line);
+
+ if (start != (void *)0xffffffffff600000 ||
+ end != (void *)0xffffffffff601000) {
+ printf("[FAIL]\taddress range is nonsense\n");
+ nerrs++;
+ }
+
+ printf("\tvsyscall permissions are %c-%c\n", r, x);
+ should_read_vsyscall = (r == 'r');
+ if (x != 'x') {
+ vgtod = NULL;
+ vtime = NULL;
+ vgetcpu = NULL;
+ }
+
+ found = true;
+ break;
+ }
+
+ fclose(maps);
+
+ if (!found) {
+ printf("\tno vsyscall map in /proc/self/maps\n");
+ should_read_vsyscall = false;
+ vgtod = NULL;
+ vtime = NULL;
+ vgetcpu = NULL;
+ }
+
+ return nerrs;
+#else
+ return 0;
+#endif
+}
+
+/* syscalls */
+static inline long sys_gtod(struct timeval *tv, struct timezone *tz)
+{
+ return syscall(SYS_gettimeofday, tv, tz);
+}
+
+static inline int sys_clock_gettime(clockid_t id, struct timespec *ts)
+{
+ return syscall(SYS_clock_gettime, id, ts);
+}
+
+static inline long sys_time(time_t *t)
+{
+ return syscall(SYS_time, t);
+}
+
+static inline long sys_getcpu(unsigned * cpu, unsigned * node,
+ void* cache)
+{
+ return syscall(SYS_getcpu, cpu, node, cache);
+}
+
+static jmp_buf jmpbuf;
+
+static void sigsegv(int sig, siginfo_t *info, void *ctx_void)
+{
+ siglongjmp(jmpbuf, 1);
+}
+
+static double tv_diff(const struct timeval *a, const struct timeval *b)
+{
+ return (double)(a->tv_sec - b->tv_sec) +
+ (double)((int)a->tv_usec - (int)b->tv_usec) * 1e-6;
+}
+
+static int check_gtod(const struct timeval *tv_sys1,
+ const struct timeval *tv_sys2,
+ const struct timezone *tz_sys,
+ const char *which,
+ const struct timeval *tv_other,
+ const struct timezone *tz_other)
+{
+ int nerrs = 0;
+ double d1, d2;
+
+ if (tz_other && (tz_sys->tz_minuteswest != tz_other->tz_minuteswest || tz_sys->tz_dsttime != tz_other->tz_dsttime)) {
+ printf("[FAIL] %s tz mismatch\n", which);
+ nerrs++;
+ }
+
+ d1 = tv_diff(tv_other, tv_sys1);
+ d2 = tv_diff(tv_sys2, tv_other);
+ printf("\t%s time offsets: %lf %lf\n", which, d1, d2);
+
+ if (d1 < 0 || d2 < 0) {
+ printf("[FAIL]\t%s time was inconsistent with the syscall\n", which);
+ nerrs++;
+ } else {
+ printf("[OK]\t%s gettimeofday()'s timeval was okay\n", which);
+ }
+
+ return nerrs;
+}
+
+static int test_gtod(void)
+{
+ struct timeval tv_sys1, tv_sys2, tv_vdso, tv_vsys;
+ struct timezone tz_sys, tz_vdso, tz_vsys;
+ long ret_vdso = -1;
+ long ret_vsys = -1;
+ int nerrs = 0;
+
+ printf("[RUN]\ttest gettimeofday()\n");
+
+ if (sys_gtod(&tv_sys1, &tz_sys) != 0)
+ err(1, "syscall gettimeofday");
+ if (vdso_gtod)
+ ret_vdso = vdso_gtod(&tv_vdso, &tz_vdso);
+ if (vgtod)
+ ret_vsys = vgtod(&tv_vsys, &tz_vsys);
+ if (sys_gtod(&tv_sys2, &tz_sys) != 0)
+ err(1, "syscall gettimeofday");
+
+ if (vdso_gtod) {
+ if (ret_vdso == 0) {
+ nerrs += check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vDSO", &tv_vdso, &tz_vdso);
+ } else {
+ printf("[FAIL]\tvDSO gettimeofday() failed: %ld\n", ret_vdso);
+ nerrs++;
+ }
+ }
+
+ if (vgtod) {
+ if (ret_vsys == 0) {
+ nerrs += check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vsyscall", &tv_vsys, &tz_vsys);
+ } else {
+ printf("[FAIL]\tvsys gettimeofday() failed: %ld\n", ret_vsys);
+ nerrs++;
+ }
+ }
+
+ return nerrs;
+}
+
+static int test_time(void) {
+ int nerrs = 0;
+
+ printf("[RUN]\ttest time()\n");
+ long t_sys1, t_sys2, t_vdso = 0, t_vsys = 0;
+ long t2_sys1 = -1, t2_sys2 = -1, t2_vdso = -1, t2_vsys = -1;
+ t_sys1 = sys_time(&t2_sys1);
+ if (vdso_time)
+ t_vdso = vdso_time(&t2_vdso);
+ if (vtime)
+ t_vsys = vtime(&t2_vsys);
+ t_sys2 = sys_time(&t2_sys2);
+ if (t_sys1 < 0 || t_sys1 != t2_sys1 || t_sys2 < 0 || t_sys2 != t2_sys2) {
+ printf("[FAIL]\tsyscall failed (ret1:%ld output1:%ld ret2:%ld output2:%ld)\n", t_sys1, t2_sys1, t_sys2, t2_sys2);
+ nerrs++;
+ return nerrs;
+ }
+
+ if (vdso_time) {
+ if (t_vdso < 0 || t_vdso != t2_vdso) {
+ printf("[FAIL]\tvDSO failed (ret:%ld output:%ld)\n", t_vdso, t2_vdso);
+ nerrs++;
+ } else if (t_vdso < t_sys1 || t_vdso > t_sys2) {
+ printf("[FAIL]\tvDSO returned the wrong time (%ld %ld %ld)\n", t_sys1, t_vdso, t_sys2);
+ nerrs++;
+ } else {
+ printf("[OK]\tvDSO time() is okay\n");
+ }
+ }
+
+ if (vtime) {
+ if (t_vsys < 0 || t_vsys != t2_vsys) {
+ printf("[FAIL]\tvsyscall failed (ret:%ld output:%ld)\n", t_vsys, t2_vsys);
+ nerrs++;
+ } else if (t_vsys < t_sys1 || t_vsys > t_sys2) {
+ printf("[FAIL]\tvsyscall returned the wrong time (%ld %ld %ld)\n", t_sys1, t_vsys, t_sys2);
+ nerrs++;
+ } else {
+ printf("[OK]\tvsyscall time() is okay\n");
+ }
+ }
+
+ return nerrs;
+}
+
+static int test_getcpu(int cpu)
+{
+ int nerrs = 0;
+ long ret_sys, ret_vdso = -1, ret_vsys = -1;
+
+ printf("[RUN]\tgetcpu() on CPU %d\n", cpu);
+
+ cpu_set_t cpuset;
+ CPU_ZERO(&cpuset);
+ CPU_SET(cpu, &cpuset);
+ if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) {
+ printf("[SKIP]\tfailed to force CPU %d\n", cpu);
+ return nerrs;
+ }
+
+ unsigned cpu_sys, cpu_vdso, cpu_vsys, node_sys, node_vdso, node_vsys;
+ unsigned node = 0;
+ bool have_node = false;
+ ret_sys = sys_getcpu(&cpu_sys, &node_sys, 0);
+ if (vdso_getcpu)
+ ret_vdso = vdso_getcpu(&cpu_vdso, &node_vdso, 0);
+ if (vgetcpu)
+ ret_vsys = vgetcpu(&cpu_vsys, &node_vsys, 0);
+
+ if (ret_sys == 0) {
+ if (cpu_sys != cpu) {
+ printf("[FAIL]\tsyscall reported CPU %hu but should be %d\n", cpu_sys, cpu);
+ nerrs++;
+ }
+
+ have_node = true;
+ node = node_sys;
+ }
+
+ if (vdso_getcpu) {
+ if (ret_vdso) {
+ printf("[FAIL]\tvDSO getcpu() failed\n");
+ nerrs++;
+ } else {
+ if (!have_node) {
+ have_node = true;
+ node = node_vdso;
+ }
+
+ if (cpu_vdso != cpu) {
+ printf("[FAIL]\tvDSO reported CPU %hu but should be %d\n", cpu_vdso, cpu);
+ nerrs++;
+ } else {
+ printf("[OK]\tvDSO reported correct CPU\n");
+ }
+
+ if (node_vdso != node) {
+ printf("[FAIL]\tvDSO reported node %hu but should be %hu\n", node_vdso, node);
+ nerrs++;
+ } else {
+ printf("[OK]\tvDSO reported correct node\n");
+ }
+ }
+ }
+
+ if (vgetcpu) {
+ if (ret_vsys) {
+ printf("[FAIL]\tvsyscall getcpu() failed\n");
+ nerrs++;
+ } else {
+ if (!have_node) {
+ have_node = true;
+ node = node_vsys;
+ }
+
+ if (cpu_vsys != cpu) {
+ printf("[FAIL]\tvsyscall reported CPU %hu but should be %d\n", cpu_vsys, cpu);
+ nerrs++;
+ } else {
+ printf("[OK]\tvsyscall reported correct CPU\n");
+ }
+
+ if (node_vsys != node) {
+ printf("[FAIL]\tvsyscall reported node %hu but should be %hu\n", node_vsys, node);
+ nerrs++;
+ } else {
+ printf("[OK]\tvsyscall reported correct node\n");
+ }
+ }
+ }
+
+ return nerrs;
+}
+
+static int test_vsys_r(void)
+{
+#ifdef __x86_64__
+ printf("[RUN]\tChecking read access to the vsyscall page\n");
+ bool can_read;
+ if (sigsetjmp(jmpbuf, 1) == 0) {
+ *(volatile int *)0xffffffffff600000;
+ can_read = true;
+ } else {
+ can_read = false;
+ }
+
+ if (can_read && !should_read_vsyscall) {
+ printf("[FAIL]\tWe have read access, but we shouldn't\n");
+ return 1;
+ } else if (!can_read && should_read_vsyscall) {
+ printf("[FAIL]\tWe don't have read access, but we should\n");
+ return 1;
+ } else {
+ printf("[OK]\tgot expected result\n");
+ }
+#endif
+
+ return 0;
+}
+
+
+#ifdef __x86_64__
+#define X86_EFLAGS_TF (1UL << 8)
+static volatile sig_atomic_t num_vsyscall_traps;
+
+static unsigned long get_eflags(void)
+{
+ unsigned long eflags;
+ asm volatile ("pushfq\n\tpopq %0" : "=rm" (eflags));
+ return eflags;
+}
+
+static void set_eflags(unsigned long eflags)
+{
+ asm volatile ("pushq %0\n\tpopfq" : : "rm" (eflags) : "flags");
+}
+
+static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
+{
+ ucontext_t *ctx = (ucontext_t *)ctx_void;
+ unsigned long ip = ctx->uc_mcontext.gregs[REG_RIP];
+
+ if (((ip ^ 0xffffffffff600000UL) & ~0xfffUL) == 0)
+ num_vsyscall_traps++;
+}
+
+static int test_emulation(void)
+{
+ time_t tmp;
+ bool is_native;
+
+ if (!vtime)
+ return 0;
+
+ printf("[RUN]\tchecking that vsyscalls are emulated\n");
+ sethandler(SIGTRAP, sigtrap, 0);
+ set_eflags(get_eflags() | X86_EFLAGS_TF);
+ vtime(&tmp);
+ set_eflags(get_eflags() & ~X86_EFLAGS_TF);
+
+ /*
+ * If vsyscalls are emulated, we expect a single trap in the
+ * vsyscall page -- the call instruction will trap with RIP
+ * pointing to the entry point before emulation takes over.
+ * In native mode, we expect two traps, since whatever code
+ * the vsyscall page contains will be more than just a ret
+ * instruction.
+ */
+ is_native = (num_vsyscall_traps > 1);
+
+ printf("[%s]\tvsyscalls are %s (%d instructions in vsyscall page)\n",
+ (is_native ? "FAIL" : "OK"),
+ (is_native ? "native" : "emulated"),
+ (int)num_vsyscall_traps);
+
+ return is_native;
+}
+#endif
+
+int main(int argc, char **argv)
+{
+ int nerrs = 0;
+
+ init_vdso();
+ nerrs += init_vsys();
+
+ nerrs += test_gtod();
+ nerrs += test_time();
+ nerrs += test_getcpu(0);
+ nerrs += test_getcpu(1);
+
+ sethandler(SIGSEGV, sigsegv, 0);
+ nerrs += test_vsys_r();
+
+#ifdef __x86_64__
+ nerrs += test_emulation();
+#endif
+
+ return nerrs ? 1 : 0;
+}
diff --git a/tools/testing/selftests/x86/thunks.S b/tools/testing/selftests/x86/thunks.S
new file mode 100644
index 000000000..ce8a995bb
--- /dev/null
+++ b/tools/testing/selftests/x86/thunks.S
@@ -0,0 +1,67 @@
+/*
+ * thunks.S - assembly helpers for mixed-bitness code
+ * Copyright (c) 2015 Andrew Lutomirski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * These are little helpers that make it easier to switch bitness on
+ * the fly.
+ */
+
+ .text
+
+ .global call32_from_64
+ .type call32_from_64, @function
+call32_from_64:
+ // rdi: stack to use
+ // esi: function to call
+
+ // Save registers
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushfq
+
+ // Switch stacks
+ mov %rsp,(%rdi)
+ mov %rdi,%rsp
+
+ // Switch to compatibility mode
+ pushq $0x23 /* USER32_CS */
+ pushq $1f
+ lretq
+
+1:
+ .code32
+ // Call the function
+ call *%esi
+ // Switch back to long mode
+ jmp $0x33,$1f
+ .code64
+
+1:
+ // Restore the stack
+ mov (%rsp),%rsp
+
+ // Restore registers
+ popfq
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbp
+ popq %rbx
+
+ ret
+
+.size call32_from_64, .-call32_from_64
diff --git a/tools/testing/selftests/x86/thunks_32.S b/tools/testing/selftests/x86/thunks_32.S
new file mode 100644
index 000000000..29b644bb9
--- /dev/null
+++ b/tools/testing/selftests/x86/thunks_32.S
@@ -0,0 +1,55 @@
+/*
+ * thunks_32.S - assembly helpers for mixed-bitness code
+ * Copyright (c) 2015 Denys Vlasenko
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * These are little helpers that make it easier to switch bitness on
+ * the fly.
+ */
+
+ .text
+ .code32
+
+ .global call64_from_32
+ .type call32_from_64, @function
+
+ // 4(%esp): function to call
+call64_from_32:
+ // Fetch function address
+ mov 4(%esp), %eax
+
+ // Save registers which are callee-clobbered by 64-bit ABI
+ push %ecx
+ push %edx
+ push %esi
+ push %edi
+
+ // Switch to long mode
+ jmp $0x33,$1f
+1: .code64
+
+ // Call the function
+ call *%rax
+
+ // Switch to compatibility mode
+ push $0x23 /* USER32_CS */
+ .code32; push $1f; .code64 /* hack: can't have X86_64_32S relocation in 32-bit ELF */
+ lretq
+1: .code32
+
+ pop %edi
+ pop %esi
+ pop %edx
+ pop %ecx
+
+ ret
+
+.size call64_from_32, .-call64_from_32
diff --git a/tools/testing/selftests/x86/trivial_32bit_program.c b/tools/testing/selftests/x86/trivial_32bit_program.c
new file mode 100644
index 000000000..fabdf0f51
--- /dev/null
+++ b/tools/testing/selftests/x86/trivial_32bit_program.c
@@ -0,0 +1,18 @@
+/*
+ * Trivial program to check that we have a valid 32-bit build environment.
+ * Copyright (c) 2015 Andy Lutomirski
+ * GPL v2
+ */
+
+#ifndef __i386__
+# error wrong architecture
+#endif
+
+#include <stdio.h>
+
+int main()
+{
+ printf("\n");
+
+ return 0;
+}
diff --git a/tools/testing/selftests/x86/trivial_64bit_program.c b/tools/testing/selftests/x86/trivial_64bit_program.c
new file mode 100644
index 000000000..05c6a41b3
--- /dev/null
+++ b/tools/testing/selftests/x86/trivial_64bit_program.c
@@ -0,0 +1,18 @@
+/*
+ * Trivial program to check that we have a valid 64-bit build environment.
+ * Copyright (c) 2015 Andy Lutomirski
+ * GPL v2
+ */
+
+#ifndef __x86_64__
+# error wrong architecture
+#endif
+
+#include <stdio.h>
+
+int main()
+{
+ printf("\n");
+
+ return 0;
+}
diff --git a/tools/testing/selftests/x86/trivial_program.c b/tools/testing/selftests/x86/trivial_program.c
new file mode 100644
index 000000000..46a447163
--- /dev/null
+++ b/tools/testing/selftests/x86/trivial_program.c
@@ -0,0 +1,10 @@
+/* Trivial program to check that compilation with certain flags is working. */
+
+#include <stdio.h>
+
+int
+main(void)
+{
+ puts("");
+ return 0;
+}
diff --git a/tools/testing/selftests/x86/unwind_vdso.c b/tools/testing/selftests/x86/unwind_vdso.c
new file mode 100644
index 000000000..00a26a82f
--- /dev/null
+++ b/tools/testing/selftests/x86/unwind_vdso.c
@@ -0,0 +1,211 @@
+/*
+ * unwind_vdso.c - tests unwind info for AT_SYSINFO in the vDSO
+ * Copyright (c) 2014-2015 Andrew Lutomirski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * This tests __kernel_vsyscall's unwind info.
+ */
+
+#define _GNU_SOURCE
+
+#include <features.h>
+#include <stdio.h>
+
+#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ < 16
+
+int main()
+{
+ /* We need getauxval(). */
+ printf("[SKIP]\tGLIBC before 2.16 cannot compile this test\n");
+ return 0;
+}
+
+#else
+
+#include <sys/time.h>
+#include <stdlib.h>
+#include <syscall.h>
+#include <unistd.h>
+#include <string.h>
+#include <inttypes.h>
+#include <sys/mman.h>
+#include <signal.h>
+#include <sys/ucontext.h>
+#include <err.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <sys/ptrace.h>
+#include <sys/user.h>
+#include <sys/ucontext.h>
+#include <link.h>
+#include <sys/auxv.h>
+#include <dlfcn.h>
+#include <unwind.h>
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+ int flags)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+#ifdef __x86_64__
+# define WIDTH "q"
+#else
+# define WIDTH "l"
+#endif
+
+static unsigned long get_eflags(void)
+{
+ unsigned long eflags;
+ asm volatile ("pushf" WIDTH "\n\tpop" WIDTH " %0" : "=rm" (eflags));
+ return eflags;
+}
+
+static void set_eflags(unsigned long eflags)
+{
+ asm volatile ("push" WIDTH " %0\n\tpopf" WIDTH
+ : : "rm" (eflags) : "flags");
+}
+
+#define X86_EFLAGS_TF (1UL << 8)
+
+static volatile sig_atomic_t nerrs;
+static unsigned long sysinfo;
+static bool got_sysinfo = false;
+static unsigned long return_address;
+
+struct unwind_state {
+ unsigned long ip; /* trap source */
+ int depth; /* -1 until we hit the trap source */
+};
+
+_Unwind_Reason_Code trace_fn(struct _Unwind_Context * ctx, void *opaque)
+{
+ struct unwind_state *state = opaque;
+ unsigned long ip = _Unwind_GetIP(ctx);
+
+ if (state->depth == -1) {
+ if (ip == state->ip)
+ state->depth = 0;
+ else
+ return _URC_NO_REASON; /* Not there yet */
+ }
+ printf("\t 0x%lx\n", ip);
+
+ if (ip == return_address) {
+ /* Here we are. */
+ unsigned long eax = _Unwind_GetGR(ctx, 0);
+ unsigned long ecx = _Unwind_GetGR(ctx, 1);
+ unsigned long edx = _Unwind_GetGR(ctx, 2);
+ unsigned long ebx = _Unwind_GetGR(ctx, 3);
+ unsigned long ebp = _Unwind_GetGR(ctx, 5);
+ unsigned long esi = _Unwind_GetGR(ctx, 6);
+ unsigned long edi = _Unwind_GetGR(ctx, 7);
+ bool ok = (eax == SYS_getpid || eax == getpid()) &&
+ ebx == 1 && ecx == 2 && edx == 3 &&
+ esi == 4 && edi == 5 && ebp == 6;
+
+ if (!ok)
+ nerrs++;
+ printf("[%s]\t NR = %ld, args = %ld, %ld, %ld, %ld, %ld, %ld\n",
+ (ok ? "OK" : "FAIL"),
+ eax, ebx, ecx, edx, esi, edi, ebp);
+
+ return _URC_NORMAL_STOP;
+ } else {
+ state->depth++;
+ return _URC_NO_REASON;
+ }
+}
+
+static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
+{
+ ucontext_t *ctx = (ucontext_t *)ctx_void;
+ struct unwind_state state;
+ unsigned long ip = ctx->uc_mcontext.gregs[REG_EIP];
+
+ if (!got_sysinfo && ip == sysinfo) {
+ got_sysinfo = true;
+
+ /* Find the return address. */
+ return_address = *(unsigned long *)(unsigned long)ctx->uc_mcontext.gregs[REG_ESP];
+
+ printf("\tIn vsyscall at 0x%lx, returning to 0x%lx\n",
+ ip, return_address);
+ }
+
+ if (!got_sysinfo)
+ return; /* Not there yet */
+
+ if (ip == return_address) {
+ ctx->uc_mcontext.gregs[REG_EFL] &= ~X86_EFLAGS_TF;
+ printf("\tVsyscall is done\n");
+ return;
+ }
+
+ printf("\tSIGTRAP at 0x%lx\n", ip);
+
+ state.ip = ip;
+ state.depth = -1;
+ _Unwind_Backtrace(trace_fn, &state);
+}
+
+int main()
+{
+ sysinfo = getauxval(AT_SYSINFO);
+ printf("\tAT_SYSINFO is 0x%lx\n", sysinfo);
+
+ Dl_info info;
+ if (!dladdr((void *)sysinfo, &info)) {
+ printf("[WARN]\tdladdr failed on AT_SYSINFO\n");
+ } else {
+ printf("[OK]\tAT_SYSINFO maps to %s, loaded at 0x%p\n",
+ info.dli_fname, info.dli_fbase);
+ }
+
+ sethandler(SIGTRAP, sigtrap, 0);
+
+ syscall(SYS_getpid); /* Force symbol binding without TF set. */
+ printf("[RUN]\tSet TF and check a fast syscall\n");
+ set_eflags(get_eflags() | X86_EFLAGS_TF);
+ syscall(SYS_getpid, 1, 2, 3, 4, 5, 6);
+ if (!got_sysinfo) {
+ set_eflags(get_eflags() & ~X86_EFLAGS_TF);
+
+ /*
+ * The most likely cause of this is that you're on Debian or
+ * a Debian-based distro, you're missing libc6-i686, and you're
+ * affected by libc/19006 (https://sourceware.org/PR19006).
+ */
+ printf("[WARN]\tsyscall(2) didn't enter AT_SYSINFO\n");
+ }
+
+ if (get_eflags() & X86_EFLAGS_TF) {
+ printf("[FAIL]\tTF is still set\n");
+ nerrs++;
+ }
+
+ if (nerrs) {
+ printf("[FAIL]\tThere were errors\n");
+ return 1;
+ } else {
+ printf("[OK]\tAll is well\n");
+ return 0;
+ }
+}
+
+#endif /* New enough libc */
diff --git a/tools/testing/selftests/x86/vdso_restorer.c b/tools/testing/selftests/x86/vdso_restorer.c
new file mode 100644
index 000000000..cb038424a
--- /dev/null
+++ b/tools/testing/selftests/x86/vdso_restorer.c
@@ -0,0 +1,88 @@
+/*
+ * vdso_restorer.c - tests vDSO-based signal restore
+ * Copyright (c) 2015 Andrew Lutomirski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * This makes sure that sa_restorer == NULL keeps working on 32-bit
+ * configurations. Modern glibc doesn't use it under any circumstances,
+ * so it's easy to overlook breakage.
+ *
+ * 64-bit userspace has never supported sa_restorer == NULL, so this is
+ * 32-bit only.
+ */
+
+#define _GNU_SOURCE
+
+#include <err.h>
+#include <stdio.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+#include <syscall.h>
+#include <sys/syscall.h>
+
+/* Open-code this -- the headers are too messy to easily use them. */
+struct real_sigaction {
+ void *handler;
+ unsigned long flags;
+ void *restorer;
+ unsigned int mask[2];
+};
+
+static volatile sig_atomic_t handler_called;
+
+static void handler_with_siginfo(int sig, siginfo_t *info, void *ctx_void)
+{
+ handler_called = 1;
+}
+
+static void handler_without_siginfo(int sig)
+{
+ handler_called = 1;
+}
+
+int main()
+{
+ int nerrs = 0;
+ struct real_sigaction sa;
+
+ memset(&sa, 0, sizeof(sa));
+ sa.handler = handler_with_siginfo;
+ sa.flags = SA_SIGINFO;
+ sa.restorer = NULL; /* request kernel-provided restorer */
+
+ if (syscall(SYS_rt_sigaction, SIGUSR1, &sa, NULL, 8) != 0)
+ err(1, "raw rt_sigaction syscall");
+
+ raise(SIGUSR1);
+
+ if (handler_called) {
+ printf("[OK]\tSA_SIGINFO handler returned successfully\n");
+ } else {
+ printf("[FAIL]\tSA_SIGINFO handler was not called\n");
+ nerrs++;
+ }
+
+ sa.flags = 0;
+ sa.handler = handler_without_siginfo;
+ if (syscall(SYS_sigaction, SIGUSR1, &sa, 0) != 0)
+ err(1, "raw sigaction syscall");
+ handler_called = 0;
+
+ raise(SIGUSR1);
+
+ if (handler_called) {
+ printf("[OK]\t!SA_SIGINFO handler returned successfully\n");
+ } else {
+ printf("[FAIL]\t!SA_SIGINFO handler was not called\n");
+ nerrs++;
+ }
+}
diff --git a/tools/testing/selftests/zram/Makefile b/tools/testing/selftests/zram/Makefile
new file mode 100644
index 000000000..7f78eb1b5
--- /dev/null
+++ b/tools/testing/selftests/zram/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+all:
+
+TEST_PROGS := zram.sh
+TEST_FILES := zram01.sh zram02.sh zram_lib.sh
+EXTRA_CLEAN := err.log
+
+include ../lib.mk
+
diff --git a/tools/testing/selftests/zram/README b/tools/testing/selftests/zram/README
new file mode 100644
index 000000000..7972cc512
--- /dev/null
+++ b/tools/testing/selftests/zram/README
@@ -0,0 +1,40 @@
+zram: Compressed RAM based block devices
+----------------------------------------
+* Introduction
+
+The zram module creates RAM based block devices named /dev/zram<id>
+(<id> = 0, 1, ...). Pages written to these disks are compressed and stored
+in memory itself. These disks allow very fast I/O and compression provides
+good amounts of memory savings. Some of the usecases include /tmp storage,
+use as swap disks, various caches under /var and maybe many more :)
+
+Statistics for individual zram devices are exported through sysfs nodes at
+/sys/block/zram<id>/
+
+Kconfig required:
+CONFIG_ZRAM=y
+CONFIG_CRYPTO_LZ4=y
+CONFIG_ZPOOL=y
+CONFIG_ZSMALLOC=y
+
+ZRAM Testcases
+--------------
+zram_lib.sh: create library with initialization/cleanup functions
+zram.sh: For sanity check of CONFIG_ZRAM and to run zram01 and zram02
+
+Two functional tests: zram01 and zram02:
+zram01.sh: creates general purpose ram disks with ext4 filesystems
+zram02.sh: creates block device for swap
+
+Commands required for testing:
+ - bc
+ - dd
+ - free
+ - awk
+ - mkswap
+ - swapon
+ - swapoff
+ - mkfs/ mkfs.ext4
+
+For more information please refer:
+kernel-source-tree/Documentation/blockdev/zram.txt
diff --git a/tools/testing/selftests/zram/config b/tools/testing/selftests/zram/config
new file mode 100644
index 000000000..e0cc47e2c
--- /dev/null
+++ b/tools/testing/selftests/zram/config
@@ -0,0 +1,2 @@
+CONFIG_ZSMALLOC=y
+CONFIG_ZRAM=m
diff --git a/tools/testing/selftests/zram/zram.sh b/tools/testing/selftests/zram/zram.sh
new file mode 100755
index 000000000..b0b91d9b0
--- /dev/null
+++ b/tools/testing/selftests/zram/zram.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+TCID="zram.sh"
+
+. ./zram_lib.sh
+
+run_zram () {
+echo "--------------------"
+echo "running zram tests"
+echo "--------------------"
+./zram01.sh
+echo ""
+./zram02.sh
+}
+
+check_prereqs
+
+run_zram
diff --git a/tools/testing/selftests/zram/zram01.sh b/tools/testing/selftests/zram/zram01.sh
new file mode 100755
index 000000000..8abc99650
--- /dev/null
+++ b/tools/testing/selftests/zram/zram01.sh
@@ -0,0 +1,84 @@
+#!/bin/bash
+# Copyright (c) 2015 Oracle and/or its affiliates. All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 2 of
+# the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it would be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# Test creates several zram devices with different filesystems on them.
+# It fills each device with zeros and checks that compression works.
+#
+# Author: Alexey Kodanev <alexey.kodanev@oracle.com>
+# Modified: Naresh Kamboju <naresh.kamboju@linaro.org>
+
+TCID="zram01"
+ERR_CODE=0
+
+. ./zram_lib.sh
+
+# Test will create the following number of zram devices:
+dev_num=1
+# This is a list of parameters for zram devices.
+# Number of items must be equal to 'dev_num' parameter.
+zram_max_streams="2"
+
+# The zram sysfs node 'disksize' value can be either in bytes,
+# or you can use mem suffixes. But in some old kernels, mem
+# suffixes are not supported, for example, in RHEL6.6GA's kernel
+# layer, it uses strict_strtoull() to parse disksize which does
+# not support mem suffixes, in some newer kernels, they use
+# memparse() which supports mem suffixes. So here we just use
+# bytes to make sure everything works correctly.
+zram_sizes="2097152" # 2MB
+zram_mem_limits="2M"
+zram_filesystems="ext4"
+zram_algs="lzo"
+
+zram_fill_fs()
+{
+ for i in $(seq $dev_start $dev_end); do
+ echo "fill zram$i..."
+ local b=0
+ while [ true ]; do
+ dd conv=notrunc if=/dev/zero of=zram${i}/file \
+ oflag=append count=1 bs=1024 status=none \
+ > /dev/null 2>&1 || break
+ b=$(($b + 1))
+ done
+ echo "zram$i can be filled with '$b' KB"
+
+ local mem_used_total=`awk '{print $3}' "/sys/block/zram$i/mm_stat"`
+ local v=$((100 * 1024 * $b / $mem_used_total))
+ if [ "$v" -lt 100 ]; then
+ echo "FAIL compression ratio: 0.$v:1"
+ ERR_CODE=-1
+ return
+ fi
+
+ echo "zram compression ratio: $(echo "scale=2; $v / 100 " | bc):1: OK"
+ done
+}
+
+check_prereqs
+zram_load
+zram_max_streams
+zram_compress_alg
+zram_set_disksizes
+zram_set_memlimit
+zram_makefs
+zram_mount
+
+zram_fill_fs
+zram_cleanup
+
+if [ $ERR_CODE -ne 0 ]; then
+ echo "$TCID : [FAIL]"
+else
+ echo "$TCID : [PASS]"
+fi
diff --git a/tools/testing/selftests/zram/zram02.sh b/tools/testing/selftests/zram/zram02.sh
new file mode 100755
index 000000000..3768cfd2e
--- /dev/null
+++ b/tools/testing/selftests/zram/zram02.sh
@@ -0,0 +1,53 @@
+#!/bin/bash
+# Copyright (c) 2015 Oracle and/or its affiliates. All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 2 of
+# the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it would be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# Test checks that we can create swap zram device.
+#
+# Author: Alexey Kodanev <alexey.kodanev@oracle.com>
+# Modified: Naresh Kamboju <naresh.kamboju@linaro.org>
+
+TCID="zram02"
+ERR_CODE=0
+
+. ./zram_lib.sh
+
+# Test will create the following number of zram devices:
+dev_num=1
+# This is a list of parameters for zram devices.
+# Number of items must be equal to 'dev_num' parameter.
+zram_max_streams="2"
+
+# The zram sysfs node 'disksize' value can be either in bytes,
+# or you can use mem suffixes. But in some old kernels, mem
+# suffixes are not supported, for example, in RHEL6.6GA's kernel
+# layer, it uses strict_strtoull() to parse disksize which does
+# not support mem suffixes, in some newer kernels, they use
+# memparse() which supports mem suffixes. So here we just use
+# bytes to make sure everything works correctly.
+zram_sizes="1048576" # 1M
+zram_mem_limits="1M"
+
+check_prereqs
+zram_load
+zram_max_streams
+zram_set_disksizes
+zram_set_memlimit
+zram_makeswap
+zram_swapoff
+zram_cleanup
+
+if [ $ERR_CODE -ne 0 ]; then
+ echo "$TCID : [FAIL]"
+else
+ echo "$TCID : [PASS]"
+fi
diff --git a/tools/testing/selftests/zram/zram_lib.sh b/tools/testing/selftests/zram/zram_lib.sh
new file mode 100755
index 000000000..130d193cb
--- /dev/null
+++ b/tools/testing/selftests/zram/zram_lib.sh
@@ -0,0 +1,278 @@
+#!/bin/sh
+# Copyright (c) 2015 Oracle and/or its affiliates. All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 2 of
+# the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it would be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# Author: Alexey Kodanev <alexey.kodanev@oracle.com>
+# Modified: Naresh Kamboju <naresh.kamboju@linaro.org>
+
+dev_makeswap=-1
+dev_mounted=-1
+dev_start=0
+dev_end=-1
+module_load=-1
+sys_control=-1
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+kernel_version=`uname -r | cut -d'.' -f1,2`
+kernel_major=${kernel_version%.*}
+kernel_minor=${kernel_version#*.}
+
+trap INT
+
+check_prereqs()
+{
+ local msg="skip all tests:"
+ local uid=$(id -u)
+
+ if [ $uid -ne 0 ]; then
+ echo $msg must be run as root >&2
+ exit $ksft_skip
+ fi
+}
+
+kernel_gte()
+{
+ major=${1%.*}
+ minor=${1#*.}
+
+ if [ $kernel_major -gt $major ]; then
+ return 0
+ elif [[ $kernel_major -eq $major && $kernel_minor -ge $minor ]]; then
+ return 0
+ fi
+
+ return 1
+}
+
+zram_cleanup()
+{
+ echo "zram cleanup"
+ local i=
+ for i in $(seq $dev_start $dev_makeswap); do
+ swapoff /dev/zram$i
+ done
+
+ for i in $(seq $dev_start $dev_mounted); do
+ umount /dev/zram$i
+ done
+
+ for i in $(seq $dev_start $dev_end); do
+ echo 1 > /sys/block/zram${i}/reset
+ rm -rf zram$i
+ done
+
+ if [ $sys_control -eq 1 ]; then
+ for i in $(seq $dev_start $dev_end); do
+ echo $i > /sys/class/zram-control/hot_remove
+ done
+ fi
+
+ if [ $module_load -eq 1 ]; then
+ rmmod zram > /dev/null 2>&1
+ fi
+}
+
+zram_load()
+{
+ echo "create '$dev_num' zram device(s)"
+
+ # zram module loaded, new kernel
+ if [ -d "/sys/class/zram-control" ]; then
+ echo "zram modules already loaded, kernel supports" \
+ "zram-control interface"
+ dev_start=$(ls /dev/zram* | wc -w)
+ dev_end=$(($dev_start + $dev_num - 1))
+ sys_control=1
+
+ for i in $(seq $dev_start $dev_end); do
+ cat /sys/class/zram-control/hot_add > /dev/null
+ done
+
+ echo "all zram devices (/dev/zram$dev_start~$dev_end" \
+ "successfully created"
+ return 0
+ fi
+
+ # detect old kernel or built-in
+ modprobe zram num_devices=$dev_num
+ if [ ! -d "/sys/class/zram-control" ]; then
+ if grep -q '^zram' /proc/modules; then
+ rmmod zram > /dev/null 2>&1
+ if [ $? -ne 0 ]; then
+ echo "zram module is being used on old kernel" \
+ "without zram-control interface"
+ exit $ksft_skip
+ fi
+ else
+ echo "test needs CONFIG_ZRAM=m on old kernel without" \
+ "zram-control interface"
+ exit $ksft_skip
+ fi
+ modprobe zram num_devices=$dev_num
+ fi
+
+ module_load=1
+ dev_end=$(($dev_num - 1))
+ echo "all zram devices (/dev/zram0~$dev_end) successfully created"
+}
+
+zram_max_streams()
+{
+ echo "set max_comp_streams to zram device(s)"
+
+ kernel_gte 4.7
+ if [ $? -eq 0 ]; then
+ echo "The device attribute max_comp_streams was"\
+ "deprecated in 4.7"
+ return 0
+ fi
+
+ local i=$dev_start
+ for max_s in $zram_max_streams; do
+ local sys_path="/sys/block/zram${i}/max_comp_streams"
+ echo $max_s > $sys_path || \
+ echo "FAIL failed to set '$max_s' to $sys_path"
+ sleep 1
+ local max_streams=$(cat $sys_path)
+
+ [ "$max_s" -ne "$max_streams" ] && \
+ echo "FAIL can't set max_streams '$max_s', get $max_stream"
+
+ i=$(($i + 1))
+ echo "$sys_path = '$max_streams'"
+ done
+
+ echo "zram max streams: OK"
+}
+
+zram_compress_alg()
+{
+ echo "test that we can set compression algorithm"
+
+ local i=$dev_start
+ local algs=$(cat /sys/block/zram${i}/comp_algorithm)
+ echo "supported algs: $algs"
+
+ for alg in $zram_algs; do
+ local sys_path="/sys/block/zram${i}/comp_algorithm"
+ echo "$alg" > $sys_path || \
+ echo "FAIL can't set '$alg' to $sys_path"
+ i=$(($i + 1))
+ echo "$sys_path = '$alg'"
+ done
+
+ echo "zram set compression algorithm: OK"
+}
+
+zram_set_disksizes()
+{
+ echo "set disk size to zram device(s)"
+ local i=$dev_start
+ for ds in $zram_sizes; do
+ local sys_path="/sys/block/zram${i}/disksize"
+ echo "$ds" > $sys_path || \
+ echo "FAIL can't set '$ds' to $sys_path"
+
+ i=$(($i + 1))
+ echo "$sys_path = '$ds'"
+ done
+
+ echo "zram set disksizes: OK"
+}
+
+zram_set_memlimit()
+{
+ echo "set memory limit to zram device(s)"
+
+ local i=$dev_start
+ for ds in $zram_mem_limits; do
+ local sys_path="/sys/block/zram${i}/mem_limit"
+ echo "$ds" > $sys_path || \
+ echo "FAIL can't set '$ds' to $sys_path"
+
+ i=$(($i + 1))
+ echo "$sys_path = '$ds'"
+ done
+
+ echo "zram set memory limit: OK"
+}
+
+zram_makeswap()
+{
+ echo "make swap with zram device(s)"
+ local i=$dev_start
+ for i in $(seq $dev_start $dev_end); do
+ mkswap /dev/zram$i > err.log 2>&1
+ if [ $? -ne 0 ]; then
+ cat err.log
+ echo "FAIL mkswap /dev/zram$1 failed"
+ fi
+
+ swapon /dev/zram$i > err.log 2>&1
+ if [ $? -ne 0 ]; then
+ cat err.log
+ echo "FAIL swapon /dev/zram$1 failed"
+ fi
+
+ echo "done with /dev/zram$i"
+ dev_makeswap=$i
+ done
+
+ echo "zram making zram mkswap and swapon: OK"
+}
+
+zram_swapoff()
+{
+ local i=
+ for i in $(seq $dev_start $dev_end); do
+ swapoff /dev/zram$i > err.log 2>&1
+ if [ $? -ne 0 ]; then
+ cat err.log
+ echo "FAIL swapoff /dev/zram$i failed"
+ fi
+ done
+ dev_makeswap=-1
+
+ echo "zram swapoff: OK"
+}
+
+zram_makefs()
+{
+ local i=$dev_start
+ for fs in $zram_filesystems; do
+ # if requested fs not supported default it to ext2
+ which mkfs.$fs > /dev/null 2>&1 || fs=ext2
+
+ echo "make $fs filesystem on /dev/zram$i"
+ mkfs.$fs /dev/zram$i > err.log 2>&1
+ if [ $? -ne 0 ]; then
+ cat err.log
+ echo "FAIL failed to make $fs on /dev/zram$i"
+ fi
+ i=$(($i + 1))
+ echo "zram mkfs.$fs: OK"
+ done
+}
+
+zram_mount()
+{
+ local i=0
+ for i in $(seq $dev_start $dev_end); do
+ echo "mount /dev/zram$i"
+ mkdir zram$i
+ mount /dev/zram$i zram$i > /dev/null || \
+ echo "FAIL mount /dev/zram$i failed"
+ dev_mounted=$i
+ done
+
+ echo "zram mount of zram device(s): OK"
+}
diff --git a/tools/testing/vsock/.gitignore b/tools/testing/vsock/.gitignore
new file mode 100644
index 000000000..dc5f11faf
--- /dev/null
+++ b/tools/testing/vsock/.gitignore
@@ -0,0 +1,2 @@
+*.d
+vsock_diag_test
diff --git a/tools/testing/vsock/Makefile b/tools/testing/vsock/Makefile
new file mode 100644
index 000000000..66ba09241
--- /dev/null
+++ b/tools/testing/vsock/Makefile
@@ -0,0 +1,9 @@
+all: test
+test: vsock_diag_test
+vsock_diag_test: vsock_diag_test.o timeout.o control.o
+
+CFLAGS += -g -O2 -Werror -Wall -I. -I../../include/uapi -I../../include -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE -D_GNU_SOURCE
+.PHONY: all test clean
+clean:
+ ${RM} *.o *.d vsock_diag_test
+-include *.d
diff --git a/tools/testing/vsock/README b/tools/testing/vsock/README
new file mode 100644
index 000000000..2cc6d7302
--- /dev/null
+++ b/tools/testing/vsock/README
@@ -0,0 +1,36 @@
+AF_VSOCK test suite
+-------------------
+These tests exercise net/vmw_vsock/ host<->guest sockets for VMware, KVM, and
+Hyper-V.
+
+The following tests are available:
+
+ * vsock_diag_test - vsock_diag.ko module for listing open sockets
+
+The following prerequisite steps are not automated and must be performed prior
+to running tests:
+
+1. Build the kernel and these tests.
+2. Install the kernel and tests on the host.
+3. Install the kernel and tests inside the guest.
+4. Boot the guest and ensure that the AF_VSOCK transport is enabled.
+
+Invoke test binaries in both directions as follows:
+
+ # host=server, guest=client
+ (host)# $TEST_BINARY --mode=server \
+ --control-port=1234 \
+ --peer-cid=3
+ (guest)# $TEST_BINARY --mode=client \
+ --control-host=$HOST_IP \
+ --control-port=1234 \
+ --peer-cid=2
+
+ # host=client, guest=server
+ (guest)# $TEST_BINARY --mode=server \
+ --control-port=1234 \
+ --peer-cid=2
+ (host)# $TEST_BINARY --mode=client \
+ --control-port=$GUEST_IP \
+ --control-port=1234 \
+ --peer-cid=3
diff --git a/tools/testing/vsock/control.c b/tools/testing/vsock/control.c
new file mode 100644
index 000000000..90fd47f0e
--- /dev/null
+++ b/tools/testing/vsock/control.c
@@ -0,0 +1,219 @@
+/* Control socket for client/server test execution
+ *
+ * Copyright (C) 2017 Red Hat, Inc.
+ *
+ * Author: Stefan Hajnoczi <stefanha@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+/* The client and server may need to coordinate to avoid race conditions like
+ * the client attempting to connect to a socket that the server is not
+ * listening on yet. The control socket offers a communications channel for
+ * such coordination tasks.
+ *
+ * If the client calls control_expectln("LISTENING"), then it will block until
+ * the server calls control_writeln("LISTENING"). This provides a simple
+ * mechanism for coordinating between the client and the server.
+ */
+
+#include <errno.h>
+#include <netdb.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#include "timeout.h"
+#include "control.h"
+
+static int control_fd = -1;
+
+/* Open the control socket, either in server or client mode */
+void control_init(const char *control_host,
+ const char *control_port,
+ bool server)
+{
+ struct addrinfo hints = {
+ .ai_socktype = SOCK_STREAM,
+ };
+ struct addrinfo *result = NULL;
+ struct addrinfo *ai;
+ int ret;
+
+ ret = getaddrinfo(control_host, control_port, &hints, &result);
+ if (ret != 0) {
+ fprintf(stderr, "%s\n", gai_strerror(ret));
+ exit(EXIT_FAILURE);
+ }
+
+ for (ai = result; ai; ai = ai->ai_next) {
+ int fd;
+ int val = 1;
+
+ fd = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
+ if (fd < 0)
+ continue;
+
+ if (!server) {
+ if (connect(fd, ai->ai_addr, ai->ai_addrlen) < 0)
+ goto next;
+ control_fd = fd;
+ printf("Control socket connected to %s:%s.\n",
+ control_host, control_port);
+ break;
+ }
+
+ if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
+ &val, sizeof(val)) < 0) {
+ perror("setsockopt");
+ exit(EXIT_FAILURE);
+ }
+
+ if (bind(fd, ai->ai_addr, ai->ai_addrlen) < 0)
+ goto next;
+ if (listen(fd, 1) < 0)
+ goto next;
+
+ printf("Control socket listening on %s:%s\n",
+ control_host, control_port);
+ fflush(stdout);
+
+ control_fd = accept(fd, NULL, 0);
+ close(fd);
+
+ if (control_fd < 0) {
+ perror("accept");
+ exit(EXIT_FAILURE);
+ }
+ printf("Control socket connection accepted...\n");
+ break;
+
+next:
+ close(fd);
+ }
+
+ if (control_fd < 0) {
+ fprintf(stderr, "Control socket initialization failed. Invalid address %s:%s?\n",
+ control_host, control_port);
+ exit(EXIT_FAILURE);
+ }
+
+ freeaddrinfo(result);
+}
+
+/* Free resources */
+void control_cleanup(void)
+{
+ close(control_fd);
+ control_fd = -1;
+}
+
+/* Write a line to the control socket */
+void control_writeln(const char *str)
+{
+ ssize_t len = strlen(str);
+ ssize_t ret;
+
+ timeout_begin(TIMEOUT);
+
+ do {
+ ret = send(control_fd, str, len, MSG_MORE);
+ timeout_check("send");
+ } while (ret < 0 && errno == EINTR);
+
+ if (ret != len) {
+ perror("send");
+ exit(EXIT_FAILURE);
+ }
+
+ do {
+ ret = send(control_fd, "\n", 1, 0);
+ timeout_check("send");
+ } while (ret < 0 && errno == EINTR);
+
+ if (ret != 1) {
+ perror("send");
+ exit(EXIT_FAILURE);
+ }
+
+ timeout_end();
+}
+
+/* Return the next line from the control socket (without the trailing newline).
+ *
+ * The program terminates if a timeout occurs.
+ *
+ * The caller must free() the returned string.
+ */
+char *control_readln(void)
+{
+ char *buf = NULL;
+ size_t idx = 0;
+ size_t buflen = 0;
+
+ timeout_begin(TIMEOUT);
+
+ for (;;) {
+ ssize_t ret;
+
+ if (idx >= buflen) {
+ char *new_buf;
+
+ new_buf = realloc(buf, buflen + 80);
+ if (!new_buf) {
+ perror("realloc");
+ exit(EXIT_FAILURE);
+ }
+
+ buf = new_buf;
+ buflen += 80;
+ }
+
+ do {
+ ret = recv(control_fd, &buf[idx], 1, 0);
+ timeout_check("recv");
+ } while (ret < 0 && errno == EINTR);
+
+ if (ret == 0) {
+ fprintf(stderr, "unexpected EOF on control socket\n");
+ exit(EXIT_FAILURE);
+ }
+
+ if (ret != 1) {
+ perror("recv");
+ exit(EXIT_FAILURE);
+ }
+
+ if (buf[idx] == '\n') {
+ buf[idx] = '\0';
+ break;
+ }
+
+ idx++;
+ }
+
+ timeout_end();
+
+ return buf;
+}
+
+/* Wait until a given line is received or a timeout occurs */
+void control_expectln(const char *str)
+{
+ char *line;
+
+ line = control_readln();
+ if (strcmp(str, line) != 0) {
+ fprintf(stderr, "expected \"%s\" on control socket, got \"%s\"\n",
+ str, line);
+ exit(EXIT_FAILURE);
+ }
+
+ free(line);
+}
diff --git a/tools/testing/vsock/control.h b/tools/testing/vsock/control.h
new file mode 100644
index 000000000..54a07efd2
--- /dev/null
+++ b/tools/testing/vsock/control.h
@@ -0,0 +1,13 @@
+#ifndef CONTROL_H
+#define CONTROL_H
+
+#include <stdbool.h>
+
+void control_init(const char *control_host, const char *control_port,
+ bool server);
+void control_cleanup(void);
+void control_writeln(const char *str);
+char *control_readln(void);
+void control_expectln(const char *str);
+
+#endif /* CONTROL_H */
diff --git a/tools/testing/vsock/timeout.c b/tools/testing/vsock/timeout.c
new file mode 100644
index 000000000..c49b3003b
--- /dev/null
+++ b/tools/testing/vsock/timeout.c
@@ -0,0 +1,64 @@
+/* Timeout API for single-threaded programs that use blocking
+ * syscalls (read/write/send/recv/connect/accept).
+ *
+ * Copyright (C) 2017 Red Hat, Inc.
+ *
+ * Author: Stefan Hajnoczi <stefanha@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+/* Use the following pattern:
+ *
+ * timeout_begin(TIMEOUT);
+ * do {
+ * ret = accept(...);
+ * timeout_check("accept");
+ * } while (ret < 0 && ret == EINTR);
+ * timeout_end();
+ */
+
+#include <stdlib.h>
+#include <stdbool.h>
+#include <unistd.h>
+#include <stdio.h>
+#include "timeout.h"
+
+static volatile bool timeout;
+
+/* SIGALRM handler function. Do not use sleep(2), alarm(2), or
+ * setitimer(2) while using this API - they may interfere with each
+ * other.
+ */
+void sigalrm(int signo)
+{
+ timeout = true;
+}
+
+/* Start a timeout. Call timeout_check() to verify that the timeout hasn't
+ * expired. timeout_end() must be called to stop the timeout. Timeouts cannot
+ * be nested.
+ */
+void timeout_begin(unsigned int seconds)
+{
+ alarm(seconds);
+}
+
+/* Exit with an error message if the timeout has expired */
+void timeout_check(const char *operation)
+{
+ if (timeout) {
+ fprintf(stderr, "%s timed out\n", operation);
+ exit(EXIT_FAILURE);
+ }
+}
+
+/* Stop a timeout */
+void timeout_end(void)
+{
+ alarm(0);
+ timeout = false;
+}
diff --git a/tools/testing/vsock/timeout.h b/tools/testing/vsock/timeout.h
new file mode 100644
index 000000000..77db9ce98
--- /dev/null
+++ b/tools/testing/vsock/timeout.h
@@ -0,0 +1,14 @@
+#ifndef TIMEOUT_H
+#define TIMEOUT_H
+
+enum {
+ /* Default timeout */
+ TIMEOUT = 10 /* seconds */
+};
+
+void sigalrm(int signo);
+void timeout_begin(unsigned int seconds);
+void timeout_check(const char *operation);
+void timeout_end(void);
+
+#endif /* TIMEOUT_H */
diff --git a/tools/testing/vsock/vsock_diag_test.c b/tools/testing/vsock/vsock_diag_test.c
new file mode 100644
index 000000000..e896a4af5
--- /dev/null
+++ b/tools/testing/vsock/vsock_diag_test.c
@@ -0,0 +1,681 @@
+/*
+ * vsock_diag_test - vsock_diag.ko test suite
+ *
+ * Copyright (C) 2017 Red Hat, Inc.
+ *
+ * Author: Stefan Hajnoczi <stefanha@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <getopt.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <signal.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <linux/list.h>
+#include <linux/net.h>
+#include <linux/netlink.h>
+#include <linux/sock_diag.h>
+#include <netinet/tcp.h>
+
+#include "../../../include/uapi/linux/vm_sockets.h"
+#include "../../../include/uapi/linux/vm_sockets_diag.h"
+
+#include "timeout.h"
+#include "control.h"
+
+enum test_mode {
+ TEST_MODE_UNSET,
+ TEST_MODE_CLIENT,
+ TEST_MODE_SERVER
+};
+
+/* Per-socket status */
+struct vsock_stat {
+ struct list_head list;
+ struct vsock_diag_msg msg;
+};
+
+static const char *sock_type_str(int type)
+{
+ switch (type) {
+ case SOCK_DGRAM:
+ return "DGRAM";
+ case SOCK_STREAM:
+ return "STREAM";
+ default:
+ return "INVALID TYPE";
+ }
+}
+
+static const char *sock_state_str(int state)
+{
+ switch (state) {
+ case TCP_CLOSE:
+ return "UNCONNECTED";
+ case TCP_SYN_SENT:
+ return "CONNECTING";
+ case TCP_ESTABLISHED:
+ return "CONNECTED";
+ case TCP_CLOSING:
+ return "DISCONNECTING";
+ case TCP_LISTEN:
+ return "LISTEN";
+ default:
+ return "INVALID STATE";
+ }
+}
+
+static const char *sock_shutdown_str(int shutdown)
+{
+ switch (shutdown) {
+ case 1:
+ return "RCV_SHUTDOWN";
+ case 2:
+ return "SEND_SHUTDOWN";
+ case 3:
+ return "RCV_SHUTDOWN | SEND_SHUTDOWN";
+ default:
+ return "0";
+ }
+}
+
+static void print_vsock_addr(FILE *fp, unsigned int cid, unsigned int port)
+{
+ if (cid == VMADDR_CID_ANY)
+ fprintf(fp, "*:");
+ else
+ fprintf(fp, "%u:", cid);
+
+ if (port == VMADDR_PORT_ANY)
+ fprintf(fp, "*");
+ else
+ fprintf(fp, "%u", port);
+}
+
+static void print_vsock_stat(FILE *fp, struct vsock_stat *st)
+{
+ print_vsock_addr(fp, st->msg.vdiag_src_cid, st->msg.vdiag_src_port);
+ fprintf(fp, " ");
+ print_vsock_addr(fp, st->msg.vdiag_dst_cid, st->msg.vdiag_dst_port);
+ fprintf(fp, " %s %s %s %u\n",
+ sock_type_str(st->msg.vdiag_type),
+ sock_state_str(st->msg.vdiag_state),
+ sock_shutdown_str(st->msg.vdiag_shutdown),
+ st->msg.vdiag_ino);
+}
+
+static void print_vsock_stats(FILE *fp, struct list_head *head)
+{
+ struct vsock_stat *st;
+
+ list_for_each_entry(st, head, list)
+ print_vsock_stat(fp, st);
+}
+
+static struct vsock_stat *find_vsock_stat(struct list_head *head, int fd)
+{
+ struct vsock_stat *st;
+ struct stat stat;
+
+ if (fstat(fd, &stat) < 0) {
+ perror("fstat");
+ exit(EXIT_FAILURE);
+ }
+
+ list_for_each_entry(st, head, list)
+ if (st->msg.vdiag_ino == stat.st_ino)
+ return st;
+
+ fprintf(stderr, "cannot find fd %d\n", fd);
+ exit(EXIT_FAILURE);
+}
+
+static void check_no_sockets(struct list_head *head)
+{
+ if (!list_empty(head)) {
+ fprintf(stderr, "expected no sockets\n");
+ print_vsock_stats(stderr, head);
+ exit(1);
+ }
+}
+
+static void check_num_sockets(struct list_head *head, int expected)
+{
+ struct list_head *node;
+ int n = 0;
+
+ list_for_each(node, head)
+ n++;
+
+ if (n != expected) {
+ fprintf(stderr, "expected %d sockets, found %d\n",
+ expected, n);
+ print_vsock_stats(stderr, head);
+ exit(EXIT_FAILURE);
+ }
+}
+
+static void check_socket_state(struct vsock_stat *st, __u8 state)
+{
+ if (st->msg.vdiag_state != state) {
+ fprintf(stderr, "expected socket state %#x, got %#x\n",
+ state, st->msg.vdiag_state);
+ exit(EXIT_FAILURE);
+ }
+}
+
+static void send_req(int fd)
+{
+ struct sockaddr_nl nladdr = {
+ .nl_family = AF_NETLINK,
+ };
+ struct {
+ struct nlmsghdr nlh;
+ struct vsock_diag_req vreq;
+ } req = {
+ .nlh = {
+ .nlmsg_len = sizeof(req),
+ .nlmsg_type = SOCK_DIAG_BY_FAMILY,
+ .nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
+ },
+ .vreq = {
+ .sdiag_family = AF_VSOCK,
+ .vdiag_states = ~(__u32)0,
+ },
+ };
+ struct iovec iov = {
+ .iov_base = &req,
+ .iov_len = sizeof(req),
+ };
+ struct msghdr msg = {
+ .msg_name = &nladdr,
+ .msg_namelen = sizeof(nladdr),
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ };
+
+ for (;;) {
+ if (sendmsg(fd, &msg, 0) < 0) {
+ if (errno == EINTR)
+ continue;
+
+ perror("sendmsg");
+ exit(EXIT_FAILURE);
+ }
+
+ return;
+ }
+}
+
+static ssize_t recv_resp(int fd, void *buf, size_t len)
+{
+ struct sockaddr_nl nladdr = {
+ .nl_family = AF_NETLINK,
+ };
+ struct iovec iov = {
+ .iov_base = buf,
+ .iov_len = len,
+ };
+ struct msghdr msg = {
+ .msg_name = &nladdr,
+ .msg_namelen = sizeof(nladdr),
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ };
+ ssize_t ret;
+
+ do {
+ ret = recvmsg(fd, &msg, 0);
+ } while (ret < 0 && errno == EINTR);
+
+ if (ret < 0) {
+ perror("recvmsg");
+ exit(EXIT_FAILURE);
+ }
+
+ return ret;
+}
+
+static void add_vsock_stat(struct list_head *sockets,
+ const struct vsock_diag_msg *resp)
+{
+ struct vsock_stat *st;
+
+ st = malloc(sizeof(*st));
+ if (!st) {
+ perror("malloc");
+ exit(EXIT_FAILURE);
+ }
+
+ st->msg = *resp;
+ list_add_tail(&st->list, sockets);
+}
+
+/*
+ * Read vsock stats into a list.
+ */
+static void read_vsock_stat(struct list_head *sockets)
+{
+ long buf[8192 / sizeof(long)];
+ int fd;
+
+ fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);
+ if (fd < 0) {
+ perror("socket");
+ exit(EXIT_FAILURE);
+ }
+
+ send_req(fd);
+
+ for (;;) {
+ const struct nlmsghdr *h;
+ ssize_t ret;
+
+ ret = recv_resp(fd, buf, sizeof(buf));
+ if (ret == 0)
+ goto done;
+ if (ret < sizeof(*h)) {
+ fprintf(stderr, "short read of %zd bytes\n", ret);
+ exit(EXIT_FAILURE);
+ }
+
+ h = (struct nlmsghdr *)buf;
+
+ while (NLMSG_OK(h, ret)) {
+ if (h->nlmsg_type == NLMSG_DONE)
+ goto done;
+
+ if (h->nlmsg_type == NLMSG_ERROR) {
+ const struct nlmsgerr *err = NLMSG_DATA(h);
+
+ if (h->nlmsg_len < NLMSG_LENGTH(sizeof(*err)))
+ fprintf(stderr, "NLMSG_ERROR\n");
+ else {
+ errno = -err->error;
+ perror("NLMSG_ERROR");
+ }
+
+ exit(EXIT_FAILURE);
+ }
+
+ if (h->nlmsg_type != SOCK_DIAG_BY_FAMILY) {
+ fprintf(stderr, "unexpected nlmsg_type %#x\n",
+ h->nlmsg_type);
+ exit(EXIT_FAILURE);
+ }
+ if (h->nlmsg_len <
+ NLMSG_LENGTH(sizeof(struct vsock_diag_msg))) {
+ fprintf(stderr, "short vsock_diag_msg\n");
+ exit(EXIT_FAILURE);
+ }
+
+ add_vsock_stat(sockets, NLMSG_DATA(h));
+
+ h = NLMSG_NEXT(h, ret);
+ }
+ }
+
+done:
+ close(fd);
+}
+
+static void free_sock_stat(struct list_head *sockets)
+{
+ struct vsock_stat *st;
+ struct vsock_stat *next;
+
+ list_for_each_entry_safe(st, next, sockets, list)
+ free(st);
+}
+
+static void test_no_sockets(unsigned int peer_cid)
+{
+ LIST_HEAD(sockets);
+
+ read_vsock_stat(&sockets);
+
+ check_no_sockets(&sockets);
+
+ free_sock_stat(&sockets);
+}
+
+static void test_listen_socket_server(unsigned int peer_cid)
+{
+ union {
+ struct sockaddr sa;
+ struct sockaddr_vm svm;
+ } addr = {
+ .svm = {
+ .svm_family = AF_VSOCK,
+ .svm_port = 1234,
+ .svm_cid = VMADDR_CID_ANY,
+ },
+ };
+ LIST_HEAD(sockets);
+ struct vsock_stat *st;
+ int fd;
+
+ fd = socket(AF_VSOCK, SOCK_STREAM, 0);
+
+ if (bind(fd, &addr.sa, sizeof(addr.svm)) < 0) {
+ perror("bind");
+ exit(EXIT_FAILURE);
+ }
+
+ if (listen(fd, 1) < 0) {
+ perror("listen");
+ exit(EXIT_FAILURE);
+ }
+
+ read_vsock_stat(&sockets);
+
+ check_num_sockets(&sockets, 1);
+ st = find_vsock_stat(&sockets, fd);
+ check_socket_state(st, TCP_LISTEN);
+
+ close(fd);
+ free_sock_stat(&sockets);
+}
+
+static void test_connect_client(unsigned int peer_cid)
+{
+ union {
+ struct sockaddr sa;
+ struct sockaddr_vm svm;
+ } addr = {
+ .svm = {
+ .svm_family = AF_VSOCK,
+ .svm_port = 1234,
+ .svm_cid = peer_cid,
+ },
+ };
+ int fd;
+ int ret;
+ LIST_HEAD(sockets);
+ struct vsock_stat *st;
+
+ control_expectln("LISTENING");
+
+ fd = socket(AF_VSOCK, SOCK_STREAM, 0);
+
+ timeout_begin(TIMEOUT);
+ do {
+ ret = connect(fd, &addr.sa, sizeof(addr.svm));
+ timeout_check("connect");
+ } while (ret < 0 && errno == EINTR);
+ timeout_end();
+
+ if (ret < 0) {
+ perror("connect");
+ exit(EXIT_FAILURE);
+ }
+
+ read_vsock_stat(&sockets);
+
+ check_num_sockets(&sockets, 1);
+ st = find_vsock_stat(&sockets, fd);
+ check_socket_state(st, TCP_ESTABLISHED);
+
+ control_expectln("DONE");
+ control_writeln("DONE");
+
+ close(fd);
+ free_sock_stat(&sockets);
+}
+
+static void test_connect_server(unsigned int peer_cid)
+{
+ union {
+ struct sockaddr sa;
+ struct sockaddr_vm svm;
+ } addr = {
+ .svm = {
+ .svm_family = AF_VSOCK,
+ .svm_port = 1234,
+ .svm_cid = VMADDR_CID_ANY,
+ },
+ };
+ union {
+ struct sockaddr sa;
+ struct sockaddr_vm svm;
+ } clientaddr;
+ socklen_t clientaddr_len = sizeof(clientaddr.svm);
+ LIST_HEAD(sockets);
+ struct vsock_stat *st;
+ int fd;
+ int client_fd;
+
+ fd = socket(AF_VSOCK, SOCK_STREAM, 0);
+
+ if (bind(fd, &addr.sa, sizeof(addr.svm)) < 0) {
+ perror("bind");
+ exit(EXIT_FAILURE);
+ }
+
+ if (listen(fd, 1) < 0) {
+ perror("listen");
+ exit(EXIT_FAILURE);
+ }
+
+ control_writeln("LISTENING");
+
+ timeout_begin(TIMEOUT);
+ do {
+ client_fd = accept(fd, &clientaddr.sa, &clientaddr_len);
+ timeout_check("accept");
+ } while (client_fd < 0 && errno == EINTR);
+ timeout_end();
+
+ if (client_fd < 0) {
+ perror("accept");
+ exit(EXIT_FAILURE);
+ }
+ if (clientaddr.sa.sa_family != AF_VSOCK) {
+ fprintf(stderr, "expected AF_VSOCK from accept(2), got %d\n",
+ clientaddr.sa.sa_family);
+ exit(EXIT_FAILURE);
+ }
+ if (clientaddr.svm.svm_cid != peer_cid) {
+ fprintf(stderr, "expected peer CID %u from accept(2), got %u\n",
+ peer_cid, clientaddr.svm.svm_cid);
+ exit(EXIT_FAILURE);
+ }
+
+ read_vsock_stat(&sockets);
+
+ check_num_sockets(&sockets, 2);
+ find_vsock_stat(&sockets, fd);
+ st = find_vsock_stat(&sockets, client_fd);
+ check_socket_state(st, TCP_ESTABLISHED);
+
+ control_writeln("DONE");
+ control_expectln("DONE");
+
+ close(client_fd);
+ close(fd);
+ free_sock_stat(&sockets);
+}
+
+static struct {
+ const char *name;
+ void (*run_client)(unsigned int peer_cid);
+ void (*run_server)(unsigned int peer_cid);
+} test_cases[] = {
+ {
+ .name = "No sockets",
+ .run_server = test_no_sockets,
+ },
+ {
+ .name = "Listen socket",
+ .run_server = test_listen_socket_server,
+ },
+ {
+ .name = "Connect",
+ .run_client = test_connect_client,
+ .run_server = test_connect_server,
+ },
+ {},
+};
+
+static void init_signals(void)
+{
+ struct sigaction act = {
+ .sa_handler = sigalrm,
+ };
+
+ sigaction(SIGALRM, &act, NULL);
+ signal(SIGPIPE, SIG_IGN);
+}
+
+static unsigned int parse_cid(const char *str)
+{
+ char *endptr = NULL;
+ unsigned long int n;
+
+ errno = 0;
+ n = strtoul(str, &endptr, 10);
+ if (errno || *endptr != '\0') {
+ fprintf(stderr, "malformed CID \"%s\"\n", str);
+ exit(EXIT_FAILURE);
+ }
+ return n;
+}
+
+static const char optstring[] = "";
+static const struct option longopts[] = {
+ {
+ .name = "control-host",
+ .has_arg = required_argument,
+ .val = 'H',
+ },
+ {
+ .name = "control-port",
+ .has_arg = required_argument,
+ .val = 'P',
+ },
+ {
+ .name = "mode",
+ .has_arg = required_argument,
+ .val = 'm',
+ },
+ {
+ .name = "peer-cid",
+ .has_arg = required_argument,
+ .val = 'p',
+ },
+ {
+ .name = "help",
+ .has_arg = no_argument,
+ .val = '?',
+ },
+ {},
+};
+
+static void usage(void)
+{
+ fprintf(stderr, "Usage: vsock_diag_test [--help] [--control-host=<host>] --control-port=<port> --mode=client|server --peer-cid=<cid>\n"
+ "\n"
+ " Server: vsock_diag_test --control-port=1234 --mode=server --peer-cid=3\n"
+ " Client: vsock_diag_test --control-host=192.168.0.1 --control-port=1234 --mode=client --peer-cid=2\n"
+ "\n"
+ "Run vsock_diag.ko tests. Must be launched in both\n"
+ "guest and host. One side must use --mode=client and\n"
+ "the other side must use --mode=server.\n"
+ "\n"
+ "A TCP control socket connection is used to coordinate tests\n"
+ "between the client and the server. The server requires a\n"
+ "listen address and the client requires an address to\n"
+ "connect to.\n"
+ "\n"
+ "The CID of the other side must be given with --peer-cid=<cid>.\n");
+ exit(EXIT_FAILURE);
+}
+
+int main(int argc, char **argv)
+{
+ const char *control_host = NULL;
+ const char *control_port = NULL;
+ int mode = TEST_MODE_UNSET;
+ unsigned int peer_cid = VMADDR_CID_ANY;
+ int i;
+
+ init_signals();
+
+ for (;;) {
+ int opt = getopt_long(argc, argv, optstring, longopts, NULL);
+
+ if (opt == -1)
+ break;
+
+ switch (opt) {
+ case 'H':
+ control_host = optarg;
+ break;
+ case 'm':
+ if (strcmp(optarg, "client") == 0)
+ mode = TEST_MODE_CLIENT;
+ else if (strcmp(optarg, "server") == 0)
+ mode = TEST_MODE_SERVER;
+ else {
+ fprintf(stderr, "--mode must be \"client\" or \"server\"\n");
+ return EXIT_FAILURE;
+ }
+ break;
+ case 'p':
+ peer_cid = parse_cid(optarg);
+ break;
+ case 'P':
+ control_port = optarg;
+ break;
+ case '?':
+ default:
+ usage();
+ }
+ }
+
+ if (!control_port)
+ usage();
+ if (mode == TEST_MODE_UNSET)
+ usage();
+ if (peer_cid == VMADDR_CID_ANY)
+ usage();
+
+ if (!control_host) {
+ if (mode != TEST_MODE_SERVER)
+ usage();
+ control_host = "0.0.0.0";
+ }
+
+ control_init(control_host, control_port, mode == TEST_MODE_SERVER);
+
+ for (i = 0; test_cases[i].name; i++) {
+ void (*run)(unsigned int peer_cid);
+
+ printf("%s...", test_cases[i].name);
+ fflush(stdout);
+
+ if (mode == TEST_MODE_CLIENT)
+ run = test_cases[i].run_client;
+ else
+ run = test_cases[i].run_server;
+
+ if (run)
+ run(peer_cid);
+
+ printf("ok\n");
+ }
+
+ control_cleanup();
+ return EXIT_SUCCESS;
+}